#define DEBUG_TYPE "x86tti"
bool Vector = (ClassID == 1);
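
// Widest legal fixed-width vector register, gated on the prefer-vector-width
// setting: 512-bit with AVX-512, 256-bit with AVX, 128-bit with SSE1.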
  if (ST->hasAVX512() && PreferVectorWidth >= 512)
    return TypeSize::getFixed(512);
  if (ST->hasAVX() && PreferVectorWidth >= 256)
    return TypeSize::getFixed(256);
  if (ST->hasSSE1() && PreferVectorWidth >= 128)
    return TypeSize::getFixed(128);
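
// X86TTIImpl::getArithmeticInstrCost: the operand value kinds and properties
// (Op1Info/Op2Info, Opd1PropInfo/Opd2PropInfo) are forwarded to the base
// implementation and to recursive cost queries.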
                                       Op2Info, Opd1PropInfo,
                                       Opd2PropInfo, Args, CxtI);

                                    Opd1PropInfo, Opd2PropInfo);
  assert(ISD && "Invalid opcode");
  if (ISD == ISD::MUL && Args.size() == 2 && LT.second.isVector() &&
    bool Op1Signed = false, Op2Signed = false;
    unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);

    if (OpMinSize <= 15 && !ST->isPMADDWDSlow()) {
      bool Op1Constant =
          isa<ConstantDataVector>(Args[0]) || isa<ConstantVector>(Args[0]);
      bool Op2Constant =
          isa<ConstantDataVector>(Args[1]) || isa<ConstantVector>(Args[1]);
      bool Op1Sext = isa<SExtInst>(Args[0]) &&
                     (Op1MinSize == 15 || (Op1MinSize < 15 && !ST->hasSSE41()));
      bool Op2Sext = isa<SExtInst>(Args[1]) &&
                     (Op2MinSize == 15 || (Op2MinSize < 15 && !ST->hasSSE41()));
      bool IsZeroExtended = !Op1Signed || !Op2Signed;
      bool IsConstant = Op1Constant || Op2Constant;
      bool IsSext = Op1Sext || Op2Sext;
      if (IsConstant || IsZeroExtended || IsSext)
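
// Microarchitecture-specific cost tables: Goldmont (GLM) divide/sqrt costs,
// then Silvermont (SLM) arithmetic costs, including a narrower-operand special
// case for vector multiplies.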
  if (ST->useGLMDivSqrtCosts())

      return LT.first * Entry->Cost;
  if (ST->useSLMArithCosts()) {

      bool Op1Signed = false;
      bool Op2Signed = false;

      bool SignedMode = Op1Signed || Op2Signed;
      unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);

      if (!SignedMode && OpMinSize <= 8)
      if (!SignedMode && OpMinSize <= 16)

      return LT.first * Entry->Cost;
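
// Uniform-constant operand costs: the tables are consulted from the widest
// feature set (AVX-512BW) down to SSE2, and a hit is scaled by the
// legalization factor LT.first.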
  static const CostTblEntry AVX512BWUniformConstCostTable[] = {

  if (const auto *Entry = CostTableLookup(AVX512BWUniformConstCostTable, ISD,
                                          LT.second))
    return LT.first * Entry->Cost;
  static const CostTblEntry AVX512UniformConstCostTable[] = {

  if (const auto *Entry = CostTableLookup(AVX512UniformConstCostTable, ISD,
                                          LT.second))
    return LT.first * Entry->Cost;
  static const CostTblEntry AVX2UniformConstCostTable[] = {

  if (const auto *Entry = CostTableLookup(AVX2UniformConstCostTable, ISD,
                                          LT.second))
    return LT.first * Entry->Cost;
  static const CostTblEntry SSE2UniformConstCostTable[] = {

      ST->hasSSE2() && !ST->hasXOP()) {
    if (const auto *Entry =
            CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second))
      return LT.first * Entry->Cost;
    if (const auto *Entry =
      return LT.first * Entry->Cost;
    if (const auto *Entry =
      return LT.first * Entry->Cost;
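
// The remaining arithmetic cases follow the same pattern: look up the
// (ISD opcode, legalized MVT) pair in a feature-specific cost table and scale
// the matching entry by LT.first.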
      return LT.first * Entry->Cost;

    return LT.first * 32;
    return LT.first * 38;
    return LT.first * 15;
    return LT.first * 20;

    return LT.first * Entry->Cost;

    return LT.first * Entry->Cost;
    if (const auto *Entry =
      return LT.first * Entry->Cost;

    if (const auto *Entry =
      return LT.first * Entry->Cost;

      return LT.first * Entry->Cost;

      return LT.first * Entry->Cost;

    return LT.first * Entry->Cost;

    return LT.first * Entry->Cost;
    if (const auto *Entry =
      return LT.first * Entry->Cost;

  static const CostTblEntry SSE2UniformShiftCostTable[] = {

    if (const auto *Entry =
      return LT.first * Entry->Cost;
    return LT.first * Entry->Cost;

    return LT.first * Entry->Cost;

    return LT.first * Entry->Cost;

    return LT.first * Entry->Cost;

    return LT.first * Entry->Cost;

    return LT.first * Entry->Cost;

    return LT.first * Entry->Cost;

    return LT.first * Entry->Cost;
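
// Scalarization fallback: each lane is charged 20x the scalar cost, which
// strongly discourages vectorizing operations (notably division) that have no
// cheap vector lowering.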
  return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost;
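
// X86TTIImpl::getShuffleCost: subvector extraction is free when it starts at
// element 0 of a legalized register and cheap when the subvectors are aligned
// to the legalized subvector width.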
    int NumElts = LT.second.getVectorNumElements();
    if ((Index % NumElts) == 0)

    std::pair<InstructionCost, MVT> SubLT =
    if (SubLT.second.isVector()) {
      int NumSubElts = SubLT.second.getVectorNumElements();
      if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
      int OrigSubElts = cast<FixedVectorType>(SubTp)->getNumElements();
      if (NumSubElts > OrigSubElts && (Index % OrigSubElts) == 0 &&
          (NumSubElts % OrigSubElts) == 0 &&
          LT.second.getVectorElementType() ==
              SubLT.second.getVectorElementType() &&
          LT.second.getVectorElementType().getSizeInBits() ==

        assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
               "Unexpected number of elements!");

                                           LT.second.getVectorNumElements());
                                           SubLT.second.getVectorNumElements());
        int ExtractIndex = alignDown((Index % NumElts), NumSubElts);

          return ExtractCost + 1;

               "Unexpected vector size");
        return ExtractCost + 2;
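
// Subvector insertions are cheap when the inserted subvector is aligned to the
// legalized subvector width; unlike extraction, inserting at index 0 is not
// free, since the rest of the wide vector must be preserved.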
    int NumElts = LT.second.getVectorNumElements();
    std::pair<InstructionCost, MVT> SubLT =
    if (SubLT.second.isVector()) {
      int NumSubElts = SubLT.second.getVectorNumElements();
      if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
  static const CostTblEntry SSE2SubVectorShuffleTbl[] = {

    if (const auto *Entry =
    MVT LegalVT = LT.second;

            cast<FixedVectorType>(BaseTp)->getNumElements()) {

      unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
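
// For single-source permutes that legalize to multiple registers, the mask is
// normalized to whole legalized registers and walked with processShuffleMasks:
// a single-source shuffle is charged whenever a register's mask changes, and
// only a basic copy cost otherwise.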
        unsigned NormalizedVF =

        copy(Mask, NormalizedMask.begin());
        unsigned PrevSrcReg = 0;

            NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
            [this, SingleOpTy, &PrevSrcReg, &PrevRegMask,
             &Cost](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {

              if (PrevRegMask.empty() || PrevSrcReg != SrcReg ||
                  PrevRegMask != RegMask)
                                       RegMask, 0, nullptr);

              if (SrcReg != DestReg &&

              PrevSrcReg = SrcReg;
              PrevRegMask = RegMask;

    LT.first = NumOfDests * NumOfShufflesPerDest;
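
// Feature-specific shuffle cost tables (FP16 and older feature levels) are
// consulted from newest to oldest; each hit is scaled by LT.first.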
  if (!ST->useSoftFloat() && ST->hasFP16())
    if (const auto *Entry =
      return LT.first * Entry->Cost;
  if (const auto *Entry =
    return LT.first * Entry->Cost;

  if (const auto *Entry =
    return LT.first * Entry->Cost;

    return LT.first * Entry->Cost;

    return LT.first * Entry->Cost;

    return LT.first * Entry->Cost;

    return LT.first * Entry->Cost;

    return LT.first * Entry->Cost;

    return LT.first * Entry->Cost;
    if (const auto *Entry =
                                  LT.second.getVectorElementCount()) &&
             "Table entry missing from isLegalBroadcastLoad()");
      return LT.first * Entry->Cost;
      return LT.first * Entry->Cost;

      return LT.first * Entry->Cost;
  assert(ISD && "Invalid opcode");

      return Cost == 0 ? 0 : 1;
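
// X86TTIImpl::getCastInstrCost: the conversion cost tables are consulted first
// with the simple (pre-legalization) source/destination MVTs, then retried on
// the legalized types, scaling by the larger of the two legalization factors.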
            AVX512BWConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
      return AdjustCost(Entry->Cost);

            AVX512DQConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
      return AdjustCost(Entry->Cost);

            AVX512FConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
      return AdjustCost(Entry->Cost);

            AVX512BWVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
      return AdjustCost(Entry->Cost);

            AVX512DQVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
      return AdjustCost(Entry->Cost);

                                          SimpleDstTy, SimpleSrcTy))
      return AdjustCost(Entry->Cost);

                                          SimpleDstTy, SimpleSrcTy))
      return AdjustCost(Entry->Cost);

                                          SimpleDstTy, SimpleSrcTy))
      return AdjustCost(Entry->Cost);

                                          SimpleDstTy, SimpleSrcTy))
      return AdjustCost(Entry->Cost);

                                          SimpleDstTy, SimpleSrcTy))
      return AdjustCost(Entry->Cost);
  std::pair<InstructionCost, MVT> LTDest =

            AVX512BWConversionTbl, ISD, LTDest.second, LTSrc.second))
        return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);

            AVX512DQConversionTbl, ISD, LTDest.second, LTSrc.second))
        return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);

            AVX512FConversionTbl, ISD, LTDest.second, LTSrc.second))
        return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);

                                        LTDest.second, LTSrc.second))
      return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);

                                        LTDest.second, LTSrc.second))
      return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);

                                        LTDest.second, LTSrc.second))
      return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);

                                        LTDest.second, LTSrc.second))
      return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);

                                        LTDest.second, LTSrc.second))
      return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);

                                        LTDest.second, LTSrc.second))
      return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);

                                        LTDest.second, LTSrc.second))
      return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
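
// i8/i16 <-> floating-point fallbacks: widen the integer side to 32 bits (the
// extend is free when the source is a scalar load) and cost the 32-bit
// conversion plus the extend or truncate.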
      1 < Src->getScalarSizeInBits() && Src->getScalarSizeInBits() < 32) {
    Type *ExtSrc = Src->getWithNewBitWidth(32);

    if (!(Src->isIntegerTy() && I && isa<LoadInst>(I->getOperand(0))))
      1 < Dst->getScalarSizeInBits() && Dst->getScalarSizeInBits() < 32) {
    Type *TruncDst = Dst->getWithNewBitWidth(32);
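
// X86TTIImpl::getCmpSelInstrCost: vector compare predicates without a direct
// instruction (NE, signed GE/LE, unsigned LT/GT/LE/GE) are charged an
// ExtraCost for their xor/pmaxu/psubus-style fix-up sequences.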
  MVT MTy = LT.second;

  assert(ISD && "Invalid opcode");

  unsigned ExtraCost = 0;
  if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {

        Pred = cast<CmpInst>(I)->getPredicate();
      case CmpInst::Predicate::ICMP_NE:

      case CmpInst::Predicate::ICMP_SGE:
      case CmpInst::Predicate::ICMP_SLE:

      case CmpInst::Predicate::ICMP_ULT:
      case CmpInst::Predicate::ICMP_UGT:

      case CmpInst::Predicate::ICMP_ULE:
      case CmpInst::Predicate::ICMP_UGE:

      case CmpInst::Predicate::BAD_ICMP_PREDICATE:
      case CmpInst::Predicate::BAD_FCMP_PREDICATE:
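
// As with the arithmetic costs, (ExtraCost + table cost) is scaled by the
// legalization factor, trying the Silvermont table first and then the
// feature-specific compare/select tables.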
  if (ST->useSLMArithCosts())
      return LT.first * (ExtraCost + Entry->Cost);

      return LT.first * (ExtraCost + Entry->Cost);

      return LT.first * (ExtraCost + Entry->Cost);

      return LT.first * (ExtraCost + Entry->Cost);

      return LT.first * (ExtraCost + Entry->Cost);

      return LT.first * (ExtraCost + Entry->Cost);

      return LT.first * (ExtraCost + Entry->Cost);

      return LT.first * (ExtraCost + Entry->Cost);

      return LT.first * (ExtraCost + Entry->Cost);