63#define DEBUG_TYPE "x86tti"
79 std::optional<unsigned>
165 bool Vector = (ClassID == 1);
172 if (!
Vector && ST->hasEGPR())
188 auto *VTy = dyn_cast<FixedVectorType>(Ty);
189 if (!Ty->
isIntegerTy() && (!VTy || VTy->getNumElements() != 1))
192 switch (cast<IntegerType>(ScalarTy)->
getBitWidth()) {
209 if (ST->
hasAVX512() && ST->hasEVEX512() && PreferVectorWidth >= 512)
211 if (ST->
hasAVX() && PreferVectorWidth >= 256)
213 if (ST->
hasSSE1() && PreferVectorWidth >= 128)
254 if (Opcode == Instruction::Mul && Ty->
isVectorTy() &&
271 assert(ISD &&
"Invalid opcode");
273 if (ISD ==
ISD::MUL && Args.size() == 2 && LT.second.isVector() &&
274 (LT.second.getScalarType() == MVT::i32 ||
275 LT.second.getScalarType() == MVT::i64)) {
277 bool Op1Signed =
false, Op2Signed =
false;
280 unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
281 bool SignedMode = Op1Signed || Op2Signed;
286 if (OpMinSize <= 15 && !ST->isPMADDWDSlow() &&
287 LT.second.getScalarType() == MVT::i32) {
289 isa<ConstantDataVector>(Args[0]) || isa<ConstantVector>(Args[0]);
291 isa<ConstantDataVector>(Args[1]) || isa<ConstantVector>(Args[1]);
292 bool Op1Sext = isa<SExtInst>(Args[0]) &&
293 (Op1MinSize == 15 || (Op1MinSize < 15 && !ST->
hasSSE41()));
294 bool Op2Sext = isa<SExtInst>(Args[1]) &&
295 (Op2MinSize == 15 || (Op2MinSize < 15 && !ST->
hasSSE41()));
297 bool IsZeroExtended = !Op1Signed || !Op2Signed;
298 bool IsConstant = Op1Constant || Op2Constant;
299 bool IsSext = Op1Sext || Op2Sext;
300 if (IsConstant || IsZeroExtended || IsSext)
308 if (ST->useSLMArithCosts() && LT.second == MVT::v4i32) {
311 if (!SignedMode && OpMinSize <= 8)
315 if (!SignedMode && OpMinSize <= 16)
322 if (!SignedMode && OpMinSize <= 32 && LT.second.getScalarType() == MVT::i64)
375 {
ISD::SHL, MVT::v16i8, { 1, 6, 1, 2 } },
376 {
ISD::SRL, MVT::v16i8, { 1, 6, 1, 2 } },
377 {
ISD::SRA, MVT::v16i8, { 1, 6, 1, 2 } },
378 {
ISD::SHL, MVT::v32i8, { 1, 6, 1, 2 } },
379 {
ISD::SRL, MVT::v32i8, { 1, 6, 1, 2 } },
380 {
ISD::SRA, MVT::v32i8, { 1, 6, 1, 2 } },
381 {
ISD::SHL, MVT::v64i8, { 1, 6, 1, 2 } },
382 {
ISD::SRL, MVT::v64i8, { 1, 6, 1, 2 } },
383 {
ISD::SRA, MVT::v64i8, { 1, 6, 1, 2 } },
387 if (
const auto *Entry =
389 if (
auto KindCost = Entry->Cost[
CostKind])
390 return LT.first * *KindCost;
393 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
394 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
395 {
ISD::SRA, MVT::v16i8, { 1, 8, 4, 5 } },
396 {
ISD::SHL, MVT::v32i8, { 1, 8, 2, 3 } },
397 {
ISD::SRL, MVT::v32i8, { 1, 8, 2, 3 } },
398 {
ISD::SRA, MVT::v32i8, { 1, 9, 4, 5 } },
399 {
ISD::SHL, MVT::v64i8, { 1, 8, 2, 3 } },
400 {
ISD::SRL, MVT::v64i8, { 1, 8, 2, 3 } },
401 {
ISD::SRA, MVT::v64i8, { 1, 9, 4, 6 } },
403 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
404 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
405 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
406 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
407 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
408 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
412 if (
const auto *Entry =
414 if (
auto KindCost = Entry->Cost[
CostKind])
415 return LT.first * *KindCost;
418 {
ISD::SHL, MVT::v64i8, { 2, 12, 5, 6 } },
419 {
ISD::SRL, MVT::v64i8, { 2, 12, 5, 6 } },
420 {
ISD::SRA, MVT::v64i8, { 3, 10, 12, 12 } },
422 {
ISD::SHL, MVT::v16i16, { 2, 7, 4, 4 } },
423 {
ISD::SRL, MVT::v16i16, { 2, 7, 4, 4 } },
424 {
ISD::SRA, MVT::v16i16, { 2, 7, 4, 4 } },
426 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
427 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
428 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
429 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
430 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
431 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
433 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
434 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
435 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
436 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
437 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
438 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
439 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
448 if (
const auto *Entry =
450 if (
auto KindCost = Entry->Cost[
CostKind])
451 return LT.first * *KindCost;
454 {
ISD::SHL, MVT::v16i8, { 1, 8, 2, 3 } },
455 {
ISD::SRL, MVT::v16i8, { 1, 8, 2, 3 } },
456 {
ISD::SRA, MVT::v16i8, { 2, 10, 5, 6 } },
457 {
ISD::SHL, MVT::v32i8, { 2, 8, 2, 4 } },
458 {
ISD::SRL, MVT::v32i8, { 2, 8, 2, 4 } },
459 {
ISD::SRA, MVT::v32i8, { 3, 10, 5, 9 } },
461 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
462 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
463 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
464 {
ISD::SHL, MVT::v16i16,{ 2, 2, 1, 2 } },
465 {
ISD::SRL, MVT::v16i16,{ 2, 2, 1, 2 } },
466 {
ISD::SRA, MVT::v16i16,{ 2, 2, 1, 2 } },
468 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
469 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
470 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
471 {
ISD::SHL, MVT::v8i32, { 2, 2, 1, 2 } },
472 {
ISD::SRL, MVT::v8i32, { 2, 2, 1, 2 } },
473 {
ISD::SRA, MVT::v8i32, { 2, 2, 1, 2 } },
475 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
476 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
477 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
478 {
ISD::SHL, MVT::v4i64, { 2, 2, 1, 2 } },
479 {
ISD::SRL, MVT::v4i64, { 2, 2, 1, 2 } },
480 {
ISD::SRA, MVT::v4i64, { 4, 4, 3, 6 } },
489 if (
const auto *Entry =
491 if (
auto KindCost = Entry->Cost[
CostKind])
492 return LT.first * *KindCost;
495 {
ISD::SHL, MVT::v16i8, { 2, 7, 2, 3 } },
496 {
ISD::SRL, MVT::v16i8, { 2, 7, 2, 3 } },
497 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
498 {
ISD::SHL, MVT::v32i8, { 4, 7, 7, 8 } },
499 {
ISD::SRL, MVT::v32i8, { 4, 7, 7, 8 } },
500 {
ISD::SRA, MVT::v32i8, { 7, 7, 12, 13 } },
502 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 1 } },
503 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 1 } },
504 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 1 } },
505 {
ISD::SHL, MVT::v16i16,{ 3, 6, 4, 5 } },
506 {
ISD::SRL, MVT::v16i16,{ 3, 6, 4, 5 } },
507 {
ISD::SRA, MVT::v16i16,{ 3, 6, 4, 5 } },
509 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 1 } },
510 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 1 } },
511 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 1 } },
512 {
ISD::SHL, MVT::v8i32, { 3, 6, 4, 5 } },
513 {
ISD::SRL, MVT::v8i32, { 3, 6, 4, 5 } },
514 {
ISD::SRA, MVT::v8i32, { 3, 6, 4, 5 } },
516 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 1 } },
517 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 1 } },
518 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
519 {
ISD::SHL, MVT::v4i64, { 3, 6, 4, 5 } },
520 {
ISD::SRL, MVT::v4i64, { 3, 6, 4, 5 } },
521 {
ISD::SRA, MVT::v4i64, { 5, 7, 8, 9 } },
531 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
532 if (
const auto *Entry =
534 if (
auto KindCost = Entry->Cost[
CostKind])
535 return LT.first * *KindCost;
538 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
539 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
540 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
542 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
543 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
544 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
546 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
547 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
548 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
550 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
551 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
552 {
ISD::SRA, MVT::v2i64, { 3, 5, 6, 6 } },
562 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
563 if (
const auto *Entry =
565 if (
auto KindCost = Entry->Cost[
CostKind])
566 return LT.first * *KindCost;
581 if (
const auto *Entry =
583 if (
auto KindCost = Entry->Cost[
CostKind])
584 return LT.first * *KindCost;
604 if (
const auto *Entry =
606 if (
auto KindCost = Entry->Cost[
CostKind])
607 return LT.first * *KindCost;
627 if (
const auto *Entry =
CostTableLookup(AVX2ConstCostTable, ISD, LT.second))
628 if (
auto KindCost = Entry->Cost[
CostKind])
629 return LT.first * *KindCost;
649 if (
const auto *Entry =
CostTableLookup(AVXConstCostTable, ISD, LT.second))
650 if (
auto KindCost = Entry->Cost[
CostKind])
651 return LT.first * *KindCost;
659 if (
const auto *Entry =
661 if (
auto KindCost = Entry->Cost[
CostKind])
662 return LT.first * *KindCost;
682 if (
const auto *Entry =
CostTableLookup(SSE2ConstCostTable, ISD, LT.second))
683 if (
auto KindCost = Entry->Cost[
CostKind])
684 return LT.first * *KindCost;
687 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
688 {
ISD::SRL, MVT::v16i8, { 3,10, 5, 8 } },
689 {
ISD::SRA, MVT::v16i8, { 4,12, 8,12 } },
690 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
691 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
692 {
ISD::SRA, MVT::v32i8, { 5,10,10,13 } },
693 {
ISD::SHL, MVT::v64i8, { 4, 7, 6, 8 } },
694 {
ISD::SRL, MVT::v64i8, { 4, 8, 7,10 } },
695 {
ISD::SRA, MVT::v64i8, { 5,10,10,15 } },
697 {
ISD::SHL, MVT::v32i16, { 2, 4, 2, 3 } },
698 {
ISD::SRL, MVT::v32i16, { 2, 4, 2, 3 } },
699 {
ISD::SRA, MVT::v32i16, { 2, 4, 2, 3 } },
703 if (
const auto *Entry =
705 if (
auto KindCost = Entry->Cost[
CostKind])
706 return LT.first * *KindCost;
709 {
ISD::SHL, MVT::v32i16, { 5,10, 5, 7 } },
710 {
ISD::SRL, MVT::v32i16, { 5,10, 5, 7 } },
711 {
ISD::SRA, MVT::v32i16, { 5,10, 5, 7 } },
713 {
ISD::SHL, MVT::v16i32, { 2, 4, 2, 3 } },
714 {
ISD::SRL, MVT::v16i32, { 2, 4, 2, 3 } },
715 {
ISD::SRA, MVT::v16i32, { 2, 4, 2, 3 } },
717 {
ISD::SRA, MVT::v2i64, { 1, 2, 1, 2 } },
718 {
ISD::SHL, MVT::v4i64, { 1, 4, 1, 2 } },
719 {
ISD::SRL, MVT::v4i64, { 1, 4, 1, 2 } },
720 {
ISD::SRA, MVT::v4i64, { 1, 4, 1, 2 } },
721 {
ISD::SHL, MVT::v8i64, { 1, 4, 1, 2 } },
722 {
ISD::SRL, MVT::v8i64, { 1, 4, 1, 2 } },
723 {
ISD::SRA, MVT::v8i64, { 1, 4, 1, 2 } },
727 if (
const auto *Entry =
729 if (
auto KindCost = Entry->Cost[
CostKind])
730 return LT.first * *KindCost;
734 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
735 {
ISD::SRL, MVT::v16i8, { 3, 9, 5, 8 } },
736 {
ISD::SRA, MVT::v16i8, { 4, 5, 9,13 } },
737 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
738 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
739 {
ISD::SRA, MVT::v32i8, { 6, 9,11,16 } },
741 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 2 } },
742 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 2 } },
743 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 2 } },
744 {
ISD::SHL, MVT::v16i16, { 2, 4, 2, 3 } },
745 {
ISD::SRL, MVT::v16i16, { 2, 4, 2, 3 } },
746 {
ISD::SRA, MVT::v16i16, { 2, 4, 2, 3 } },
748 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 2 } },
749 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 2 } },
750 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 2 } },
751 {
ISD::SHL, MVT::v8i32, { 2, 4, 2, 3 } },
752 {
ISD::SRL, MVT::v8i32, { 2, 4, 2, 3 } },
753 {
ISD::SRA, MVT::v8i32, { 2, 4, 2, 3 } },
755 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 2 } },
756 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 2 } },
757 {
ISD::SRA, MVT::v2i64, { 2, 4, 5, 7 } },
758 {
ISD::SHL, MVT::v4i64, { 2, 4, 1, 2 } },
759 {
ISD::SRL, MVT::v4i64, { 2, 4, 1, 2 } },
760 {
ISD::SRA, MVT::v4i64, { 4, 6, 5, 9 } },
764 if (
const auto *Entry =
766 if (
auto KindCost = Entry->Cost[
CostKind])
767 return LT.first * *KindCost;
770 {
ISD::SHL, MVT::v16i8, { 4, 4, 6, 8 } },
771 {
ISD::SRL, MVT::v16i8, { 4, 8, 5, 8 } },
772 {
ISD::SRA, MVT::v16i8, { 6, 6, 9,13 } },
773 {
ISD::SHL, MVT::v32i8, { 7, 8,11,14 } },
774 {
ISD::SRL, MVT::v32i8, { 7, 9,10,14 } },
775 {
ISD::SRA, MVT::v32i8, { 10,11,16,21 } },
777 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 2 } },
778 {
ISD::SRL, MVT::v8i16, { 1, 3, 1, 2 } },
779 {
ISD::SRA, MVT::v8i16, { 1, 3, 1, 2 } },
780 {
ISD::SHL, MVT::v16i16, { 3, 7, 5, 7 } },
781 {
ISD::SRL, MVT::v16i16, { 3, 7, 5, 7 } },
782 {
ISD::SRA, MVT::v16i16, { 3, 7, 5, 7 } },
784 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 2 } },
785 {
ISD::SRL, MVT::v4i32, { 1, 3, 1, 2 } },
786 {
ISD::SRA, MVT::v4i32, { 1, 3, 1, 2 } },
787 {
ISD::SHL, MVT::v8i32, { 3, 7, 5, 7 } },
788 {
ISD::SRL, MVT::v8i32, { 3, 7, 5, 7 } },
789 {
ISD::SRA, MVT::v8i32, { 3, 7, 5, 7 } },
791 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 2 } },
792 {
ISD::SRL, MVT::v2i64, { 1, 3, 1, 2 } },
793 {
ISD::SRA, MVT::v2i64, { 3, 4, 5, 7 } },
794 {
ISD::SHL, MVT::v4i64, { 3, 7, 4, 6 } },
795 {
ISD::SRL, MVT::v4i64, { 3, 7, 4, 6 } },
796 {
ISD::SRA, MVT::v4i64, { 6, 7,10,13 } },
801 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
802 if (
const auto *Entry =
804 if (
auto KindCost = Entry->Cost[
CostKind])
805 return LT.first * *KindCost;
809 {
ISD::SHL, MVT::v16i8, { 9, 10, 6, 9 } },
810 {
ISD::SRL, MVT::v16i8, { 9, 13, 5, 9 } },
811 {
ISD::SRA, MVT::v16i8, { 11, 15, 9,13 } },
813 {
ISD::SHL, MVT::v8i16, { 2, 2, 1, 2 } },
814 {
ISD::SRL, MVT::v8i16, { 2, 2, 1, 2 } },
815 {
ISD::SRA, MVT::v8i16, { 2, 2, 1, 2 } },
817 {
ISD::SHL, MVT::v4i32, { 2, 2, 1, 2 } },
818 {
ISD::SRL, MVT::v4i32, { 2, 2, 1, 2 } },
819 {
ISD::SRA, MVT::v4i32, { 2, 2, 1, 2 } },
821 {
ISD::SHL, MVT::v2i64, { 2, 2, 1, 2 } },
822 {
ISD::SRL, MVT::v2i64, { 2, 2, 1, 2 } },
823 {
ISD::SRA, MVT::v2i64, { 5, 9, 5, 7 } },
827 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
828 if (
const auto *Entry =
830 if (
auto KindCost = Entry->Cost[
CostKind])
831 return LT.first * *KindCost;
834 {
ISD::MUL, MVT::v2i64, { 2, 15, 1, 3 } },
835 {
ISD::MUL, MVT::v4i64, { 2, 15, 1, 3 } },
836 {
ISD::MUL, MVT::v8i64, { 3, 15, 1, 3 } }
841 if (
const auto *Entry =
CostTableLookup(AVX512DQCostTable, ISD, LT.second))
842 if (
auto KindCost = Entry->Cost[
CostKind])
843 return LT.first * *KindCost;
846 {
ISD::SHL, MVT::v16i8, { 4, 8, 4, 5 } },
847 {
ISD::SRL, MVT::v16i8, { 4, 8, 4, 5 } },
848 {
ISD::SRA, MVT::v16i8, { 4, 8, 4, 5 } },
849 {
ISD::SHL, MVT::v32i8, { 4, 23,11,16 } },
850 {
ISD::SRL, MVT::v32i8, { 4, 30,12,18 } },
851 {
ISD::SRA, MVT::v32i8, { 6, 13,24,30 } },
852 {
ISD::SHL, MVT::v64i8, { 6, 19,13,15 } },
853 {
ISD::SRL, MVT::v64i8, { 7, 27,15,18 } },
854 {
ISD::SRA, MVT::v64i8, { 15, 15,30,30 } },
856 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
857 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
858 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
859 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
860 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
861 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
862 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
863 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
864 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
866 {
ISD::ADD, MVT::v64i8, { 1, 1, 1, 1 } },
867 {
ISD::ADD, MVT::v32i16, { 1, 1, 1, 1 } },
869 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 1 } },
870 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 1 } },
871 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 1 } },
872 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 1 } },
874 {
ISD::SUB, MVT::v64i8, { 1, 1, 1, 1 } },
875 {
ISD::SUB, MVT::v32i16, { 1, 1, 1, 1 } },
877 {
ISD::MUL, MVT::v16i8, { 4, 12, 4, 5 } },
878 {
ISD::MUL, MVT::v32i8, { 3, 10, 7,10 } },
879 {
ISD::MUL, MVT::v64i8, { 3, 11, 7,10 } },
880 {
ISD::MUL, MVT::v32i16, { 1, 5, 1, 1 } },
882 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 1 } },
883 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 1 } },
884 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 1 } },
885 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 1 } },
890 if (
const auto *Entry =
CostTableLookup(AVX512BWCostTable, ISD, LT.second))
891 if (
auto KindCost = Entry->Cost[
CostKind])
892 return LT.first * *KindCost;
895 {
ISD::SHL, MVT::v64i8, { 15, 19,27,33 } },
896 {
ISD::SRL, MVT::v64i8, { 15, 19,30,36 } },
897 {
ISD::SRA, MVT::v64i8, { 37, 37,51,63 } },
899 {
ISD::SHL, MVT::v32i16, { 11, 16,11,15 } },
900 {
ISD::SRL, MVT::v32i16, { 11, 16,11,15 } },
901 {
ISD::SRA, MVT::v32i16, { 11, 16,11,15 } },
903 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
904 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
905 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
906 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
907 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
908 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
909 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
910 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
911 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
913 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
914 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
915 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
916 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
917 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
918 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
919 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
920 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
921 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
923 {
ISD::ADD, MVT::v64i8, { 3, 7, 5, 5 } },
924 {
ISD::ADD, MVT::v32i16, { 3, 7, 5, 5 } },
926 {
ISD::SUB, MVT::v64i8, { 3, 7, 5, 5 } },
927 {
ISD::SUB, MVT::v32i16, { 3, 7, 5, 5 } },
929 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 1 } },
930 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 1 } },
931 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 1 } },
932 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 1 } },
934 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 1 } },
935 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 1 } },
936 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 1 } },
937 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 1 } },
939 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 1 } },
940 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 1 } },
941 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 1 } },
942 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 1 } },
944 {
ISD::MUL, MVT::v16i32, { 1, 10, 1, 2 } },
945 {
ISD::MUL, MVT::v8i32, { 1, 10, 1, 2 } },
946 {
ISD::MUL, MVT::v4i32, { 1, 10, 1, 2 } },
947 {
ISD::MUL, MVT::v8i64, { 6, 9, 8, 8 } },
952 {
ISD::FNEG, MVT::v8f64, { 1, 1, 1, 2 } },
953 {
ISD::FADD, MVT::v8f64, { 1, 4, 1, 1 } },
954 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 1 } },
955 {
ISD::FSUB, MVT::v8f64, { 1, 4, 1, 1 } },
956 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 1 } },
957 {
ISD::FMUL, MVT::v8f64, { 1, 4, 1, 1 } },
958 {
ISD::FMUL, MVT::v4f64, { 1, 4, 1, 1 } },
959 {
ISD::FMUL, MVT::v2f64, { 1, 4, 1, 1 } },
962 {
ISD::FDIV, MVT::f64, { 4, 14, 1, 1 } },
963 {
ISD::FDIV, MVT::v2f64, { 4, 14, 1, 1 } },
964 {
ISD::FDIV, MVT::v4f64, { 8, 14, 1, 1 } },
965 {
ISD::FDIV, MVT::v8f64, { 16, 23, 1, 3 } },
967 {
ISD::FNEG, MVT::v16f32, { 1, 1, 1, 2 } },
968 {
ISD::FADD, MVT::v16f32, { 1, 4, 1, 1 } },
969 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 1 } },
970 {
ISD::FSUB, MVT::v16f32, { 1, 4, 1, 1 } },
971 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 1 } },
972 {
ISD::FMUL, MVT::v16f32, { 1, 4, 1, 1 } },
973 {
ISD::FMUL, MVT::v8f32, { 1, 4, 1, 1 } },
974 {
ISD::FMUL, MVT::v4f32, { 1, 4, 1, 1 } },
977 {
ISD::FDIV, MVT::f32, { 3, 11, 1, 1 } },
978 {
ISD::FDIV, MVT::v4f32, { 3, 11, 1, 1 } },
979 {
ISD::FDIV, MVT::v8f32, { 5, 11, 1, 1 } },
980 {
ISD::FDIV, MVT::v16f32, { 10, 18, 1, 3 } },
984 if (
const auto *Entry =
CostTableLookup(AVX512CostTable, ISD, LT.second))
985 if (
auto KindCost = Entry->Cost[
CostKind])
986 return LT.first * *KindCost;
991 {
ISD::SHL, MVT::v4i32, { 2, 3, 1, 3 } },
992 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 3 } },
993 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 3 } },
994 {
ISD::SHL, MVT::v8i32, { 4, 4, 1, 3 } },
995 {
ISD::SRL, MVT::v8i32, { 4, 4, 1, 3 } },
996 {
ISD::SRA, MVT::v8i32, { 4, 4, 1, 3 } },
997 {
ISD::SHL, MVT::v2i64, { 2, 3, 1, 1 } },
998 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
999 {
ISD::SHL, MVT::v4i64, { 4, 4, 1, 2 } },
1000 {
ISD::SRL, MVT::v4i64, { 4, 4, 1, 2 } },
1012 if (ST->
hasAVX2() && !(ST->hasXOP() && LT.second == MVT::v4i32)) {
1013 if (ISD ==
ISD::SHL && LT.second == MVT::v16i16 &&
1020 if (
const auto *Entry =
CostTableLookup(AVX2ShiftCostTable, ISD, LT.second))
1021 if (
auto KindCost = Entry->Cost[
CostKind])
1022 return LT.first * *KindCost;
1027 {
ISD::SHL, MVT::v16i8, { 1, 3, 1, 1 } },
1028 {
ISD::SRL, MVT::v16i8, { 2, 3, 1, 1 } },
1029 {
ISD::SRA, MVT::v16i8, { 2, 3, 1, 1 } },
1030 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 1 } },
1031 {
ISD::SRL, MVT::v8i16, { 2, 3, 1, 1 } },
1032 {
ISD::SRA, MVT::v8i16, { 2, 3, 1, 1 } },
1033 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 1 } },
1034 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 1 } },
1035 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 1 } },
1036 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 1 } },
1037 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
1038 {
ISD::SRA, MVT::v2i64, { 2, 3, 1, 1 } },
1040 {
ISD::SHL, MVT::v32i8, { 4, 7, 5, 6 } },
1041 {
ISD::SRL, MVT::v32i8, { 6, 7, 5, 6 } },
1042 {
ISD::SRA, MVT::v32i8, { 6, 7, 5, 6 } },
1043 {
ISD::SHL, MVT::v16i16, { 4, 7, 5, 6 } },
1044 {
ISD::SRL, MVT::v16i16, { 6, 7, 5, 6 } },
1045 {
ISD::SRA, MVT::v16i16, { 6, 7, 5, 6 } },
1046 {
ISD::SHL, MVT::v8i32, { 4, 7, 5, 6 } },
1047 {
ISD::SRL, MVT::v8i32, { 6, 7, 5, 6 } },
1048 {
ISD::SRA, MVT::v8i32, { 6, 7, 5, 6 } },
1049 {
ISD::SHL, MVT::v4i64, { 4, 7, 5, 6 } },
1050 {
ISD::SRL, MVT::v4i64, { 6, 7, 5, 6 } },
1051 {
ISD::SRA, MVT::v4i64, { 6, 7, 5, 6 } },
1061 if (
const auto *Entry =
1063 if (
auto KindCost = Entry->Cost[
CostKind])
1064 return LT.first * *KindCost;
1071 if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->
hasSSE2()) ||
1072 ((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->
hasAVX()))
1077 {
ISD::FDIV, MVT::f32, { 18, 19, 1, 1 } },
1078 {
ISD::FDIV, MVT::v4f32, { 35, 36, 1, 1 } },
1079 {
ISD::FDIV, MVT::f64, { 33, 34, 1, 1 } },
1080 {
ISD::FDIV, MVT::v2f64, { 65, 66, 1, 1 } },
1083 if (ST->useGLMDivSqrtCosts())
1084 if (
const auto *Entry =
CostTableLookup(GLMCostTable, ISD, LT.second))
1085 if (
auto KindCost = Entry->Cost[
CostKind])
1086 return LT.first * *KindCost;
1089 {
ISD::MUL, MVT::v4i32, { 11, 11, 1, 7 } },
1090 {
ISD::MUL, MVT::v8i16, { 2, 5, 1, 1 } },
1091 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1092 {
ISD::FMUL, MVT::f32, { 1, 4, 1, 1 } },
1093 {
ISD::FMUL, MVT::v2f64, { 4, 7, 1, 1 } },
1094 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1095 {
ISD::FDIV, MVT::f32, { 17, 19, 1, 1 } },
1096 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 6 } },
1097 {
ISD::FDIV, MVT::f64, { 32, 34, 1, 1 } },
1098 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 6 } },
1099 {
ISD::FADD, MVT::v2f64, { 2, 4, 1, 1 } },
1100 {
ISD::FSUB, MVT::v2f64, { 2, 4, 1, 1 } },
1106 {
ISD::MUL, MVT::v2i64, { 17, 22, 9, 9 } },
1108 {
ISD::ADD, MVT::v2i64, { 4, 2, 1, 2 } },
1109 {
ISD::SUB, MVT::v2i64, { 4, 2, 1, 2 } },
1112 if (ST->useSLMArithCosts())
1113 if (
const auto *Entry =
CostTableLookup(SLMCostTable, ISD, LT.second))
1114 if (
auto KindCost = Entry->Cost[
CostKind])
1115 return LT.first * *KindCost;
1118 {
ISD::SHL, MVT::v16i8, { 6, 21,11,16 } },
1119 {
ISD::SHL, MVT::v32i8, { 6, 23,11,22 } },
1120 {
ISD::SHL, MVT::v8i16, { 5, 18, 5,10 } },
1121 {
ISD::SHL, MVT::v16i16, { 8, 10,10,14 } },
1123 {
ISD::SRL, MVT::v16i8, { 6, 27,12,18 } },
1124 {
ISD::SRL, MVT::v32i8, { 8, 30,12,24 } },
1125 {
ISD::SRL, MVT::v8i16, { 5, 11, 5,10 } },
1126 {
ISD::SRL, MVT::v16i16, { 8, 10,10,14 } },
1128 {
ISD::SRA, MVT::v16i8, { 17, 17,24,30 } },
1129 {
ISD::SRA, MVT::v32i8, { 18, 20,24,43 } },
1130 {
ISD::SRA, MVT::v8i16, { 5, 11, 5,10 } },
1131 {
ISD::SRA, MVT::v16i16, { 8, 10,10,14 } },
1132 {
ISD::SRA, MVT::v2i64, { 4, 5, 5, 5 } },
1133 {
ISD::SRA, MVT::v4i64, { 8, 8, 5, 9 } },
1135 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 2 } },
1136 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 2 } },
1137 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 2 } },
1138 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 2 } },
1139 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 2 } },
1140 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 2 } },
1141 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 2 } },
1142 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 2 } },
1144 {
ISD::MUL, MVT::v16i8, { 5, 18, 6,12 } },
1145 {
ISD::MUL, MVT::v32i8, { 4, 8, 8,16 } },
1146 {
ISD::MUL, MVT::v16i16, { 2, 5, 1, 2 } },
1147 {
ISD::MUL, MVT::v8i32, { 4, 10, 1, 2 } },
1148 {
ISD::MUL, MVT::v4i32, { 2, 10, 1, 2 } },
1149 {
ISD::MUL, MVT::v4i64, { 6, 10, 8,13 } },
1150 {
ISD::MUL, MVT::v2i64, { 6, 10, 8, 8 } },
1154 {
ISD::FNEG, MVT::v4f64, { 1, 1, 1, 2 } },
1155 {
ISD::FNEG, MVT::v8f32, { 1, 1, 1, 2 } },
1157 {
ISD::FADD, MVT::f64, { 1, 4, 1, 1 } },
1158 {
ISD::FADD, MVT::f32, { 1, 4, 1, 1 } },
1159 {
ISD::FADD, MVT::v2f64, { 1, 4, 1, 1 } },
1160 {
ISD::FADD, MVT::v4f32, { 1, 4, 1, 1 } },
1161 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 2 } },
1162 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 2 } },
1164 {
ISD::FSUB, MVT::f64, { 1, 4, 1, 1 } },
1165 {
ISD::FSUB, MVT::f32, { 1, 4, 1, 1 } },
1166 {
ISD::FSUB, MVT::v2f64, { 1, 4, 1, 1 } },
1167 {
ISD::FSUB, MVT::v4f32, { 1, 4, 1, 1 } },
1168 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 2 } },
1169 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 2 } },
1171 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1172 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1173 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1174 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1175 {
ISD::FMUL, MVT::v4f64, { 1, 5, 1, 2 } },
1176 {
ISD::FMUL, MVT::v8f32, { 1, 5, 1, 2 } },
1178 {
ISD::FDIV, MVT::f32, { 7, 13, 1, 1 } },
1179 {
ISD::FDIV, MVT::v4f32, { 7, 13, 1, 1 } },
1180 {
ISD::FDIV, MVT::v8f32, { 14, 21, 1, 3 } },
1181 {
ISD::FDIV, MVT::f64, { 14, 20, 1, 1 } },
1182 {
ISD::FDIV, MVT::v2f64, { 14, 20, 1, 1 } },
1183 {
ISD::FDIV, MVT::v4f64, { 28, 35, 1, 3 } },
1188 if (
const auto *Entry =
CostTableLookup(AVX2CostTable, ISD, LT.second))
1189 if (
auto KindCost = Entry->Cost[
CostKind])
1190 return LT.first * *KindCost;
1196 {
ISD::MUL, MVT::v32i8, { 10, 11, 18, 19 } },
1197 {
ISD::MUL, MVT::v16i8, { 5, 6, 8, 12 } },
1198 {
ISD::MUL, MVT::v16i16, { 4, 8, 5, 6 } },
1199 {
ISD::MUL, MVT::v8i32, { 5, 8, 5, 10 } },
1200 {
ISD::MUL, MVT::v4i32, { 2, 5, 1, 3 } },
1201 {
ISD::MUL, MVT::v4i64, { 12, 15, 19, 20 } },
1203 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 2 } },
1204 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 2 } },
1205 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 2 } },
1206 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 2 } },
1208 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 2 } },
1209 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 2 } },
1210 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 2 } },
1211 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 2 } },
1213 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 2 } },
1214 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 2 } },
1215 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 2 } },
1216 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 2 } },
1218 {
ISD::SUB, MVT::v32i8, { 4, 2, 5, 6 } },
1219 {
ISD::ADD, MVT::v32i8, { 4, 2, 5, 6 } },
1220 {
ISD::SUB, MVT::v16i16, { 4, 2, 5, 6 } },
1221 {
ISD::ADD, MVT::v16i16, { 4, 2, 5, 6 } },
1222 {
ISD::SUB, MVT::v8i32, { 4, 2, 5, 6 } },
1223 {
ISD::ADD, MVT::v8i32, { 4, 2, 5, 6 } },
1224 {
ISD::SUB, MVT::v4i64, { 4, 2, 5, 6 } },
1225 {
ISD::ADD, MVT::v4i64, { 4, 2, 5, 6 } },
1226 {
ISD::SUB, MVT::v2i64, { 1, 1, 1, 1 } },
1227 {
ISD::ADD, MVT::v2i64, { 1, 1, 1, 1 } },
1229 {
ISD::SHL, MVT::v16i8, { 10, 21,11,17 } },
1230 {
ISD::SHL, MVT::v32i8, { 22, 22,27,40 } },
1231 {
ISD::SHL, MVT::v8i16, { 6, 9,11,11 } },
1232 {
ISD::SHL, MVT::v16i16, { 13, 16,24,25 } },
1233 {
ISD::SHL, MVT::v4i32, { 3, 11, 4, 6 } },
1234 {
ISD::SHL, MVT::v8i32, { 9, 11,12,17 } },
1235 {
ISD::SHL, MVT::v2i64, { 2, 4, 4, 6 } },
1236 {
ISD::SHL, MVT::v4i64, { 6, 7,11,15 } },
1238 {
ISD::SRL, MVT::v16i8, { 11, 27,12,18 } },
1239 {
ISD::SRL, MVT::v32i8, { 23, 23,30,43 } },
1240 {
ISD::SRL, MVT::v8i16, { 13, 16,14,22 } },
1241 {
ISD::SRL, MVT::v16i16, { 28, 30,31,48 } },
1242 {
ISD::SRL, MVT::v4i32, { 6, 7,12,16 } },
1243 {
ISD::SRL, MVT::v8i32, { 14, 14,26,34 } },
1244 {
ISD::SRL, MVT::v2i64, { 2, 4, 4, 6 } },
1245 {
ISD::SRL, MVT::v4i64, { 6, 7,11,15 } },
1247 {
ISD::SRA, MVT::v16i8, { 21, 22,24,36 } },
1248 {
ISD::SRA, MVT::v32i8, { 44, 45,51,76 } },
1249 {
ISD::SRA, MVT::v8i16, { 13, 16,14,22 } },
1250 {
ISD::SRA, MVT::v16i16, { 28, 30,31,48 } },
1251 {
ISD::SRA, MVT::v4i32, { 6, 7,12,16 } },
1252 {
ISD::SRA, MVT::v8i32, { 14, 14,26,34 } },
1253 {
ISD::SRA, MVT::v2i64, { 5, 6,10,14 } },
1254 {
ISD::SRA, MVT::v4i64, { 12, 12,22,30 } },
1256 {
ISD::FNEG, MVT::v4f64, { 2, 2, 1, 2 } },
1257 {
ISD::FNEG, MVT::v8f32, { 2, 2, 1, 2 } },
1259 {
ISD::FADD, MVT::f64, { 1, 5, 1, 1 } },
1260 {
ISD::FADD, MVT::f32, { 1, 5, 1, 1 } },
1261 {
ISD::FADD, MVT::v2f64, { 1, 5, 1, 1 } },
1262 {
ISD::FADD, MVT::v4f32, { 1, 5, 1, 1 } },
1263 {
ISD::FADD, MVT::v4f64, { 2, 5, 1, 2 } },
1264 {
ISD::FADD, MVT::v8f32, { 2, 5, 1, 2 } },
1266 {
ISD::FSUB, MVT::f64, { 1, 5, 1, 1 } },
1267 {
ISD::FSUB, MVT::f32, { 1, 5, 1, 1 } },
1268 {
ISD::FSUB, MVT::v2f64, { 1, 5, 1, 1 } },
1269 {
ISD::FSUB, MVT::v4f32, { 1, 5, 1, 1 } },
1270 {
ISD::FSUB, MVT::v4f64, { 2, 5, 1, 2 } },
1271 {
ISD::FSUB, MVT::v8f32, { 2, 5, 1, 2 } },
1273 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1274 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1275 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1276 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1277 {
ISD::FMUL, MVT::v4f64, { 4, 5, 1, 2 } },
1278 {
ISD::FMUL, MVT::v8f32, { 2, 5, 1, 2 } },
1280 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1281 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1282 {
ISD::FDIV, MVT::v8f32, { 28, 29, 1, 3 } },
1283 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1284 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1285 {
ISD::FDIV, MVT::v4f64, { 44, 45, 1, 3 } },
1289 if (
const auto *Entry =
CostTableLookup(AVX1CostTable, ISD, LT.second))
1290 if (
auto KindCost = Entry->Cost[
CostKind])
1291 return LT.first * *KindCost;
1294 {
ISD::FADD, MVT::f64, { 1, 3, 1, 1 } },
1295 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1296 {
ISD::FADD, MVT::v2f64, { 1, 3, 1, 1 } },
1297 {
ISD::FADD, MVT::v4f32, { 1, 3, 1, 1 } },
1299 {
ISD::FSUB, MVT::f64, { 1, 3, 1, 1 } },
1300 {
ISD::FSUB, MVT::f32 , { 1, 3, 1, 1 } },
1301 {
ISD::FSUB, MVT::v2f64, { 1, 3, 1, 1 } },
1302 {
ISD::FSUB, MVT::v4f32, { 1, 3, 1, 1 } },
1304 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1305 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1306 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1307 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1309 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1310 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1311 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1312 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1314 {
ISD::MUL, MVT::v2i64, { 6, 10,10,10 } }
1318 if (
const auto *Entry =
CostTableLookup(SSE42CostTable, ISD, LT.second))
1319 if (
auto KindCost = Entry->Cost[
CostKind])
1320 return LT.first * *KindCost;
1323 {
ISD::SHL, MVT::v16i8, { 15, 24,17,22 } },
1324 {
ISD::SHL, MVT::v8i16, { 11, 14,11,11 } },
1325 {
ISD::SHL, MVT::v4i32, { 14, 20, 4,10 } },
1327 {
ISD::SRL, MVT::v16i8, { 16, 27,18,24 } },
1328 {
ISD::SRL, MVT::v8i16, { 22, 26,23,27 } },
1329 {
ISD::SRL, MVT::v4i32, { 16, 17,15,19 } },
1330 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1332 {
ISD::SRA, MVT::v16i8, { 38, 41,30,36 } },
1333 {
ISD::SRA, MVT::v8i16, { 22, 26,23,27 } },
1334 {
ISD::SRA, MVT::v4i32, { 16, 17,15,19 } },
1335 {
ISD::SRA, MVT::v2i64, { 8, 17, 5, 7 } },
1337 {
ISD::MUL, MVT::v4i32, { 2, 11, 1, 1 } }
1341 if (
const auto *Entry =
CostTableLookup(SSE41CostTable, ISD, LT.second))
1342 if (
auto KindCost = Entry->Cost[
CostKind])
1343 return LT.first * *KindCost;
1346 {
ISD::MUL, MVT::v16i8, { 5, 18,10,12 } },
1350 if (
const auto *Entry =
CostTableLookup(SSSE3CostTable, ISD, LT.second))
1351 if (
auto KindCost = Entry->Cost[
CostKind])
1352 return LT.first * *KindCost;
1357 {
ISD::SHL, MVT::v16i8, { 13, 21,26,28 } },
1358 {
ISD::SHL, MVT::v8i16, { 24, 27,16,20 } },
1359 {
ISD::SHL, MVT::v4i32, { 17, 19,10,12 } },
1360 {
ISD::SHL, MVT::v2i64, { 4, 6, 5, 7 } },
1362 {
ISD::SRL, MVT::v16i8, { 14, 28,27,30 } },
1363 {
ISD::SRL, MVT::v8i16, { 16, 19,31,31 } },
1364 {
ISD::SRL, MVT::v4i32, { 12, 12,15,19 } },
1365 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1367 {
ISD::SRA, MVT::v16i8, { 27, 30,54,54 } },
1368 {
ISD::SRA, MVT::v8i16, { 16, 19,31,31 } },
1369 {
ISD::SRA, MVT::v4i32, { 12, 12,15,19 } },
1370 {
ISD::SRA, MVT::v2i64, { 8, 11,12,16 } },
1372 {
ISD::AND, MVT::v16i8, { 1, 1, 1, 1 } },
1373 {
ISD::AND, MVT::v8i16, { 1, 1, 1, 1 } },
1374 {
ISD::AND, MVT::v4i32, { 1, 1, 1, 1 } },
1375 {
ISD::AND, MVT::v2i64, { 1, 1, 1, 1 } },
1377 {
ISD::OR, MVT::v16i8, { 1, 1, 1, 1 } },
1378 {
ISD::OR, MVT::v8i16, { 1, 1, 1, 1 } },
1379 {
ISD::OR, MVT::v4i32, { 1, 1, 1, 1 } },
1380 {
ISD::OR, MVT::v2i64, { 1, 1, 1, 1 } },
1382 {
ISD::XOR, MVT::v16i8, { 1, 1, 1, 1 } },
1383 {
ISD::XOR, MVT::v8i16, { 1, 1, 1, 1 } },
1384 {
ISD::XOR, MVT::v4i32, { 1, 1, 1, 1 } },
1385 {
ISD::XOR, MVT::v2i64, { 1, 1, 1, 1 } },
1387 {
ISD::ADD, MVT::v2i64, { 1, 2, 1, 2 } },
1388 {
ISD::SUB, MVT::v2i64, { 1, 2, 1, 2 } },
1390 {
ISD::MUL, MVT::v16i8, { 6, 18,12,12 } },
1391 {
ISD::MUL, MVT::v8i16, { 1, 5, 1, 1 } },
1392 {
ISD::MUL, MVT::v4i32, { 6, 8, 7, 7 } },
1393 {
ISD::MUL, MVT::v2i64, { 7, 10,10,10 } },
1397 {
ISD::FDIV, MVT::f32, { 23, 23, 1, 1 } },
1398 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 1 } },
1399 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1400 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 1 } },
1402 {
ISD::FNEG, MVT::f32, { 1, 1, 1, 1 } },
1403 {
ISD::FNEG, MVT::f64, { 1, 1, 1, 1 } },
1404 {
ISD::FNEG, MVT::v4f32, { 1, 1, 1, 1 } },
1405 {
ISD::FNEG, MVT::v2f64, { 1, 1, 1, 1 } },
1407 {
ISD::FADD, MVT::f32, { 2, 3, 1, 1 } },
1408 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1409 {
ISD::FADD, MVT::v2f64, { 2, 3, 1, 1 } },
1411 {
ISD::FSUB, MVT::f32, { 2, 3, 1, 1 } },
1412 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1413 {
ISD::FSUB, MVT::v2f64, { 2, 3, 1, 1 } },
1415 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1416 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1420 if (
const auto *Entry =
CostTableLookup(SSE2CostTable, ISD, LT.second))
1421 if (
auto KindCost = Entry->Cost[
CostKind])
1422 return LT.first * *KindCost;
1425 {
ISD::FDIV, MVT::f32, { 17, 18, 1, 1 } },
1426 {
ISD::FDIV, MVT::v4f32, { 34, 48, 1, 1 } },
1428 {
ISD::FNEG, MVT::f32, { 2, 2, 1, 2 } },
1429 {
ISD::FNEG, MVT::v4f32, { 2, 2, 1, 2 } },
1431 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1432 {
ISD::FADD, MVT::v4f32, { 2, 3, 1, 1 } },
1434 {
ISD::FSUB, MVT::f32, { 1, 3, 1, 1 } },
1435 {
ISD::FSUB, MVT::v4f32, { 2, 3, 1, 1 } },
1437 {
ISD::FMUL, MVT::f32, { 2, 5, 1, 1 } },
1438 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1442 if (
const auto *Entry =
CostTableLookup(SSE1CostTable, ISD, LT.second))
1443 if (
auto KindCost = Entry->Cost[
CostKind])
1444 return LT.first * *KindCost;
1449 {
ISD::MUL, MVT::i64, { 2, 6, 1, 2 } },
1454 if (
auto KindCost = Entry->Cost[
CostKind])
1455 return LT.first * *KindCost;
1466 {
ISD::MUL, MVT::i8, { 3, 4, 1, 1 } },
1467 {
ISD::MUL, MVT::i16, { 2, 4, 1, 1 } },
1468 {
ISD::MUL, MVT::i32, { 1, 4, 1, 1 } },
1470 {
ISD::FNEG, MVT::f64, { 2, 2, 1, 3 } },
1471 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1472 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1473 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1474 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1478 if (
auto KindCost = Entry->Cost[
CostKind])
1479 return LT.first * *KindCost;
1493 return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost;
1542 CostKind, Mask.size() / 2, BaseTp);
1555 using namespace PatternMatch;
1558 (ST->
hasAVX() && LT.second.getScalarSizeInBits() >= 32)))
1563 if (LT.second.isVector() && LT.second.getScalarType() == MVT::bf16)
1564 LT.second = LT.second.changeVectorElementType(MVT::f16);
1569 int NumElts = LT.second.getVectorNumElements();
1570 if ((
Index % NumElts) == 0)
1573 if (SubLT.second.isVector()) {
1574 int NumSubElts = SubLT.second.getVectorNumElements();
1575 if ((
Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1583 int OrigSubElts = cast<FixedVectorType>(SubTp)->getNumElements();
1584 if (NumSubElts > OrigSubElts && (
Index % OrigSubElts) == 0 &&
1585 (NumSubElts % OrigSubElts) == 0 &&
1586 LT.second.getVectorElementType() ==
1587 SubLT.second.getVectorElementType() &&
1588 LT.second.getVectorElementType().getSizeInBits() ==
1590 assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
1591 "Unexpected number of elements!");
1593 LT.second.getVectorNumElements());
1595 SubLT.second.getVectorNumElements());
1604 return ExtractCost + 1;
1607 "Unexpected vector size");
1609 return ExtractCost + 2;
1620 int NumElts = LT.second.getVectorNumElements();
1622 if (SubLT.second.isVector()) {
1623 int NumSubElts = SubLT.second.getVectorNumElements();
1624 if ((
Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1637 static const CostTblEntry SSE2SubVectorShuffleTbl[] = {
1668 if (
const auto *Entry =
1677 MVT LegalVT = LT.second;
1682 cast<FixedVectorType>(BaseTp)->getNumElements()) {
1686 unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
1693 if (!Mask.empty() && NumOfDests.
isValid()) {
1711 unsigned E = *NumOfDests.
getValue();
1712 unsigned NormalizedVF =
1718 unsigned PrevSrcReg = 0;
1722 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
1723 [
this, SingleOpTy,
CostKind, &PrevSrcReg, &PrevRegMask,
1728 if (PrevRegMask.
empty() || PrevSrcReg != SrcReg ||
1729 PrevRegMask != RegMask)
1737 if (SrcReg != DestReg &&
1742 PrevSrcReg = SrcReg;
1743 PrevRegMask = RegMask;
1756 std::nullopt,
CostKind, 0,
nullptr);
1767 LT.first = NumOfDests * NumOfShufflesPerDest;
1783 if (
const auto *Entry =
1785 return LT.first * Entry->Cost;
1818 if (
const auto *Entry =
1820 return LT.first * Entry->Cost;
1897 if (
const auto *Entry =
CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
1898 if (
auto KindCost = Entry->Cost[
CostKind])
1899 return LT.first * *KindCost;
1952 if (
const auto *Entry =
CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
1953 return LT.first * Entry->Cost;
1974 if (
const auto *Entry =
CostTableLookup(XOPShuffleTbl, Kind, LT.second))
1975 return LT.first * Entry->Cost;
2037 if (
const auto *Entry =
CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
2038 return LT.first * Entry->Cost;
2051 if (
const auto *Entry =
CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
2052 return LT.first * Entry->Cost;
2083 if (
const auto *Entry =
CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
2084 return LT.first * Entry->Cost;
2140 llvm::any_of(Args, [](
const auto &V) {
return isa<LoadInst>(V); });
2142 if (
const auto *Entry =
2145 LT.second.getVectorElementCount()) &&
2146 "Table entry missing from isLegalBroadcastLoad()");
2147 return LT.first * Entry->Cost;
2150 if (
const auto *Entry =
CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
2151 return LT.first * Entry->Cost;
2164 if (
const auto *Entry =
CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
2165 return LT.first * Entry->Cost;
2176 assert(ISD &&
"Invalid opcode");
2322 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 2, 1, 1, 1 } },
2323 {
ISD::TRUNCATE, MVT::v32i16, MVT::v16i32, { 2, 1, 1, 1 } },
2670 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 4, 1, 1, 1 } },
2747 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 6, 1, 1, 1 } },
2971 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, {10, 1, 1, 1 } },
2989 AVX512BWConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2990 if (
auto KindCost = Entry->Cost[
CostKind])
2995 AVX512DQConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2996 if (
auto KindCost = Entry->Cost[
CostKind])
3001 AVX512FConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3002 if (
auto KindCost = Entry->Cost[
CostKind])
3008 AVX512BWVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3009 if (
auto KindCost = Entry->Cost[
CostKind])
3014 AVX512DQVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3015 if (
auto KindCost = Entry->Cost[
CostKind])
3020 SimpleDstTy, SimpleSrcTy))
3021 if (
auto KindCost = Entry->Cost[
CostKind])
3026 SimpleDstTy, SimpleSrcTy))
3027 if (
auto KindCost = Entry->Cost[
CostKind])
3033 SimpleDstTy, SimpleSrcTy))
3034 if (
auto KindCost = Entry->Cost[
CostKind])
3040 SimpleDstTy, SimpleSrcTy))
3041 if (
auto KindCost = Entry->Cost[
CostKind])
3047 SimpleDstTy, SimpleSrcTy))
3048 if (
auto KindCost = Entry->Cost[
CostKind])
3064 AVX512BWConversionTbl, ISD, LTDest.second, LTSrc.second))
3065 if (
auto KindCost = Entry->Cost[
CostKind])
3066 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3070 AVX512DQConversionTbl, ISD, LTDest.second, LTSrc.second))
3071 if (
auto KindCost = Entry->Cost[
CostKind])
3072 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3076 AVX512FConversionTbl, ISD, LTDest.second, LTSrc.second))
3077 if (
auto KindCost = Entry->Cost[
CostKind])
3078 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3083 LTDest.second, LTSrc.second))
3084 if (
auto KindCost = Entry->Cost[
CostKind])
3085 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3089 LTDest.second, LTSrc.second))
3090 if (
auto KindCost = Entry->Cost[
CostKind])
3091 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3095 LTDest.second, LTSrc.second))
3096 if (
auto KindCost = Entry->Cost[
CostKind])
3097 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3101 LTDest.second, LTSrc.second))
3102 if (
auto KindCost = Entry->Cost[
CostKind])
3103 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3107 LTDest.second, LTSrc.second))
3108 if (
auto KindCost = Entry->Cost[
CostKind])
3109 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3113 LTDest.second, LTSrc.second))
3114 if (
auto KindCost = Entry->Cost[
CostKind])
3115 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3119 LTDest.second, LTSrc.second))
3120 if (
auto KindCost = Entry->Cost[
CostKind])
3121 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3126 1 < Src->getScalarSizeInBits() && Src->getScalarSizeInBits() < 32) {
3127 Type *ExtSrc = Src->getWithNewBitWidth(32);
3133 if (!(Src->isIntegerTy() &&
I && isa<LoadInst>(
I->getOperand(0))))
3143 1 < Dst->getScalarSizeInBits() && Dst->getScalarSizeInBits() < 32) {
3144 Type *TruncDst = Dst->getWithNewBitWidth(32);
3154 return Cost == 0 ? 0 :
N;
3174 MVT MTy = LT.second;
3177 assert(ISD &&
"Invalid opcode");
3180 if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
3193 Pred = cast<CmpInst>(
I)->getPredicate();
3195 bool CmpWithConstant =
false;
3196 if (
auto *CmpInstr = dyn_cast_or_null<CmpInst>(
I))
3197 CmpWithConstant = isa<Constant>(CmpInstr->getOperand(1));
3202 ExtraCost = CmpWithConstant ? 0 : 1;
3207 ExtraCost = CmpWithConstant ? 0 : 1;
3213 ExtraCost = CmpWithConstant ? 1 : 2;
3224 ExtraCost = CmpWithConstant ? 2 : 3;
3231 if (CondTy && !ST->
hasAVX())
3400 if (ST->useSLMArithCosts())
3402 if (
auto KindCost = Entry->Cost[
CostKind])
3403 return LT.first * (ExtraCost + *KindCost);
3407 if (
auto KindCost = Entry->Cost[
CostKind])
3408 return LT.first * (ExtraCost + *KindCost);
3412 if (
auto KindCost = Entry->Cost[
CostKind])
3413 return LT.first * (ExtraCost + *KindCost);
3417 if (
auto KindCost = Entry->Cost[
CostKind])
3418 return LT.first * (ExtraCost + *KindCost);
3422 if (
auto KindCost = Entry->Cost[
CostKind])
3423 return LT.first * (ExtraCost + *KindCost);
3427 if (
auto KindCost = Entry->Cost[
CostKind])
3428 return LT.first * (ExtraCost + *KindCost);
3432 if (
auto KindCost = Entry->Cost[
CostKind])
3433 return LT.first * (ExtraCost + *KindCost);
3437 if (
auto KindCost = Entry->Cost[
CostKind])
3438 return LT.first * (ExtraCost + *KindCost);
3442 if (
auto KindCost = Entry->Cost[
CostKind])
3443 return LT.first * (ExtraCost + *KindCost);
3447 if (
auto KindCost = Entry->Cost[
CostKind])
3448 return LT.first * (ExtraCost + *KindCost);
3473 {
ISD::FSHL, MVT::v8i64, { 1, 1, 1, 1 } },
3474 {
ISD::FSHL, MVT::v4i64, { 1, 1, 1, 1 } },
3475 {
ISD::FSHL, MVT::v2i64, { 1, 1, 1, 1 } },
3476 {
ISD::FSHL, MVT::v16i32, { 1, 1, 1, 1 } },
3477 {
ISD::FSHL, MVT::v8i32, { 1, 1, 1, 1 } },
3478 {
ISD::FSHL, MVT::v4i32, { 1, 1, 1, 1 } },
3479 {
ISD::FSHL, MVT::v32i16, { 1, 1, 1, 1 } },
3480 {
ISD::FSHL, MVT::v16i16, { 1, 1, 1, 1 } },
3481 {
ISD::FSHL, MVT::v8i16, { 1, 1, 1, 1 } },
3482 {
ISD::ROTL, MVT::v32i16, { 1, 1, 1, 1 } },
3483 {
ISD::ROTL, MVT::v16i16, { 1, 1, 1, 1 } },
3484 {
ISD::ROTL, MVT::v8i16, { 1, 1, 1, 1 } },
3485 {
ISD::ROTR, MVT::v32i16, { 1, 1, 1, 1 } },
3486 {
ISD::ROTR, MVT::v16i16, { 1, 1, 1, 1 } },
3487 {
ISD::ROTR, MVT::v8i16, { 1, 1, 1, 1 } },
3509 {
ISD::CTLZ, MVT::v8i64, { 1, 5, 1, 1 } },
3510 {
ISD::CTLZ, MVT::v16i32, { 1, 5, 1, 1 } },
3511 {
ISD::CTLZ, MVT::v32i16, { 18, 27, 23, 27 } },
3512 {
ISD::CTLZ, MVT::v64i8, { 3, 16, 9, 11 } },
3513 {
ISD::CTLZ, MVT::v4i64, { 1, 5, 1, 1 } },
3514 {
ISD::CTLZ, MVT::v8i32, { 1, 5, 1, 1 } },
3515 {
ISD::CTLZ, MVT::v16i16, { 8, 19, 11, 13 } },
3516 {
ISD::CTLZ, MVT::v32i8, { 2, 11, 9, 10 } },
3517 {
ISD::CTLZ, MVT::v2i64, { 1, 5, 1, 1 } },
3518 {
ISD::CTLZ, MVT::v4i32, { 1, 5, 1, 1 } },
3519 {
ISD::CTLZ, MVT::v8i16, { 3, 15, 4, 6 } },
3520 {
ISD::CTLZ, MVT::v16i8, { 2, 10, 9, 10 } },
3522 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3523 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3524 {
ISD::CTTZ, MVT::v4i64, { 1, 8, 6, 6 } },
3525 {
ISD::CTTZ, MVT::v8i32, { 1, 8, 6, 6 } },
3526 {
ISD::CTTZ, MVT::v2i64, { 1, 8, 6, 6 } },
3527 {
ISD::CTTZ, MVT::v4i32, { 1, 8, 6, 6 } },
3530 {
ISD::ABS, MVT::v32i16, { 1, 1, 1, 1 } },
3531 {
ISD::ABS, MVT::v64i8, { 1, 1, 1, 1 } },
3553 {
ISD::CTLZ, MVT::v8i64, { 8, 22, 23, 23 } },
3554 {
ISD::CTLZ, MVT::v16i32, { 8, 23, 25, 25 } },
3555 {
ISD::CTLZ, MVT::v32i16, { 4, 15, 15, 16 } },
3556 {
ISD::CTLZ, MVT::v64i8, { 3, 12, 10, 9 } },
3557 {
ISD::CTPOP, MVT::v2i64, { 3, 7, 10, 10 } },
3558 {
ISD::CTPOP, MVT::v4i64, { 3, 7, 10, 10 } },
3559 {
ISD::CTPOP, MVT::v8i64, { 3, 8, 10, 12 } },
3560 {
ISD::CTPOP, MVT::v4i32, { 7, 11, 14, 14 } },
3561 {
ISD::CTPOP, MVT::v8i32, { 7, 11, 14, 14 } },
3562 {
ISD::CTPOP, MVT::v16i32, { 7, 12, 14, 16 } },
3563 {
ISD::CTPOP, MVT::v8i16, { 2, 7, 11, 11 } },
3564 {
ISD::CTPOP, MVT::v16i16, { 2, 7, 11, 11 } },
3565 {
ISD::CTPOP, MVT::v32i16, { 3, 7, 11, 13 } },
3569 {
ISD::CTTZ, MVT::v8i16, { 3, 9, 14, 14 } },
3570 {
ISD::CTTZ, MVT::v16i16, { 3, 9, 14, 14 } },
3571 {
ISD::CTTZ, MVT::v32i16, { 3, 10, 14, 16 } },
3572 {
ISD::CTTZ, MVT::v16i8, { 2, 6, 11, 11 } },
3573 {
ISD::CTTZ, MVT::v32i8, { 2, 6, 11, 11 } },
3574 {
ISD::CTTZ, MVT::v64i8, { 3, 7, 11, 13 } },
3575 {
ISD::ROTL, MVT::v32i16, { 2, 8, 6, 8 } },
3576 {
ISD::ROTL, MVT::v16i16, { 2, 8, 6, 7 } },
3577 {
ISD::ROTL, MVT::v8i16, { 2, 7, 6, 7 } },
3578 {
ISD::ROTL, MVT::v64i8, { 5, 6, 11, 12 } },
3579 {
ISD::ROTL, MVT::v32i8, { 5, 15, 7, 10 } },
3580 {
ISD::ROTL, MVT::v16i8, { 5, 15, 7, 10 } },
3581 {
ISD::ROTR, MVT::v32i16, { 2, 8, 6, 8 } },
3582 {
ISD::ROTR, MVT::v16i16, { 2, 8, 6, 7 } },
3583 {
ISD::ROTR, MVT::v8i16, { 2, 7, 6, 7 } },
3584 {
ISD::ROTR, MVT::v64i8, { 5, 6, 12, 14 } },
3585 {
ISD::ROTR, MVT::v32i8, { 5, 14, 6, 9 } },
3586 {
ISD::ROTR, MVT::v16i8, { 5, 14, 6, 9 } },
3595 {
ISD::SMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3596 {
ISD::SMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3597 {
ISD::SMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3598 {
ISD::SMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3603 {
ISD::UMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3604 {
ISD::UMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3605 {
ISD::UMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3606 {
ISD::UMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3611 {
ISD::ABS, MVT::v8i64, { 1, 1, 1, 1 } },
3612 {
ISD::ABS, MVT::v4i64, { 1, 1, 1, 1 } },
3613 {
ISD::ABS, MVT::v2i64, { 1, 1, 1, 1 } },
3614 {
ISD::ABS, MVT::v16i32, { 1, 1, 1, 1 } },
3615 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 1 } },
3616 {
ISD::ABS, MVT::v32i16, { 2, 7, 4, 4 } },
3617 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 1 } },
3618 {
ISD::ABS, MVT::v64i8, { 2, 7, 4, 4 } },
3619 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 1 } },
3627 {
ISD::CTLZ, MVT::v8i64, { 10, 28, 32, 32 } },
3628 {
ISD::CTLZ, MVT::v16i32, { 12, 30, 38, 38 } },
3629 {
ISD::CTLZ, MVT::v32i16, { 8, 15, 29, 29 } },
3630 {
ISD::CTLZ, MVT::v64i8, { 6, 11, 19, 19 } },
3631 {
ISD::CTPOP, MVT::v8i64, { 16, 16, 19, 19 } },
3632 {
ISD::CTPOP, MVT::v16i32, { 24, 19, 27, 27 } },
3633 {
ISD::CTPOP, MVT::v32i16, { 18, 15, 22, 22 } },
3634 {
ISD::CTPOP, MVT::v64i8, { 12, 11, 16, 16 } },
3635 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3636 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3637 {
ISD::CTTZ, MVT::v32i16, { 7, 17, 27, 27 } },
3638 {
ISD::CTTZ, MVT::v64i8, { 6, 13, 21, 21 } },
3639 {
ISD::ROTL, MVT::v8i64, { 1, 1, 1, 1 } },
3640 {
ISD::ROTL, MVT::v4i64, { 1, 1, 1, 1 } },
3641 {
ISD::ROTL, MVT::v2i64, { 1, 1, 1, 1 } },
3642 {
ISD::ROTL, MVT::v16i32, { 1, 1, 1, 1 } },
3643 {
ISD::ROTL, MVT::v8i32, { 1, 1, 1, 1 } },
3644 {
ISD::ROTL, MVT::v4i32, { 1, 1, 1, 1 } },
3645 {
ISD::ROTR, MVT::v8i64, { 1, 1, 1, 1 } },
3646 {
ISD::ROTR, MVT::v4i64, { 1, 1, 1, 1 } },
3647 {
ISD::ROTR, MVT::v2i64, { 1, 1, 1, 1 } },
3648 {
ISD::ROTR, MVT::v16i32, { 1, 1, 1, 1 } },
3649 {
ISD::ROTR, MVT::v8i32, { 1, 1, 1, 1 } },
3650 {
ISD::ROTR, MVT::v4i32, { 1, 1, 1, 1 } },
3657 {
ISD::SMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3658 {
ISD::SMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3659 {
ISD::SMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3660 {
ISD::SMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3661 {
ISD::SMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3662 {
ISD::SMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3663 {
ISD::SMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3664 {
ISD::SMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3665 {
ISD::SMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3666 {
ISD::SMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3667 {
ISD::SMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3668 {
ISD::SMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3669 {
ISD::UMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3670 {
ISD::UMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3671 {
ISD::UMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3672 {
ISD::UMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3673 {
ISD::UMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3674 {
ISD::UMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3675 {
ISD::UMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3676 {
ISD::UMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3677 {
ISD::UMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3678 {
ISD::UMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3679 {
ISD::UMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3680 {
ISD::UMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3708 {
ISD::FSQRT, MVT::v16f32, { 12, 20, 1, 3 } },
3711 {
ISD::FSQRT, MVT::v4f64, { 12, 18, 1, 1 } },
3712 {
ISD::FSQRT, MVT::v8f64, { 24, 32, 1, 3 } },
3728 {
ISD::ROTL, MVT::v4i64, { 4, 7, 5, 6 } },
3729 {
ISD::ROTL, MVT::v8i32, { 4, 7, 5, 6 } },
3730 {
ISD::ROTL, MVT::v16i16, { 4, 7, 5, 6 } },
3731 {
ISD::ROTL, MVT::v32i8, { 4, 7, 5, 6 } },
3732 {
ISD::ROTL, MVT::v2i64, { 1, 3, 1, 1 } },
3733 {
ISD::ROTL, MVT::v4i32, { 1, 3, 1, 1 } },
3734 {
ISD::ROTL, MVT::v8i16, { 1, 3, 1, 1 } },
3735 {
ISD::ROTL, MVT::v16i8, { 1, 3, 1, 1 } },
3736 {
ISD::ROTR, MVT::v4i64, { 4, 7, 8, 9 } },
3737 {
ISD::ROTR, MVT::v8i32, { 4, 7, 8, 9 } },
3738 {
ISD::ROTR, MVT::v16i16, { 4, 7, 8, 9 } },
3739 {
ISD::ROTR, MVT::v32i8, { 4, 7, 8, 9 } },
3740 {
ISD::ROTR, MVT::v2i64, { 1, 3, 3, 3 } },
3741 {
ISD::ROTR, MVT::v4i32, { 1, 3, 3, 3 } },
3742 {
ISD::ROTR, MVT::v8i16, { 1, 3, 3, 3 } },
3743 {
ISD::ROTR, MVT::v16i8, { 1, 3, 3, 3 } },
3754 {
ISD::ABS, MVT::v2i64, { 2, 4, 3, 5 } },
3755 {
ISD::ABS, MVT::v4i64, { 2, 4, 3, 5 } },
3756 {
ISD::ABS, MVT::v4i32, { 1, 1, 1, 1 } },
3757 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 2 } },
3758 {
ISD::ABS, MVT::v8i16, { 1, 1, 1, 1 } },
3759 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 2 } },
3760 {
ISD::ABS, MVT::v16i8, { 1, 1, 1, 1 } },
3761 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 2 } },
3776 {
ISD::CTLZ, MVT::v2i64, { 7, 18, 24, 25 } },
3777 {
ISD::CTLZ, MVT::v4i64, { 14, 18, 24, 44 } },
3778 {
ISD::CTLZ, MVT::v4i32, { 5, 16, 19, 20 } },
3779 {
ISD::CTLZ, MVT::v8i32, { 10, 16, 19, 34 } },
3780 {
ISD::CTLZ, MVT::v8i16, { 4, 13, 14, 15 } },
3781 {
ISD::CTLZ, MVT::v16i16, { 6, 14, 14, 24 } },
3782 {
ISD::CTLZ, MVT::v16i8, { 3, 12, 9, 10 } },
3783 {
ISD::CTLZ, MVT::v32i8, { 4, 12, 9, 14 } },
3784 {
ISD::CTPOP, MVT::v2i64, { 3, 9, 10, 10 } },
3785 {
ISD::CTPOP, MVT::v4i64, { 4, 9, 10, 14 } },
3786 {
ISD::CTPOP, MVT::v4i32, { 7, 12, 14, 14 } },
3787 {
ISD::CTPOP, MVT::v8i32, { 7, 12, 14, 18 } },
3788 {
ISD::CTPOP, MVT::v8i16, { 3, 7, 11, 11 } },
3789 {
ISD::CTPOP, MVT::v16i16, { 6, 8, 11, 18 } },
3792 {
ISD::CTTZ, MVT::v2i64, { 4, 11, 13, 13 } },
3793 {
ISD::CTTZ, MVT::v4i64, { 5, 11, 13, 20 } },
3794 {
ISD::CTTZ, MVT::v4i32, { 7, 14, 17, 17 } },
3795 {
ISD::CTTZ, MVT::v8i32, { 7, 15, 17, 24 } },
3796 {
ISD::CTTZ, MVT::v8i16, { 4, 9, 14, 14 } },
3797 {
ISD::CTTZ, MVT::v16i16, { 6, 9, 14, 24 } },
3798 {
ISD::CTTZ, MVT::v16i8, { 3, 7, 11, 11 } },
3799 {
ISD::CTTZ, MVT::v32i8, { 5, 7, 11, 18 } },
3802 {
ISD::SMAX, MVT::v2i64, { 2, 7, 2, 3 } },
3803 {
ISD::SMAX, MVT::v4i64, { 2, 7, 2, 3 } },
3804 {
ISD::SMAX, MVT::v8i32, { 1, 1, 1, 2 } },
3805 {
ISD::SMAX, MVT::v16i16, { 1, 1, 1, 2 } },
3806 {
ISD::SMAX, MVT::v32i8, { 1, 1, 1, 2 } },
3807 {
ISD::SMIN, MVT::v2i64, { 2, 7, 2, 3 } },
3808 {
ISD::SMIN, MVT::v4i64, { 2, 7, 2, 3 } },
3809 {
ISD::SMIN, MVT::v8i32, { 1, 1, 1, 2 } },
3810 {
ISD::SMIN, MVT::v16i16, { 1, 1, 1, 2 } },
3811 {
ISD::SMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3817 {
ISD::UMAX, MVT::v2i64, { 2, 8, 5, 6 } },
3818 {
ISD::UMAX, MVT::v4i64, { 2, 8, 5, 8 } },
3819 {
ISD::UMAX, MVT::v8i32, { 1, 1, 1, 2 } },
3820 {
ISD::UMAX, MVT::v16i16, { 1, 1, 1, 2 } },
3821 {
ISD::UMAX, MVT::v32i8, { 1, 1, 1, 2 } },
3822 {
ISD::UMIN, MVT::v2i64, { 2, 8, 5, 6 } },
3823 {
ISD::UMIN, MVT::v4i64, { 2, 8, 5, 8 } },
3824 {
ISD::UMIN, MVT::v8i32, { 1, 1, 1, 2 } },
3825 {
ISD::UMIN, MVT::v16i16, { 1, 1, 1, 2 } },
3826 {
ISD::UMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3838 {
ISD::FSQRT, MVT::v8f32, { 14, 21, 1, 3 } },
3840 {
ISD::FSQRT, MVT::v2f64, { 14, 21, 1, 1 } },
3841 {
ISD::FSQRT, MVT::v4f64, { 28, 35, 1, 3 } },
3844 {
ISD::ABS, MVT::v4i64, { 6, 8, 6, 12 } },
3845 {
ISD::ABS, MVT::v8i32, { 3, 6, 4, 5 } },
3846 {
ISD::ABS, MVT::v16i16, { 3, 6, 4, 5 } },
3847 {
ISD::ABS, MVT::v32i8, { 3, 6, 4, 5 } },
3860 {
ISD::BSWAP, MVT::v16i16, { 5, 6, 5, 10 } },
3862 {
ISD::CTLZ, MVT::v4i64, { 29, 33, 49, 58 } },
3863 {
ISD::CTLZ, MVT::v2i64, { 14, 24, 24, 28 } },
3864 {
ISD::CTLZ, MVT::v8i32, { 24, 28, 39, 48 } },
3865 {
ISD::CTLZ, MVT::v4i32, { 12, 20, 19, 23 } },
3866 {
ISD::CTLZ, MVT::v16i16, { 19, 22, 29, 38 } },
3867 {
ISD::CTLZ, MVT::v8i16, { 9, 16, 14, 18 } },
3868 {
ISD::CTLZ, MVT::v32i8, { 14, 15, 19, 28 } },
3869 {
ISD::CTLZ, MVT::v16i8, { 7, 12, 9, 13 } },
3870 {
ISD::CTPOP, MVT::v4i64, { 14, 18, 19, 28 } },
3871 {
ISD::CTPOP, MVT::v2i64, { 7, 14, 10, 14 } },
3872 {
ISD::CTPOP, MVT::v8i32, { 18, 24, 27, 36 } },
3873 {
ISD::CTPOP, MVT::v4i32, { 9, 20, 14, 18 } },
3874 {
ISD::CTPOP, MVT::v16i16, { 16, 21, 22, 31 } },
3875 {
ISD::CTPOP, MVT::v8i16, { 8, 18, 11, 15 } },
3876 {
ISD::CTPOP, MVT::v32i8, { 13, 15, 16, 25 } },
3877 {
ISD::CTPOP, MVT::v16i8, { 6, 12, 8, 12 } },
3878 {
ISD::CTTZ, MVT::v4i64, { 17, 22, 24, 33 } },
3879 {
ISD::CTTZ, MVT::v2i64, { 9, 19, 13, 17 } },
3880 {
ISD::CTTZ, MVT::v8i32, { 21, 27, 32, 41 } },
3881 {
ISD::CTTZ, MVT::v4i32, { 11, 24, 17, 21 } },
3882 {
ISD::CTTZ, MVT::v16i16, { 18, 24, 27, 36 } },
3883 {
ISD::CTTZ, MVT::v8i16, { 9, 21, 14, 18 } },
3884 {
ISD::CTTZ, MVT::v32i8, { 15, 18, 21, 30 } },
3885 {
ISD::CTTZ, MVT::v16i8, { 8, 16, 11, 15 } },
3888 {
ISD::SMAX, MVT::v4i64, { 6, 9, 6, 12 } },
3889 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 4 } },
3890 {
ISD::SMAX, MVT::v8i32, { 4, 6, 5, 6 } },
3891 {
ISD::SMAX, MVT::v16i16, { 4, 6, 5, 6 } },
3892 {
ISD::SMAX, MVT::v32i8, { 4, 6, 5, 6 } },
3893 {
ISD::SMIN, MVT::v4i64, { 6, 9, 6, 12 } },
3894 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
3895 {
ISD::SMIN, MVT::v8i32, { 4, 6, 5, 6 } },
3896 {
ISD::SMIN, MVT::v16i16, { 4, 6, 5, 6 } },
3897 {
ISD::SMIN, MVT::v32i8, { 4, 6, 5, 6 } },
3903 {
ISD::UMAX, MVT::v4i64, { 9, 10, 11, 17 } },
3904 {
ISD::UMAX, MVT::v2i64, { 4, 8, 5, 7 } },
3905 {
ISD::UMAX, MVT::v8i32, { 4, 6, 5, 6 } },
3906 {
ISD::UMAX, MVT::v16i16, { 4, 6, 5, 6 } },
3907 {
ISD::UMAX, MVT::v32i8, { 4, 6, 5, 6 } },
3908 {
ISD::UMIN, MVT::v4i64, { 9, 10, 11, 17 } },
3909 {
ISD::UMIN, MVT::v2i64, { 4, 8, 5, 7 } },
3910 {
ISD::UMIN, MVT::v8i32, { 4, 6, 5, 6 } },
3911 {
ISD::UMIN, MVT::v16i16, { 4, 6, 5, 6 } },
3912 {
ISD::UMIN, MVT::v32i8, { 4, 6, 5, 6 } },
3923 {
ISD::FSQRT, MVT::v4f32, { 21, 21, 1, 1 } },
3924 {
ISD::FSQRT, MVT::v8f32, { 42, 42, 1, 3 } },
3926 {
ISD::FSQRT, MVT::v2f64, { 27, 27, 1, 1 } },
3927 {
ISD::FSQRT, MVT::v4f64, { 54, 54, 1, 3 } },
3952 {
ISD::FSQRT, MVT::v4f32, { 37, 41, 1, 5 } },
3954 {
ISD::FSQRT, MVT::v2f64, { 67, 71, 1, 5 } },
3961 {
ISD::FSQRT, MVT::v4f32, { 40, 41, 1, 5 } },
3963 {
ISD::FSQRT, MVT::v2f64, { 70, 71, 1, 5 } },
3973 {
ISD::FSQRT, MVT::v4f32, { 18, 18, 1, 1 } },
3976 {
ISD::ABS, MVT::v2i64, { 3, 4, 3, 5 } },
3977 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 3 } },
3978 {
ISD::SMAX, MVT::v4i32, { 1, 1, 1, 1 } },
3979 {
ISD::SMAX, MVT::v16i8, { 1, 1, 1, 1 } },
3980 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
3981 {
ISD::SMIN, MVT::v4i32, { 1, 1, 1, 1 } },
3982 {
ISD::SMIN, MVT::v16i8, { 1, 1, 1, 1 } },
3983 {
ISD::UMAX, MVT::v2i64, { 2, 11, 6, 7 } },
3984 {
ISD::UMAX, MVT::v4i32, { 1, 1, 1, 1 } },
3985 {
ISD::UMAX, MVT::v8i16, { 1, 1, 1, 1 } },
3986 {
ISD::UMIN, MVT::v2i64, { 2, 11, 6, 7 } },
3987 {
ISD::UMIN, MVT::v4i32, { 1, 1, 1, 1 } },
3988 {
ISD::UMIN, MVT::v8i16, { 1, 1, 1, 1 } },
3991 {
ISD::ABS, MVT::v4i32, { 1, 2, 1, 1 } },
3992 {
ISD::ABS, MVT::v8i16, { 1, 2, 1, 1 } },
3993 {
ISD::ABS, MVT::v16i8, { 1, 2, 1, 1 } },
4001 {
ISD::CTLZ, MVT::v2i64, { 18, 28, 28, 35 } },
4002 {
ISD::CTLZ, MVT::v4i32, { 15, 20, 22, 28 } },
4003 {
ISD::CTLZ, MVT::v8i16, { 13, 17, 16, 22 } },
4004 {
ISD::CTLZ, MVT::v16i8, { 11, 15, 10, 16 } },
4005 {
ISD::CTPOP, MVT::v2i64, { 13, 19, 12, 18 } },
4006 {
ISD::CTPOP, MVT::v4i32, { 18, 24, 16, 22 } },
4007 {
ISD::CTPOP, MVT::v8i16, { 13, 18, 14, 20 } },
4008 {
ISD::CTPOP, MVT::v16i8, { 11, 12, 10, 16 } },
4009 {
ISD::CTTZ, MVT::v2i64, { 13, 25, 15, 22 } },
4010 {
ISD::CTTZ, MVT::v4i32, { 18, 26, 19, 25 } },
4011 {
ISD::CTTZ, MVT::v8i16, { 13, 20, 17, 23 } },
4012 {
ISD::CTTZ, MVT::v16i8, { 11, 16, 13, 19 } }
4015 {
ISD::ABS, MVT::v2i64, { 3, 6, 5, 5 } },
4016 {
ISD::ABS, MVT::v4i32, { 1, 4, 4, 4 } },
4017 {
ISD::ABS, MVT::v8i16, { 1, 2, 3, 3 } },
4018 {
ISD::ABS, MVT::v16i8, { 1, 2, 3, 3 } },
4023 {
ISD::BSWAP, MVT::v2i64, { 5, 6, 11, 11 } },
4026 {
ISD::CTLZ, MVT::v2i64, { 10, 45, 36, 38 } },
4027 {
ISD::CTLZ, MVT::v4i32, { 10, 45, 38, 40 } },
4028 {
ISD::CTLZ, MVT::v8i16, { 9, 38, 32, 34 } },
4029 {
ISD::CTLZ, MVT::v16i8, { 8, 39, 29, 32 } },
4030 {
ISD::CTPOP, MVT::v2i64, { 12, 26, 16, 18 } },
4031 {
ISD::CTPOP, MVT::v4i32, { 15, 29, 21, 23 } },
4032 {
ISD::CTPOP, MVT::v8i16, { 13, 25, 18, 20 } },
4033 {
ISD::CTPOP, MVT::v16i8, { 10, 21, 14, 16 } },
4034 {
ISD::CTTZ, MVT::v2i64, { 14, 28, 19, 21 } },
4035 {
ISD::CTTZ, MVT::v4i32, { 18, 31, 24, 26 } },
4036 {
ISD::CTTZ, MVT::v8i16, { 16, 27, 21, 23 } },
4037 {
ISD::CTTZ, MVT::v16i8, { 13, 23, 17, 19 } },
4040 {
ISD::SMAX, MVT::v2i64, { 4, 8, 15, 15 } },
4041 {
ISD::SMAX, MVT::v4i32, { 2, 4, 5, 5 } },
4042 {
ISD::SMAX, MVT::v8i16, { 1, 1, 1, 1 } },
4043 {
ISD::SMAX, MVT::v16i8, { 2, 4, 5, 5 } },
4044 {
ISD::SMIN, MVT::v2i64, { 4, 8, 15, 15 } },
4045 {
ISD::SMIN, MVT::v4i32, { 2, 4, 5, 5 } },
4046 {
ISD::SMIN, MVT::v8i16, { 1, 1, 1, 1 } },
4047 {
ISD::SMIN, MVT::v16i8, { 2, 4, 5, 5 } },
4052 {
ISD::UMAX, MVT::v2i64, { 4, 8, 15, 15 } },
4053 {
ISD::UMAX, MVT::v4i32, { 2, 5, 8, 8 } },
4054 {
ISD::UMAX, MVT::v8i16, { 1, 3, 3, 3 } },
4055 {
ISD::UMAX, MVT::v16i8, { 1, 1, 1, 1 } },
4056 {
ISD::UMIN, MVT::v2i64, { 4, 8, 15, 15 } },
4057 {
ISD::UMIN, MVT::v4i32, { 2, 5, 8, 8 } },
4058 {
ISD::UMIN, MVT::v8i16, { 1, 3, 3, 3 } },
4059 {
ISD::UMIN, MVT::v16i8, { 1, 1, 1, 1 } },
4065 {
ISD::FSQRT, MVT::v2f64, { 32, 32, 1, 1 } },
4071 {
ISD::FSQRT, MVT::v4f32, { 56, 56, 1, 2 } },
4098 {
ISD::ABS, MVT::i64, { 1, 2, 3, 3 } },
4106 {
ISD::ROTL, MVT::i64, { 2, 3, 1, 3 } },
4107 {
ISD::ROTR, MVT::i64, { 2, 3, 1, 3 } },
4109 {
ISD::FSHL, MVT::i64, { 4, 4, 1, 4 } },
4110 {
ISD::SMAX, MVT::i64, { 1, 3, 2, 3 } },
4111 {
ISD::SMIN, MVT::i64, { 1, 3, 2, 3 } },
4112 {
ISD::UMAX, MVT::i64, { 1, 3, 2, 3 } },
4113 {
ISD::UMIN, MVT::i64, { 1, 3, 2, 3 } },
4119 {
ISD::ABS, MVT::i32, { 1, 2, 3, 3 } },
4120 {
ISD::ABS, MVT::i16, { 2, 2, 3, 3 } },
4121 {
ISD::ABS, MVT::i8, { 2, 4, 4, 3 } },
4142 {
ISD::ROTL, MVT::i32, { 2, 3, 1, 3 } },
4143 {
ISD::ROTL, MVT::i16, { 2, 3, 1, 3 } },
4145 {
ISD::ROTR, MVT::i32, { 2, 3, 1, 3 } },
4146 {
ISD::ROTR, MVT::i16, { 2, 3, 1, 3 } },
4151 {
ISD::FSHL, MVT::i32, { 4, 4, 1, 4 } },
4152 {
ISD::FSHL, MVT::i16, { 4, 4, 2, 5 } },
4154 {
ISD::SMAX, MVT::i32, { 1, 2, 2, 3 } },
4155 {
ISD::SMAX, MVT::i16, { 1, 4, 2, 4 } },
4157 {
ISD::SMIN, MVT::i32, { 1, 2, 2, 3 } },
4158 {
ISD::SMIN, MVT::i16, { 1, 4, 2, 4 } },
4160 {
ISD::UMAX, MVT::i32, { 1, 2, 2, 3 } },
4161 {
ISD::UMAX, MVT::i16, { 1, 4, 2, 4 } },
4163 {
ISD::UMIN, MVT::i32, { 1, 2, 2, 3 } },
4164 {
ISD::UMIN, MVT::i16, { 1, 4, 2, 4 } },
4184 case Intrinsic::abs:
4187 case Intrinsic::bitreverse:
4190 case Intrinsic::bswap:
4193 case Intrinsic::ctlz:
4196 case Intrinsic::ctpop:
4199 case Intrinsic::cttz:
4202 case Intrinsic::fshl:
4206 if (Args[0] == Args[1]) {
4217 case Intrinsic::fshr:
4222 if (Args[0] == Args[1]) {
4233 case Intrinsic::lrint:
4234 case Intrinsic::llrint:
4243 case Intrinsic::maxnum:
4244 case Intrinsic::minnum:
4248 case Intrinsic::sadd_sat:
4251 case Intrinsic::smax:
4254 case Intrinsic::smin:
4257 case Intrinsic::ssub_sat:
4260 case Intrinsic::uadd_sat:
4263 case Intrinsic::umax:
4266 case Intrinsic::umin:
4269 case Intrinsic::usub_sat:
4272 case Intrinsic::sqrt:
4275 case Intrinsic::sadd_with_overflow:
4276 case Intrinsic::ssub_with_overflow:
4279 OpTy =
RetTy->getContainedType(0);
4281 case Intrinsic::uadd_with_overflow:
4282 case Intrinsic::usub_with_overflow:
4285 OpTy =
RetTy->getContainedType(0);
4287 case Intrinsic::umul_with_overflow:
4288 case Intrinsic::smul_with_overflow:
4291 OpTy =
RetTy->getContainedType(0);
4296 auto adjustTableCost = [&](
int ISD,
unsigned Cost,
4297 std::pair<InstructionCost, MVT> LT,
4300 MVT MTy = LT.second;
4307 return LegalizationCost * 1;
4312 if (ISD ==
ISD::BSWAP && ST->hasMOVBE() && ST->hasFastMOVBE()) {
4314 if (
II->hasOneUse() && isa<StoreInst>(
II->user_back()))
4316 if (
auto *LI = dyn_cast<LoadInst>(
II->getOperand(0))) {
4317 if (LI->hasOneUse())
4324 return LegalizationCost * (int)
Cost;
4329 MVT MTy = LT.second;
4332 if (((ISD ==
ISD::CTTZ && !ST->hasBMI()) ||
4333 (ISD ==
ISD::CTLZ && !ST->hasLZCNT())) &&
4336 if (
auto *Cst = dyn_cast<ConstantInt>(Args[1]))
4337 if (Cst->isAllOnesValue())
4345 if (ST->useGLMDivSqrtCosts())
4347 if (
auto KindCost = Entry->Cost[
CostKind])
4348 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4350 if (ST->useSLMArithCosts())
4352 if (
auto KindCost = Entry->Cost[
CostKind])
4353 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4356 if (
const auto *Entry =
CostTableLookup(AVX512VBMI2CostTbl, ISD, MTy))
4357 if (
auto KindCost = Entry->Cost[
CostKind])
4358 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4360 if (ST->hasBITALG())
4361 if (
const auto *Entry =
CostTableLookup(AVX512BITALGCostTbl, ISD, MTy))
4362 if (
auto KindCost = Entry->Cost[
CostKind])
4363 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4365 if (ST->hasVPOPCNTDQ())
4366 if (
const auto *Entry =
CostTableLookup(AVX512VPOPCNTDQCostTbl, ISD, MTy))
4367 if (
auto KindCost = Entry->Cost[
CostKind])
4368 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4372 if (
auto KindCost = Entry->Cost[
CostKind])
4373 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4377 if (
auto KindCost = Entry->Cost[
CostKind])
4378 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4382 if (
auto KindCost = Entry->Cost[
CostKind])
4383 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4387 if (
auto KindCost = Entry->Cost[
CostKind])
4388 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4392 if (
auto KindCost = Entry->Cost[
CostKind])
4393 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4397 if (
auto KindCost = Entry->Cost[
CostKind])
4398 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4402 if (
auto KindCost = Entry->Cost[
CostKind])
4403 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4407 if (
auto KindCost = Entry->Cost[
CostKind])
4408 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4412 if (
auto KindCost = Entry->Cost[
CostKind])
4413 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4417 if (
auto KindCost = Entry->Cost[
CostKind])
4418 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4422 if (
auto KindCost = Entry->Cost[
CostKind])
4423 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4427 if (
auto KindCost = Entry->Cost[
CostKind])
4428 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4433 if (
auto KindCost = Entry->Cost[
CostKind])
4434 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4437 if (
auto KindCost = Entry->Cost[
CostKind])
4438 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4441 if (ST->hasLZCNT()) {
4444 if (
auto KindCost = Entry->Cost[
CostKind])
4445 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4448 if (
auto KindCost = Entry->Cost[
CostKind])
4449 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4452 if (ST->hasPOPCNT()) {
4455 if (
auto KindCost = Entry->Cost[
CostKind])
4456 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4459 if (
auto KindCost = Entry->Cost[
CostKind])
4460 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4465 if (
auto KindCost = Entry->Cost[
CostKind])
4466 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4469 if (
auto KindCost = Entry->Cost[
CostKind])
4470 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4493 if (
Index == -1U && (Opcode == Instruction::ExtractElement ||
4494 Opcode == Instruction::InsertElement)) {
4499 assert(isa<FixedVectorType>(Val) &&
"Fixed vector type expected");
4504 if (Opcode == Instruction::ExtractElement) {
4510 if (Opcode == Instruction::InsertElement) {
4518 if (
Index != -1U && (Opcode == Instruction::ExtractElement ||
4519 Opcode == Instruction::InsertElement)) {
4521 if (Opcode == Instruction::ExtractElement &&
4523 cast<FixedVectorType>(Val)->getNumElements() > 1)
4530 if (!LT.second.isVector())
4534 unsigned SizeInBits = LT.second.getSizeInBits();
4535 unsigned NumElts = LT.second.getVectorNumElements();
4536 unsigned SubNumElts = NumElts;
4541 if (SizeInBits > 128) {
4542 assert((SizeInBits % 128) == 0 &&
"Illegal vector");
4543 unsigned NumSubVecs = SizeInBits / 128;
4544 SubNumElts = NumElts / NumSubVecs;
4545 if (SubNumElts <=
Index) {
4546 RegisterFileMoveCost += (Opcode == Instruction::InsertElement ? 2 : 1);
4547 Index %= SubNumElts;
4551 MVT MScalarTy = LT.second.getScalarType();
4552 auto IsCheapPInsrPExtrInsertPS = [&]() {
4555 return (MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4557 (MScalarTy == MVT::f32 && ST->
hasSSE41() &&
4558 Opcode == Instruction::InsertElement);
4566 (Opcode != Instruction::InsertElement || !Op0 ||
4567 isa<UndefValue>(Op0)))
4568 return RegisterFileMoveCost;
4570 if (Opcode == Instruction::InsertElement &&
4571 isa_and_nonnull<UndefValue>(Op0)) {
4573 if (isa_and_nonnull<LoadInst>(Op1))
4574 return RegisterFileMoveCost;
4575 if (!IsCheapPInsrPExtrInsertPS()) {
4578 return 2 + RegisterFileMoveCost;
4580 return 1 + RegisterFileMoveCost;
4585 if (ScalarType->
isIntegerTy() && Opcode == Instruction::ExtractElement)
4586 return 1 + RegisterFileMoveCost;
4590 assert(ISD &&
"Unexpected vector opcode");
4591 if (ST->useSLMArithCosts())
4593 return Entry->Cost + RegisterFileMoveCost;
4596 if (IsCheapPInsrPExtrInsertPS())
4597 return 1 + RegisterFileMoveCost;
4606 if (Opcode == Instruction::InsertElement) {
4607 auto *SubTy = cast<VectorType>(Val);
4615 return ShuffleCost + IntOrFpCost + RegisterFileMoveCost;
4619 RegisterFileMoveCost;
4624 bool Insert,
bool Extract,
4627 cast<FixedVectorType>(Ty)->getNumElements() &&
4628 "Vector size mismatch");
4631 MVT MScalarTy = LT.second.getScalarType();
4632 unsigned LegalVectorBitWidth = LT.second.getSizeInBits();
4635 constexpr unsigned LaneBitWidth = 128;
4636 assert((LegalVectorBitWidth < LaneBitWidth ||
4637 (LegalVectorBitWidth % LaneBitWidth) == 0) &&
4640 const int NumLegalVectors = *LT.first.getValue();
4641 assert(NumLegalVectors >= 0 &&
"Negative cost!");
4646 if ((MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4648 (MScalarTy == MVT::f32 && ST->
hasSSE41())) {
4651 if (LegalVectorBitWidth <= LaneBitWidth) {
4667 assert((LegalVectorBitWidth % LaneBitWidth) == 0 &&
"Illegal vector");
4668 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
4669 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
4670 unsigned NumLegalElts =
4671 LT.second.getVectorNumElements() * NumLegalVectors;
4673 "Vector has been legalized to smaller element count");
4674 assert((NumLegalElts % NumLanesTotal) == 0 &&
4675 "Unexpected elts per lane");
4676 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
4678 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
4682 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
4684 NumEltsPerLane, NumEltsPerLane *
I);
4685 if (LaneEltMask.
isZero())
4696 APInt AffectedLanes =
4699 AffectedLanes, NumLegalVectors,
true);
4700 for (
int LegalVec = 0; LegalVec != NumLegalVectors; ++LegalVec) {
4701 for (
unsigned Lane = 0; Lane != NumLegalLanes; ++Lane) {
4702 unsigned I = NumLegalLanes * LegalVec + Lane;
4705 if (!AffectedLanes[
I] ||
4706 (Lane == 0 && FullyAffectedLegalVectors[LegalVec]))
4713 }
else if (LT.second.isVector()) {
4724 unsigned NumElts = LT.second.getVectorNumElements();
4727 Cost += (std::min<unsigned>(NumElts, Pow2Elts) - 1) * LT.first;
4736 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
4737 unsigned MaxElts = ST->
hasAVX2() ? 32 : 16;
4738 unsigned MOVMSKCost = (NumElts + MaxElts - 1) / MaxElts;
4742 if (LT.second.isVector()) {
4743 unsigned NumLegalElts =
4744 LT.second.getVectorNumElements() * NumLegalVectors;
4746 "Vector has been legalized to smaller element count");
4750 if (LegalVectorBitWidth > LaneBitWidth) {
4751 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
4752 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
4753 assert((NumLegalElts % NumLanesTotal) == 0 &&
4754 "Unexpected elts per lane");
4755 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
4759 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
4763 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
4765 NumEltsPerLane,
I * NumEltsPerLane);
4766 if (LaneEltMask.
isZero())
4771 LaneTy, LaneEltMask,
false, Extract,
CostKind);
4788 int VF,
const APInt &DemandedDstElts,
4794 auto bailout = [&]() {
4804 unsigned PromEltTyBits = EltTyBits;
4805 switch (EltTyBits) {
4836 int NumDstElements = VF * ReplicationFactor;
4850 if (PromEltTyBits != EltTyBits) {
4856 Instruction::SExt, PromSrcVecTy, SrcVecTy,
4863 ReplicationFactor, VF,
4869 "We expect that the legalization doesn't affect the element width, "
4870 "doesn't coalesce/split elements.");
4873 unsigned NumDstVectors =
4874 divideCeil(DstVecTy->getNumElements(), NumEltsPerDstVec);
4883 DemandedDstElts.
zext(NumDstVectors * NumEltsPerDstVec), NumDstVectors);
4884 unsigned NumDstVectorsDemanded = DemandedDstVectors.
popcount();
4889 return NumDstVectorsDemanded * SingleShuffleCost;
4900 if (
auto *SI = dyn_cast_or_null<StoreInst>(
I)) {
4903 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(SI->getPointerOperand())) {
4904 if (!
all_of(
GEP->indices(), [](
Value *V) { return isa<Constant>(V); }))
4911 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
4921 auto *VTy = dyn_cast<FixedVectorType>(Src);
4926 if (Opcode == Instruction::Store && OpInfo.
isConstant())
4932 if (!VTy || !LT.second.isVector()) {
4934 return (LT.second.isFloatingPoint() ?
Cost : 0) + LT.first * 1;
4937 bool IsLoad = Opcode == Instruction::Load;
4939 Type *EltTy = VTy->getElementType();
4944 const unsigned SrcNumElt = VTy->getNumElements();
4947 int NumEltRemaining = SrcNumElt;
4949 auto NumEltDone = [&]() {
return SrcNumElt - NumEltRemaining; };
4951 const int MaxLegalOpSizeBytes =
divideCeil(LT.second.getSizeInBits(), 8);
4954 const unsigned XMMBits = 128;
4955 if (XMMBits % EltTyBits != 0)
4959 const int NumEltPerXMM = XMMBits / EltTyBits;
4963 for (
int CurrOpSizeBytes = MaxLegalOpSizeBytes, SubVecEltsLeft = 0;
4964 NumEltRemaining > 0; CurrOpSizeBytes /= 2) {
4966 if ((8 * CurrOpSizeBytes) % EltTyBits != 0)
4970 int CurrNumEltPerOp = (8 * CurrOpSizeBytes) / EltTyBits;
4972 assert(CurrOpSizeBytes > 0 && CurrNumEltPerOp > 0 &&
"How'd we get here?");
4973 assert((((NumEltRemaining * EltTyBits) < (2 * 8 * CurrOpSizeBytes)) ||
4974 (CurrOpSizeBytes == MaxLegalOpSizeBytes)) &&
4975 "Unless we haven't halved the op size yet, "
4976 "we have less than two op's sized units of work left.");
4978 auto *CurrVecTy = CurrNumEltPerOp > NumEltPerXMM
4982 assert(CurrVecTy->getNumElements() % CurrNumEltPerOp == 0 &&
4983 "After halving sizes, the vector elt count is no longer a multiple "
4984 "of number of elements per operation?");
4985 auto *CoalescedVecTy =
4986 CurrNumEltPerOp == 1
4990 EltTyBits * CurrNumEltPerOp),
4991 CurrVecTy->getNumElements() / CurrNumEltPerOp);
4994 "coalesciing elements doesn't change vector width.");
4996 while (NumEltRemaining > 0) {
4997 assert(SubVecEltsLeft >= 0 &&
"Subreg element count overconsumtion?");
5001 if (NumEltRemaining < CurrNumEltPerOp &&
5002 (!IsLoad || Alignment.
valueOrOne() < CurrOpSizeBytes) &&
5003 CurrOpSizeBytes != 1)
5006 bool Is0thSubVec = (NumEltDone() % LT.second.getVectorNumElements()) == 0;
5009 if (SubVecEltsLeft == 0) {
5010 SubVecEltsLeft += CurrVecTy->getNumElements();
5015 VTy, std::nullopt,
CostKind, NumEltDone(),
5023 if (CurrOpSizeBytes <= 32 / 8 && !Is0thSubVec) {
5024 int NumEltDoneInCurrXMM = NumEltDone() % NumEltPerXMM;
5025 assert(NumEltDoneInCurrXMM % CurrNumEltPerOp == 0 &&
"");
5026 int CoalescedVecEltIdx = NumEltDoneInCurrXMM / CurrNumEltPerOp;
5027 APInt DemandedElts =
5029 CoalescedVecEltIdx, CoalescedVecEltIdx + 1);
5030 assert(DemandedElts.
popcount() == 1 &&
"Inserting single value");
5040 if (CurrOpSizeBytes == 32 && ST->isUnalignedMem32Slow())
5042 else if (CurrOpSizeBytes < 4)
5047 SubVecEltsLeft -= CurrNumEltPerOp;
5048 NumEltRemaining -= CurrNumEltPerOp;
5053 assert(NumEltRemaining <= 0 &&
"Should have processed all the elements.");
5062 bool IsLoad = (Instruction::Load == Opcode);
5063 bool IsStore = (Instruction::Store == Opcode);
5065 auto *SrcVTy = dyn_cast<FixedVectorType>(SrcTy);
5070 unsigned NumElem = SrcVTy->getNumElements();
5078 MaskTy, DemandedElts,
false,
true,
CostKind);
5083 InstructionCost MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
5085 SrcVTy, DemandedElts, IsLoad, IsStore,
CostKind);
5089 return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
5097 if (Ty == MVT::i16 || Ty == MVT::i32 || Ty == MVT::i64)
5099 return Cost + LT.first;
5101 if (VT.isSimple() && Ty != VT.getSimpleVT() &&
5102 LT.second.getVectorNumElements() == NumElem)
5119 return Cost + LT.first * (IsLoad ? 2 : 8);
5122 return Cost + LT.first;
5130 if (
Info.isSameBase() &&
Info.isKnownStride()) {
5134 if (
const auto *BaseGEP = dyn_cast<GetElementPtrInst>(
Base)) {
5136 return getGEPCost(BaseGEP->getSourceElementType(),
5137 BaseGEP->getPointerOperand(), Indices,
nullptr,
5152 const unsigned NumVectorInstToHideOverhead = 10;
5165 return NumVectorInstToHideOverhead;
5175 std::optional<FastMathFlags> FMF,
5216 assert(ISD &&
"Invalid opcode");
5224 if (ST->useSLMArithCosts())
5239 MVT MTy = LT.second;
5241 auto *ValVTy = cast<FixedVectorType>(ValTy);
5254 if (LT.first != 1 && MTy.
isVector() &&
5260 ArithmeticCost *= LT.first - 1;
5263 if (ST->useSLMArithCosts())
5265 return ArithmeticCost + Entry->Cost;
5269 return ArithmeticCost + Entry->Cost;
5273 return ArithmeticCost + Entry->Cost;
5322 if (ValVTy->getElementType()->isIntegerTy(1)) {
5324 if (LT.first != 1 && MTy.
isVector() &&
5330 ArithmeticCost *= LT.first - 1;
5334 if (
const auto *Entry =
CostTableLookup(AVX512BoolReduction, ISD, MTy))
5335 return ArithmeticCost + Entry->Cost;
5338 return ArithmeticCost + Entry->Cost;
5341 return ArithmeticCost + Entry->Cost;
5344 return ArithmeticCost + Entry->Cost;
5349 unsigned NumVecElts = ValVTy->getNumElements();
5350 unsigned ScalarSize = ValVTy->getScalarSizeInBits();
5360 if (LT.first != 1 && MTy.
isVector() &&
5366 ReductionCost *= LT.first - 1;
5372 while (NumVecElts > 1) {
5374 unsigned Size = NumVecElts * ScalarSize;
5383 }
else if (
Size == 128) {
5386 if (ValVTy->isFloatingPointTy())
5393 std::nullopt,
CostKind, 0,
nullptr);
5394 }
else if (
Size == 64) {
5397 if (ValVTy->isFloatingPointTy())
5404 std::nullopt,
CostKind, 0,
nullptr);
5410 Instruction::LShr, ShiftTy,
CostKind,
5437 MVT MTy = LT.second;
5441 ISD = (IID == Intrinsic::umin || IID == Intrinsic::umax) ?
ISD::UMIN
5445 "Expected float point or integer vector type.");
5446 ISD = (IID == Intrinsic::minnum || IID == Intrinsic::maxnum)
5514 auto *ValVTy = cast<FixedVectorType>(ValTy);
5515 unsigned NumVecElts = ValVTy->getNumElements();
5519 if (LT.first != 1 && MTy.
isVector() &&
5525 MinMaxCost *= LT.first - 1;
5531 return MinMaxCost + Entry->Cost;
5535 return MinMaxCost + Entry->Cost;
5539 return MinMaxCost + Entry->Cost;
5543 return MinMaxCost + Entry->Cost;
5555 while (NumVecElts > 1) {
5557 unsigned Size = NumVecElts * ScalarSize;
5565 }
else if (
Size == 128) {
5574 std::nullopt,
CostKind, 0,
nullptr);
5575 }
else if (
Size == 64) {
5583 std::nullopt,
CostKind, 0,
nullptr);
5636 if (BitSize % 64 != 0)
5637 ImmVal = Imm.sext(
alignTo(BitSize, 64));
5642 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
5648 return std::max<InstructionCost>(1,
Cost);
5663 unsigned ImmIdx = ~0U;
5667 case Instruction::GetElementPtr:
5674 case Instruction::Store:
5677 case Instruction::ICmp:
5683 if (
Idx == 1 && Imm.getBitWidth() == 64) {
5684 uint64_t ImmVal = Imm.getZExtValue();
5685 if (ImmVal == 0x100000000ULL || ImmVal == 0xffffffff)
5690 case Instruction::And:
5694 if (
Idx == 1 && Imm.getBitWidth() == 64 && Imm.isIntN(32))
5698 case Instruction::Add:
5699 case Instruction::Sub:
5701 if (
Idx == 1 && Imm.getBitWidth() == 64 && Imm.getZExtValue() == 0x80000000)
5705 case Instruction::UDiv:
5706 case Instruction::SDiv:
5707 case Instruction::URem:
5708 case Instruction::SRem:
5713 case Instruction::Mul:
5714 case Instruction::Or:
5715 case Instruction::Xor:
5719 case Instruction::Shl:
5720 case Instruction::LShr:
5721 case Instruction::AShr:
5725 case Instruction::Trunc:
5726 case Instruction::ZExt:
5727 case Instruction::SExt:
5728 case Instruction::IntToPtr:
5729 case Instruction::PtrToInt:
5730 case Instruction::BitCast:
5731 case Instruction::PHI:
5732 case Instruction::Call:
5733 case Instruction::Select:
5734 case Instruction::Ret:
5735 case Instruction::Load:
5739 if (
Idx == ImmIdx) {
5764 case Intrinsic::sadd_with_overflow:
5765 case Intrinsic::uadd_with_overflow:
5766 case Intrinsic::ssub_with_overflow:
5767 case Intrinsic::usub_with_overflow:
5768 case Intrinsic::smul_with_overflow:
5769 case Intrinsic::umul_with_overflow:
5770 if ((
Idx == 1) && Imm.getBitWidth() <= 64 && Imm.isSignedIntN(32))
5773 case Intrinsic::experimental_stackmap:
5774 if ((
Idx < 2) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
5777 case Intrinsic::experimental_patchpoint_void:
5778 case Intrinsic::experimental_patchpoint:
5779 if ((
Idx < 4) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
5790 return Opcode == Instruction::PHI ? 0 : 1;
5795int X86TTIImpl::getGatherOverhead()
const {
5808int X86TTIImpl::getScatterOverhead()
const {
5822 assert(isa<VectorType>(SrcVTy) &&
"Unexpected type in getGSVectorCost");
5823 unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
5833 if (IndexSize < 64 || !
GEP)
5836 unsigned NumOfVarIndices = 0;
5837 const Value *Ptrs =
GEP->getPointerOperand();
5840 for (
unsigned I = 1, E =
GEP->getNumOperands();
I != E; ++
I) {
5841 if (isa<Constant>(
GEP->getOperand(
I)))
5843 Type *IndxTy =
GEP->getOperand(
I)->getType();
5844 if (
auto *IndexVTy = dyn_cast<VectorType>(IndxTy))
5845 IndxTy = IndexVTy->getElementType();
5847 !isa<SExtInst>(
GEP->getOperand(
I))) ||
5848 ++NumOfVarIndices > 1)
5851 return (
unsigned)32;
5856 unsigned IndexSize = (ST->
hasAVX512() && VF >= 16)
5857 ? getIndexSizeInBits(
Ptr,
DL)
5865 *std::max(IdxsLT.first, SrcLT.first).getValue();
5866 if (SplitFactor > 1) {
5870 return SplitFactor * getGSVectorCost(Opcode,
CostKind, SplitSrcTy,
Ptr,
5880 const int GSOverhead = (Opcode == Instruction::Load) ? getGatherOverhead()
5881 : getScatterOverhead();
5889 unsigned Opcode,
Type *SrcVTy,
const Value *
Ptr,
bool VariableMask,
5892 if ((Opcode == Instruction::Load &&
5895 Align(Alignment)))) ||
5896 (Opcode == Instruction::Store &&
5899 Align(Alignment)))))
5905 if (!PtrTy &&
Ptr->getType()->isVectorTy())
5906 PtrTy = dyn_cast<PointerType>(
5907 cast<VectorType>(
Ptr->getType())->getElementType());
5908 assert(PtrTy &&
"Unexpected type for Ptr argument");
5910 return getGSVectorCost(Opcode,
CostKind, SrcVTy,
Ptr, Alignment,
5926 return ST->hasMacroFusion() || ST->hasBranchFusion();
5933 if (isa<VectorType>(DataTy) && cast<FixedVectorType>(DataTy)->
getNumElements() == 1)
5945 if (ScalarTy->
isHalfTy() && ST->hasBWI())
5955 return IntWidth == 32 || IntWidth == 64 ||
5956 ((IntWidth == 8 || IntWidth == 16) && ST->hasBWI());
5968 if (Alignment >= DataSize && (DataSize == 16 || DataSize == 32))
5985 if (Alignment < DataSize || DataSize < 4 || DataSize > 32 ||
6007 if (!isa<VectorType>(DataTy))
6017 Type *ScalarTy = cast<VectorType>(DataTy)->getElementType();
6026 return IntWidth == 32 || IntWidth == 64 ||
6027 ((IntWidth == 8 || IntWidth == 16) && ST->hasVBMI2());
6034bool X86TTIImpl::supportsGather()
const {
6048 unsigned NumElts = cast<FixedVectorType>(VTy)->getNumElements();
6049 return NumElts == 1 ||
6050 (ST->
hasAVX512() && (NumElts == 2 || (NumElts == 4 && !ST->hasVLX())));
6065 return IntWidth == 32 || IntWidth == 64;
6069 if (!supportsGather() || !ST->preferGather())
6084 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
6085 assert(OpcodeMask.
size() == NumElements &&
"Mask and VecTy are incompatible");
6090 for (
int Lane : seq<int>(0, NumElements)) {
6091 unsigned Opc = OpcodeMask.
test(Lane) ? Opcode1 : Opcode0;
6093 if (Lane % 2 == 0 && Opc != Instruction::FSub)
6095 if (Lane % 2 == 1 && Opc != Instruction::FAdd)
6099 Type *ElemTy = cast<VectorType>(VecTy)->getElementType();
6101 return ST->
hasSSE3() && NumElements % 4 == 0;
6103 return ST->
hasSSE3() && NumElements % 2 == 0;
6109 if (!ST->
hasAVX512() || !ST->preferScatter())
6122 if (
I->getOpcode() == Instruction::FDiv)
6138 TM.getSubtargetImpl(*Caller)->getFeatureBits();
6140 TM.getSubtargetImpl(*Callee)->getFeatureBits();
6143 FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
6144 FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
6145 if (RealCallerBits == RealCalleeBits)
6150 if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
6154 if (
const auto *CB = dyn_cast<CallBase>(&
I)) {
6156 if (CB->isInlineAsm())
6160 for (
Value *Arg : CB->args())
6161 Types.push_back(Arg->getType());
6162 if (!CB->getType()->isVoidTy())
6163 Types.push_back(CB->getType());
6166 auto IsSimpleTy = [](
Type *Ty) {
6167 return !Ty->isVectorTy() && !Ty->isAggregateType();
6169 if (
all_of(Types, IsSimpleTy))
6172 if (
Function *NestedCallee = CB->getCalledFunction()) {
6174 if (NestedCallee->isIntrinsic())
6209 [](
Type *
T) {
return T->isVectorTy() ||
T->isAggregateType(); });
6218 Options.AllowOverlappingLoads =
true;
6223 if (PreferredWidth >= 512 && ST->
hasAVX512() && ST->hasEVEX512())
6224 Options.LoadSizes.push_back(64);
6225 if (PreferredWidth >= 256 && ST->
hasAVX())
Options.LoadSizes.push_back(32);
6226 if (PreferredWidth >= 128 && ST->
hasSSE2())
Options.LoadSizes.push_back(16);
6228 if (ST->is64Bit()) {
6229 Options.LoadSizes.push_back(8);
6231 Options.LoadSizes.push_back(4);
6232 Options.LoadSizes.push_back(2);
6233 Options.LoadSizes.push_back(1);
6238 return supportsGather();
6249 return !(ST->isAtom());
6269 unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
6275 bool UseMaskedMemOp = UseMaskForCond || UseMaskForGaps;
6288 if (UseMaskedMemOp) {
6290 for (
unsigned Index : Indices) {
6291 assert(
Index < Factor &&
"Invalid index for interleaved memory op");
6292 for (
unsigned Elm = 0; Elm < VF; Elm++)
6293 DemandedLoadStoreElts.
setBit(
Index + Elm * Factor);
6300 UseMaskForGaps ? DemandedLoadStoreElts
6309 if (UseMaskForGaps) {
6315 if (Opcode == Instruction::Load) {
6322 static const CostTblEntry AVX512InterleavedLoadTbl[] = {
6323 {3, MVT::v16i8, 12},
6324 {3, MVT::v32i8, 14},
6325 {3, MVT::v64i8, 22},
6328 if (
const auto *Entry =
6330 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6340 ShuffleKind, SingleMemOpTy, std::nullopt,
CostKind, 0,
nullptr);
6342 unsigned NumOfLoadsInInterleaveGrp =
6343 Indices.
size() ? Indices.
size() : Factor;
6352 unsigned NumOfUnfoldedLoads =
6353 UseMaskedMemOp || NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2;
6356 unsigned NumOfShufflesPerResult =
6357 std::max((
unsigned)1, (
unsigned)(NumOfMemOps - 1));
6364 NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;
6367 MaskCost + NumOfUnfoldedLoads * MemOpCost +
6374 assert(Opcode == Instruction::Store &&
6375 "Expected Store Instruction at this point");
6377 static const CostTblEntry AVX512InterleavedStoreTbl[] = {
6378 {3, MVT::v16i8, 12},
6379 {3, MVT::v32i8, 14},
6380 {3, MVT::v64i8, 26},
6383 {4, MVT::v16i8, 11},
6384 {4, MVT::v32i8, 14},
6388 if (
const auto *Entry =
6390 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6395 unsigned NumOfSources = Factor;
6398 unsigned NumOfShufflesPerStore = NumOfSources - 1;
6402 unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2;
6405 NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) +
6413 bool UseMaskForCond,
bool UseMaskForGaps) {
6414 auto *VecTy = cast<FixedVectorType>(
BaseTy);
6416 auto isSupportedOnAVX512 = [&](
Type *VecTy) {
6417 Type *EltTy = cast<VectorType>(VecTy)->getElementType();
6422 return ST->hasBWI();
6424 return ST->hasBF16();
6427 if (ST->
hasAVX512() && isSupportedOnAVX512(VecTy))
6429 Opcode, VecTy, Factor, Indices, Alignment,
6432 if (UseMaskForCond || UseMaskForGaps)
6435 UseMaskForCond, UseMaskForGaps);
6455 unsigned VF = VecTy->getNumElements() / Factor;
6456 Type *ScalarTy = VecTy->getElementType();
6488 {2, MVT::v16i16, 9},
6489 {2, MVT::v32i16, 18},
6492 {2, MVT::v16i32, 8},
6493 {2, MVT::v32i32, 16},
6497 {2, MVT::v16i64, 16},
6498 {2, MVT::v32i64, 32},
6503 {3, MVT::v16i8, 11},
6504 {3, MVT::v32i8, 14},
6509 {3, MVT::v16i16, 28},
6510 {3, MVT::v32i16, 56},
6515 {3, MVT::v16i32, 14},
6516 {3, MVT::v32i32, 32},
6520 {3, MVT::v8i64, 10},
6521 {3, MVT::v16i64, 20},
6526 {4, MVT::v16i8, 24},
6527 {4, MVT::v32i8, 56},
6530 {4, MVT::v4i16, 17},
6531 {4, MVT::v8i16, 33},
6532 {4, MVT::v16i16, 75},
6533 {4, MVT::v32i16, 150},
6537 {4, MVT::v8i32, 16},
6538 {4, MVT::v16i32, 32},
6539 {4, MVT::v32i32, 68},
6543 {4, MVT::v8i64, 20},
6544 {4, MVT::v16i64, 40},
6549 {6, MVT::v16i8, 43},
6550 {6, MVT::v32i8, 82},
6552 {6, MVT::v2i16, 13},
6554 {6, MVT::v8i16, 39},
6555 {6, MVT::v16i16, 106},
6556 {6, MVT::v32i16, 212},
6559 {6, MVT::v4i32, 15},
6560 {6, MVT::v8i32, 31},
6561 {6, MVT::v16i32, 64},
6564 {6, MVT::v4i64, 18},
6565 {6, MVT::v8i64, 36},
6570 static const CostTblEntry SSSE3InterleavedLoadTbl[] = {
6584 static const CostTblEntry AVX2InterleavedStoreTbl[] = {
6589 {2, MVT::v16i16, 4},
6590 {2, MVT::v32i16, 8},
6594 {2, MVT::v16i32, 8},
6595 {2, MVT::v32i32, 16},
6600 {2, MVT::v16i64, 16},
6601 {2, MVT::v32i64, 32},
6606 {3, MVT::v16i8, 11},
6607 {3, MVT::v32i8, 13},
6611 {3, MVT::v8i16, 12},
6612 {3, MVT::v16i16, 27},
6613 {3, MVT::v32i16, 54},
6617 {3, MVT::v8i32, 11},
6618 {3, MVT::v16i32, 22},
6619 {3, MVT::v32i32, 48},
6623 {3, MVT::v8i64, 12},
6624 {3, MVT::v16i64, 24},
6630 {4, MVT::v32i8, 12},
6634 {4, MVT::v8i16, 10},
6635 {4, MVT::v16i16, 32},
6636 {4, MVT::v32i16, 64},
6640 {4, MVT::v8i32, 16},
6641 {4, MVT::v16i32, 32},
6642 {4, MVT::v32i32, 64},
6646 {4, MVT::v8i64, 20},
6647 {4, MVT::v16i64, 40},
6652 {6, MVT::v16i8, 27},
6653 {6, MVT::v32i8, 90},
6655 {6, MVT::v2i16, 10},
6656 {6, MVT::v4i16, 15},
6657 {6, MVT::v8i16, 21},
6658 {6, MVT::v16i16, 58},
6659 {6, MVT::v32i16, 90},
6662 {6, MVT::v4i32, 12},
6663 {6, MVT::v8i32, 33},
6664 {6, MVT::v16i32, 66},
6667 {6, MVT::v4i64, 15},
6668 {6, MVT::v8i64, 30},
6671 static const CostTblEntry SSE2InterleavedStoreTbl[] = {
6682 if (Opcode == Instruction::Load) {
6683 auto GetDiscountedCost = [Factor, NumMembers = Indices.
size(),
6687 return MemOpCosts +
divideCeil(NumMembers * Entry->Cost, Factor);
6691 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedLoadTbl, Factor,
6693 return GetDiscountedCost(Entry);
6696 if (
const auto *Entry =
CostTableLookup(SSSE3InterleavedLoadTbl, Factor,
6698 return GetDiscountedCost(Entry);
6701 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedLoadTbl, Factor,
6703 return GetDiscountedCost(Entry);
6705 assert(Opcode == Instruction::Store &&
6706 "Expected Store Instruction at this point");
6708 "Interleaved store only supports fully-interleaved groups.");
6710 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedStoreTbl, Factor,
6712 return MemOpCosts + Entry->Cost;
6715 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedStoreTbl, Factor,
6717 return MemOpCosts + Entry->Cost;
6722 UseMaskForCond, UseMaskForGaps);
6727 bool HasBaseReg, int64_t Scale,
6728 unsigned AddrSpace)
const {
6756 return AM.
Scale != 0;
Expand Atomic instructions
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t IntrinsicInst * II
const char LLVMTargetMachineRef TM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getNumElements(Type *Ty)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
APInt zext(unsigned width) const
Zero extend to a new width.
unsigned popcount() const
Count the number of bits set.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
Estimate the overhead of scalarizing an instruction.
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
A parsed version of the target data layout string in and methods for querying it.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
constexpr bool isScalar() const
Exactly one element.
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static InstructionCost getInvalid(CostType Val=0)
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
FastMathFlags getFlags() const
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
bool isTypeBasedOnly() const
bool is128BitVector() const
Return true if this is a 128-bit vector type.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
size_type size() const
Returns the number of bits in this bitvector.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const TargetMachine & getTargetMachine() const
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
MVT getSimpleValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the MVT corresponding to this LLVM type. See getValueType.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Primary interface to the complete machine description for the target machine.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
static Type * getDoubleTy(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Base class of all SIMD vector types.
static VectorType * getExtendedElementVectorType(VectorType *VTy)
This static method is like getInteger except that the element types are twice as wide as the elements...
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getDoubleElementsVectorType(VectorType *VTy)
This static method returns a VectorType with twice as many elements as the input type and the same el...
Type * getElementType() const
bool useAVX512Regs() const
unsigned getPreferVectorWidth() const
InstructionCost getInterleavedMemoryOpCostAVX512(unsigned Opcode, FixedVectorType *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
bool isLegalMaskedGather(Type *DataType, Align Alignment)
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const
std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool isLegalNTStore(Type *DataType, Align Alignment)
bool enableInterleavedAccessVectorization()
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
bool isLegalNTLoad(Type *DataType, Align Alignment)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment)
bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment)
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool isLegalMaskedLoad(Type *DataType, Align Alignment)
bool hasConditionalLoadStoreForType(Type *Ty=nullptr) const
bool supportsEfficientVectorElementLoadStore() const
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
bool prefersVectorizedAddressing() const
unsigned getLoadStoreVecRegBitWidth(unsigned AS) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment)
std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const override
bool isLegalMaskedStore(Type *DataType, Align Alignment)
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
Calculate the cost of Gather / Scatter operation.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
unsigned getMaxInterleaveFactor(ElementCount VF)
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
unsigned getNumberOfRegisters(unsigned ClassID) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
unsigned getAtomicMemIntrinsicMaxElementSize() const
bool isLegalMaskedScatter(Type *DataType, Align Alignment)
InstructionCost getIntImmCost(int64_t)
Calculate the cost of materializing a 64-bit value.
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getBranchMispredictPenalty() const
bool isExpensiveToSpeculativelyExecute(const Instruction *I)
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
InstructionCost getMinMaxCost(Intrinsic::ID IID, Type *Ty, TTI::TargetCostKind CostKind, FastMathFlags FMF)
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Type) const
bool hasDivRemOp(Type *DataType, bool IsSigned)
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
@ BSWAP
Byte Swap and Counting operators.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ SIGN_EXTEND
Conversion operators.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
@ SHL
Shift and rotation operations.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
bool match(Val *V, const Pattern &P)
apint_match m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
OneUse_match< T > m_OneUse(const T &SubPattern)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
constexpr int PoisonMaskElem
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
OutputIt copy(R &&Range, OutputIt Out)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
unsigned RecipThroughputCost
std::optional< unsigned > operator[](TargetTransformInfo::TargetCostKind Kind) const
unsigned SizeAndLatencyCost
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Type Conversion Cost Table.