63#define DEBUG_TYPE "x86tti"
79 std::optional<unsigned>
164 bool Vector = (ClassID == 1);
183 if (ST->
hasAVX512() && ST->hasEVEX512() && PreferVectorWidth >= 512)
185 if (ST->
hasAVX() && PreferVectorWidth >= 256)
187 if (ST->
hasSSE1() && PreferVectorWidth >= 128)
228 if (Opcode == Instruction::Mul && Ty->
isVectorTy() &&
245 assert(ISD &&
"Invalid opcode");
247 if (ISD ==
ISD::MUL && Args.size() == 2 && LT.second.isVector() &&
248 (LT.second.getScalarType() == MVT::i32 ||
249 LT.second.getScalarType() == MVT::i64)) {
251 bool Op1Signed =
false, Op2Signed =
false;
254 unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
255 bool SignedMode = Op1Signed || Op2Signed;
260 if (OpMinSize <= 15 && !ST->isPMADDWDSlow() &&
261 LT.second.getScalarType() == MVT::i32) {
263 isa<ConstantDataVector>(Args[0]) || isa<ConstantVector>(Args[0]);
265 isa<ConstantDataVector>(Args[1]) || isa<ConstantVector>(Args[1]);
266 bool Op1Sext = isa<SExtInst>(Args[0]) &&
267 (Op1MinSize == 15 || (Op1MinSize < 15 && !ST->
hasSSE41()));
268 bool Op2Sext = isa<SExtInst>(Args[1]) &&
269 (Op2MinSize == 15 || (Op2MinSize < 15 && !ST->
hasSSE41()));
271 bool IsZeroExtended = !Op1Signed || !Op2Signed;
272 bool IsConstant = Op1Constant || Op2Constant;
273 bool IsSext = Op1Sext || Op2Sext;
274 if (IsConstant || IsZeroExtended || IsSext)
282 if (ST->useSLMArithCosts() && LT.second == MVT::v4i32) {
285 if (!SignedMode && OpMinSize <= 8)
289 if (!SignedMode && OpMinSize <= 16)
296 if (!SignedMode && OpMinSize <= 32 && LT.second.getScalarType() == MVT::i64)
349 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
350 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
351 {
ISD::SRA, MVT::v16i8, { 1, 8, 4, 5 } },
352 {
ISD::SHL, MVT::v32i8, { 1, 8, 2, 3 } },
353 {
ISD::SRL, MVT::v32i8, { 1, 8, 2, 3 } },
354 {
ISD::SRA, MVT::v32i8, { 1, 9, 4, 5 } },
355 {
ISD::SHL, MVT::v64i8, { 1, 8, 2, 3 } },
356 {
ISD::SRL, MVT::v64i8, { 1, 8, 2, 3 } },
357 {
ISD::SRA, MVT::v64i8, { 1, 9, 4, 6 } },
359 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
360 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
361 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
362 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
363 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
364 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
368 if (
const auto *Entry =
370 if (
auto KindCost = Entry->Cost[
CostKind])
371 return LT.first * *KindCost;
374 {
ISD::SHL, MVT::v64i8, { 2, 12, 5, 6 } },
375 {
ISD::SRL, MVT::v64i8, { 2, 12, 5, 6 } },
376 {
ISD::SRA, MVT::v64i8, { 3, 10, 12, 12 } },
378 {
ISD::SHL, MVT::v16i16, { 2, 7, 4, 4 } },
379 {
ISD::SRL, MVT::v16i16, { 2, 7, 4, 4 } },
380 {
ISD::SRA, MVT::v16i16, { 2, 7, 4, 4 } },
382 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
383 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
384 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
385 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
386 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
387 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
389 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
390 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
391 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
392 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
393 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
394 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
395 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
404 if (
const auto *Entry =
406 if (
auto KindCost = Entry->Cost[
CostKind])
407 return LT.first * *KindCost;
410 {
ISD::SHL, MVT::v16i8, { 1, 8, 2, 3 } },
411 {
ISD::SRL, MVT::v16i8, { 1, 8, 2, 3 } },
412 {
ISD::SRA, MVT::v16i8, { 2, 10, 5, 6 } },
413 {
ISD::SHL, MVT::v32i8, { 2, 8, 2, 4 } },
414 {
ISD::SRL, MVT::v32i8, { 2, 8, 2, 4 } },
415 {
ISD::SRA, MVT::v32i8, { 3, 10, 5, 9 } },
417 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
418 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
419 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
420 {
ISD::SHL, MVT::v16i16,{ 2, 2, 1, 2 } },
421 {
ISD::SRL, MVT::v16i16,{ 2, 2, 1, 2 } },
422 {
ISD::SRA, MVT::v16i16,{ 2, 2, 1, 2 } },
424 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
425 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
426 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
427 {
ISD::SHL, MVT::v8i32, { 2, 2, 1, 2 } },
428 {
ISD::SRL, MVT::v8i32, { 2, 2, 1, 2 } },
429 {
ISD::SRA, MVT::v8i32, { 2, 2, 1, 2 } },
431 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
432 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
433 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
434 {
ISD::SHL, MVT::v4i64, { 2, 2, 1, 2 } },
435 {
ISD::SRL, MVT::v4i64, { 2, 2, 1, 2 } },
436 {
ISD::SRA, MVT::v4i64, { 4, 4, 3, 6 } },
445 if (
const auto *Entry =
447 if (
auto KindCost = Entry->Cost[
CostKind])
448 return LT.first * *KindCost;
451 {
ISD::SHL, MVT::v16i8, { 2, 7, 2, 3 } },
452 {
ISD::SRL, MVT::v16i8, { 2, 7, 2, 3 } },
453 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
454 {
ISD::SHL, MVT::v32i8, { 4, 7, 7, 8 } },
455 {
ISD::SRL, MVT::v32i8, { 4, 7, 7, 8 } },
456 {
ISD::SRA, MVT::v32i8, { 7, 7, 12, 13 } },
458 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 1 } },
459 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 1 } },
460 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 1 } },
461 {
ISD::SHL, MVT::v16i16,{ 3, 6, 4, 5 } },
462 {
ISD::SRL, MVT::v16i16,{ 3, 6, 4, 5 } },
463 {
ISD::SRA, MVT::v16i16,{ 3, 6, 4, 5 } },
465 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 1 } },
466 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 1 } },
467 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 1 } },
468 {
ISD::SHL, MVT::v8i32, { 3, 6, 4, 5 } },
469 {
ISD::SRL, MVT::v8i32, { 3, 6, 4, 5 } },
470 {
ISD::SRA, MVT::v8i32, { 3, 6, 4, 5 } },
472 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 1 } },
473 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 1 } },
474 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
475 {
ISD::SHL, MVT::v4i64, { 3, 6, 4, 5 } },
476 {
ISD::SRL, MVT::v4i64, { 3, 6, 4, 5 } },
477 {
ISD::SRA, MVT::v4i64, { 5, 7, 8, 9 } },
487 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
488 if (
const auto *Entry =
490 if (
auto KindCost = Entry->Cost[
CostKind])
491 return LT.first * *KindCost;
494 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
495 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
496 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
498 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
499 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
500 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
502 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
503 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
504 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
506 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
507 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
508 {
ISD::SRA, MVT::v2i64, { 3, 5, 6, 6 } },
518 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
519 if (
const auto *Entry =
521 if (
auto KindCost = Entry->Cost[
CostKind])
522 return LT.first * *KindCost;
537 if (
const auto *Entry =
539 if (
auto KindCost = Entry->Cost[
CostKind])
540 return LT.first * *KindCost;
560 if (
const auto *Entry =
562 if (
auto KindCost = Entry->Cost[
CostKind])
563 return LT.first * *KindCost;
583 if (
const auto *Entry =
CostTableLookup(AVX2ConstCostTable, ISD, LT.second))
584 if (
auto KindCost = Entry->Cost[
CostKind])
585 return LT.first * *KindCost;
605 if (
const auto *Entry =
CostTableLookup(AVXConstCostTable, ISD, LT.second))
606 if (
auto KindCost = Entry->Cost[
CostKind])
607 return LT.first * *KindCost;
615 if (
const auto *Entry =
617 if (
auto KindCost = Entry->Cost[
CostKind])
618 return LT.first * *KindCost;
638 if (
const auto *Entry =
CostTableLookup(SSE2ConstCostTable, ISD, LT.second))
639 if (
auto KindCost = Entry->Cost[
CostKind])
640 return LT.first * *KindCost;
643 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
644 {
ISD::SRL, MVT::v16i8, { 3,10, 5, 8 } },
645 {
ISD::SRA, MVT::v16i8, { 4,12, 8,12 } },
646 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
647 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
648 {
ISD::SRA, MVT::v32i8, { 5,10,10,13 } },
649 {
ISD::SHL, MVT::v64i8, { 4, 7, 6, 8 } },
650 {
ISD::SRL, MVT::v64i8, { 4, 8, 7,10 } },
651 {
ISD::SRA, MVT::v64i8, { 5,10,10,15 } },
653 {
ISD::SHL, MVT::v32i16, { 2, 4, 2, 3 } },
654 {
ISD::SRL, MVT::v32i16, { 2, 4, 2, 3 } },
655 {
ISD::SRA, MVT::v32i16, { 2, 4, 2, 3 } },
659 if (
const auto *Entry =
661 if (
auto KindCost = Entry->Cost[
CostKind])
662 return LT.first * *KindCost;
665 {
ISD::SHL, MVT::v32i16, { 5,10, 5, 7 } },
666 {
ISD::SRL, MVT::v32i16, { 5,10, 5, 7 } },
667 {
ISD::SRA, MVT::v32i16, { 5,10, 5, 7 } },
669 {
ISD::SHL, MVT::v16i32, { 2, 4, 2, 3 } },
670 {
ISD::SRL, MVT::v16i32, { 2, 4, 2, 3 } },
671 {
ISD::SRA, MVT::v16i32, { 2, 4, 2, 3 } },
673 {
ISD::SRA, MVT::v2i64, { 1, 2, 1, 2 } },
674 {
ISD::SHL, MVT::v4i64, { 1, 4, 1, 2 } },
675 {
ISD::SRL, MVT::v4i64, { 1, 4, 1, 2 } },
676 {
ISD::SRA, MVT::v4i64, { 1, 4, 1, 2 } },
677 {
ISD::SHL, MVT::v8i64, { 1, 4, 1, 2 } },
678 {
ISD::SRL, MVT::v8i64, { 1, 4, 1, 2 } },
679 {
ISD::SRA, MVT::v8i64, { 1, 4, 1, 2 } },
683 if (
const auto *Entry =
685 if (
auto KindCost = Entry->Cost[
CostKind])
686 return LT.first * *KindCost;
690 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
691 {
ISD::SRL, MVT::v16i8, { 3, 9, 5, 8 } },
692 {
ISD::SRA, MVT::v16i8, { 4, 5, 9,13 } },
693 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
694 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
695 {
ISD::SRA, MVT::v32i8, { 6, 9,11,16 } },
697 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 2 } },
698 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 2 } },
699 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 2 } },
700 {
ISD::SHL, MVT::v16i16, { 2, 4, 2, 3 } },
701 {
ISD::SRL, MVT::v16i16, { 2, 4, 2, 3 } },
702 {
ISD::SRA, MVT::v16i16, { 2, 4, 2, 3 } },
704 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 2 } },
705 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 2 } },
706 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 2 } },
707 {
ISD::SHL, MVT::v8i32, { 2, 4, 2, 3 } },
708 {
ISD::SRL, MVT::v8i32, { 2, 4, 2, 3 } },
709 {
ISD::SRA, MVT::v8i32, { 2, 4, 2, 3 } },
711 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 2 } },
712 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 2 } },
713 {
ISD::SRA, MVT::v2i64, { 2, 4, 5, 7 } },
714 {
ISD::SHL, MVT::v4i64, { 2, 4, 1, 2 } },
715 {
ISD::SRL, MVT::v4i64, { 2, 4, 1, 2 } },
716 {
ISD::SRA, MVT::v4i64, { 4, 6, 5, 9 } },
720 if (
const auto *Entry =
722 if (
auto KindCost = Entry->Cost[
CostKind])
723 return LT.first * *KindCost;
726 {
ISD::SHL, MVT::v16i8, { 4, 4, 6, 8 } },
727 {
ISD::SRL, MVT::v16i8, { 4, 8, 5, 8 } },
728 {
ISD::SRA, MVT::v16i8, { 6, 6, 9,13 } },
729 {
ISD::SHL, MVT::v32i8, { 7, 8,11,14 } },
730 {
ISD::SRL, MVT::v32i8, { 7, 9,10,14 } },
731 {
ISD::SRA, MVT::v32i8, { 10,11,16,21 } },
733 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 2 } },
734 {
ISD::SRL, MVT::v8i16, { 1, 3, 1, 2 } },
735 {
ISD::SRA, MVT::v8i16, { 1, 3, 1, 2 } },
736 {
ISD::SHL, MVT::v16i16, { 3, 7, 5, 7 } },
737 {
ISD::SRL, MVT::v16i16, { 3, 7, 5, 7 } },
738 {
ISD::SRA, MVT::v16i16, { 3, 7, 5, 7 } },
740 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 2 } },
741 {
ISD::SRL, MVT::v4i32, { 1, 3, 1, 2 } },
742 {
ISD::SRA, MVT::v4i32, { 1, 3, 1, 2 } },
743 {
ISD::SHL, MVT::v8i32, { 3, 7, 5, 7 } },
744 {
ISD::SRL, MVT::v8i32, { 3, 7, 5, 7 } },
745 {
ISD::SRA, MVT::v8i32, { 3, 7, 5, 7 } },
747 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 2 } },
748 {
ISD::SRL, MVT::v2i64, { 1, 3, 1, 2 } },
749 {
ISD::SRA, MVT::v2i64, { 3, 4, 5, 7 } },
750 {
ISD::SHL, MVT::v4i64, { 3, 7, 4, 6 } },
751 {
ISD::SRL, MVT::v4i64, { 3, 7, 4, 6 } },
752 {
ISD::SRA, MVT::v4i64, { 6, 7,10,13 } },
757 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
758 if (
const auto *Entry =
760 if (
auto KindCost = Entry->Cost[
CostKind])
761 return LT.first * *KindCost;
765 {
ISD::SHL, MVT::v16i8, { 9, 10, 6, 9 } },
766 {
ISD::SRL, MVT::v16i8, { 9, 13, 5, 9 } },
767 {
ISD::SRA, MVT::v16i8, { 11, 15, 9,13 } },
769 {
ISD::SHL, MVT::v8i16, { 2, 2, 1, 2 } },
770 {
ISD::SRL, MVT::v8i16, { 2, 2, 1, 2 } },
771 {
ISD::SRA, MVT::v8i16, { 2, 2, 1, 2 } },
773 {
ISD::SHL, MVT::v4i32, { 2, 2, 1, 2 } },
774 {
ISD::SRL, MVT::v4i32, { 2, 2, 1, 2 } },
775 {
ISD::SRA, MVT::v4i32, { 2, 2, 1, 2 } },
777 {
ISD::SHL, MVT::v2i64, { 2, 2, 1, 2 } },
778 {
ISD::SRL, MVT::v2i64, { 2, 2, 1, 2 } },
779 {
ISD::SRA, MVT::v2i64, { 5, 9, 5, 7 } },
783 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
784 if (
const auto *Entry =
786 if (
auto KindCost = Entry->Cost[
CostKind])
787 return LT.first * *KindCost;
790 {
ISD::MUL, MVT::v2i64, { 2, 15, 1, 3 } },
791 {
ISD::MUL, MVT::v4i64, { 2, 15, 1, 3 } },
792 {
ISD::MUL, MVT::v8i64, { 3, 15, 1, 3 } }
797 if (
const auto *Entry =
CostTableLookup(AVX512DQCostTable, ISD, LT.second))
798 if (
auto KindCost = Entry->Cost[
CostKind])
799 return LT.first * *KindCost;
802 {
ISD::SHL, MVT::v16i8, { 4, 8, 4, 5 } },
803 {
ISD::SRL, MVT::v16i8, { 4, 8, 4, 5 } },
804 {
ISD::SRA, MVT::v16i8, { 4, 8, 4, 5 } },
805 {
ISD::SHL, MVT::v32i8, { 4, 23,11,16 } },
806 {
ISD::SRL, MVT::v32i8, { 4, 30,12,18 } },
807 {
ISD::SRA, MVT::v32i8, { 6, 13,24,30 } },
808 {
ISD::SHL, MVT::v64i8, { 6, 19,13,15 } },
809 {
ISD::SRL, MVT::v64i8, { 7, 27,15,18 } },
810 {
ISD::SRA, MVT::v64i8, { 15, 15,30,30 } },
812 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
813 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
814 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
815 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
816 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
817 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
818 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
819 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
820 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
822 {
ISD::ADD, MVT::v64i8, { 1, 1, 1, 1 } },
823 {
ISD::ADD, MVT::v32i16, { 1, 1, 1, 1 } },
825 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 1 } },
826 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 1 } },
827 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 1 } },
828 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 1 } },
830 {
ISD::SUB, MVT::v64i8, { 1, 1, 1, 1 } },
831 {
ISD::SUB, MVT::v32i16, { 1, 1, 1, 1 } },
833 {
ISD::MUL, MVT::v64i8, { 5, 10,10,11 } },
834 {
ISD::MUL, MVT::v32i16, { 1, 5, 1, 1 } },
836 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 1 } },
837 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 1 } },
838 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 1 } },
839 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 1 } },
844 if (
const auto *Entry =
CostTableLookup(AVX512BWCostTable, ISD, LT.second))
845 if (
auto KindCost = Entry->Cost[
CostKind])
846 return LT.first * *KindCost;
849 {
ISD::SHL, MVT::v64i8, { 15, 19,27,33 } },
850 {
ISD::SRL, MVT::v64i8, { 15, 19,30,36 } },
851 {
ISD::SRA, MVT::v64i8, { 37, 37,51,63 } },
853 {
ISD::SHL, MVT::v32i16, { 11, 16,11,15 } },
854 {
ISD::SRL, MVT::v32i16, { 11, 16,11,15 } },
855 {
ISD::SRA, MVT::v32i16, { 11, 16,11,15 } },
857 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
858 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
859 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
860 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
861 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
862 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
863 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
864 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
865 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
867 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
868 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
869 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
870 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
871 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
872 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
873 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
874 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
875 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
877 {
ISD::ADD, MVT::v64i8, { 3, 7, 5, 5 } },
878 {
ISD::ADD, MVT::v32i16, { 3, 7, 5, 5 } },
880 {
ISD::SUB, MVT::v64i8, { 3, 7, 5, 5 } },
881 {
ISD::SUB, MVT::v32i16, { 3, 7, 5, 5 } },
883 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 1 } },
884 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 1 } },
885 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 1 } },
886 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 1 } },
888 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 1 } },
889 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 1 } },
890 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 1 } },
891 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 1 } },
893 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 1 } },
894 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 1 } },
895 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 1 } },
896 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 1 } },
898 {
ISD::MUL, MVT::v16i32, { 1, 10, 1, 2 } },
899 {
ISD::MUL, MVT::v8i32, { 1, 10, 1, 2 } },
900 {
ISD::MUL, MVT::v4i32, { 1, 10, 1, 2 } },
901 {
ISD::MUL, MVT::v8i64, { 6, 9, 8, 8 } },
906 {
ISD::FNEG, MVT::v8f64, { 1, 1, 1, 2 } },
907 {
ISD::FADD, MVT::v8f64, { 1, 4, 1, 1 } },
908 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 1 } },
909 {
ISD::FSUB, MVT::v8f64, { 1, 4, 1, 1 } },
910 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 1 } },
911 {
ISD::FMUL, MVT::v8f64, { 1, 4, 1, 1 } },
912 {
ISD::FMUL, MVT::v4f64, { 1, 4, 1, 1 } },
913 {
ISD::FMUL, MVT::v2f64, { 1, 4, 1, 1 } },
916 {
ISD::FDIV, MVT::f64, { 4, 14, 1, 1 } },
917 {
ISD::FDIV, MVT::v2f64, { 4, 14, 1, 1 } },
918 {
ISD::FDIV, MVT::v4f64, { 8, 14, 1, 1 } },
919 {
ISD::FDIV, MVT::v8f64, { 16, 23, 1, 3 } },
921 {
ISD::FNEG, MVT::v16f32, { 1, 1, 1, 2 } },
922 {
ISD::FADD, MVT::v16f32, { 1, 4, 1, 1 } },
923 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 1 } },
924 {
ISD::FSUB, MVT::v16f32, { 1, 4, 1, 1 } },
925 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 1 } },
926 {
ISD::FMUL, MVT::v16f32, { 1, 4, 1, 1 } },
927 {
ISD::FMUL, MVT::v8f32, { 1, 4, 1, 1 } },
928 {
ISD::FMUL, MVT::v4f32, { 1, 4, 1, 1 } },
931 {
ISD::FDIV, MVT::f32, { 3, 11, 1, 1 } },
932 {
ISD::FDIV, MVT::v4f32, { 3, 11, 1, 1 } },
933 {
ISD::FDIV, MVT::v8f32, { 5, 11, 1, 1 } },
934 {
ISD::FDIV, MVT::v16f32, { 10, 18, 1, 3 } },
938 if (
const auto *Entry =
CostTableLookup(AVX512CostTable, ISD, LT.second))
939 if (
auto KindCost = Entry->Cost[
CostKind])
940 return LT.first * *KindCost;
945 {
ISD::SHL, MVT::v4i32, { 2, 3, 1, 3 } },
946 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 3 } },
947 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 3 } },
948 {
ISD::SHL, MVT::v8i32, { 4, 4, 1, 3 } },
949 {
ISD::SRL, MVT::v8i32, { 4, 4, 1, 3 } },
950 {
ISD::SRA, MVT::v8i32, { 4, 4, 1, 3 } },
951 {
ISD::SHL, MVT::v2i64, { 2, 3, 1, 1 } },
952 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
953 {
ISD::SHL, MVT::v4i64, { 4, 4, 1, 2 } },
954 {
ISD::SRL, MVT::v4i64, { 4, 4, 1, 2 } },
966 if (ST->
hasAVX2() && !(ST->hasXOP() && LT.second == MVT::v4i32)) {
967 if (ISD ==
ISD::SHL && LT.second == MVT::v16i16 &&
974 if (
const auto *Entry =
CostTableLookup(AVX2ShiftCostTable, ISD, LT.second))
975 if (
auto KindCost = Entry->Cost[
CostKind])
976 return LT.first * *KindCost;
981 {
ISD::SHL, MVT::v16i8, { 1, 3, 1, 1 } },
982 {
ISD::SRL, MVT::v16i8, { 2, 3, 1, 1 } },
983 {
ISD::SRA, MVT::v16i8, { 2, 3, 1, 1 } },
984 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 1 } },
985 {
ISD::SRL, MVT::v8i16, { 2, 3, 1, 1 } },
986 {
ISD::SRA, MVT::v8i16, { 2, 3, 1, 1 } },
987 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 1 } },
988 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 1 } },
989 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 1 } },
990 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 1 } },
991 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
992 {
ISD::SRA, MVT::v2i64, { 2, 3, 1, 1 } },
994 {
ISD::SHL, MVT::v32i8, { 4, 7, 5, 6 } },
995 {
ISD::SRL, MVT::v32i8, { 6, 7, 5, 6 } },
996 {
ISD::SRA, MVT::v32i8, { 6, 7, 5, 6 } },
997 {
ISD::SHL, MVT::v16i16, { 4, 7, 5, 6 } },
998 {
ISD::SRL, MVT::v16i16, { 6, 7, 5, 6 } },
999 {
ISD::SRA, MVT::v16i16, { 6, 7, 5, 6 } },
1000 {
ISD::SHL, MVT::v8i32, { 4, 7, 5, 6 } },
1001 {
ISD::SRL, MVT::v8i32, { 6, 7, 5, 6 } },
1002 {
ISD::SRA, MVT::v8i32, { 6, 7, 5, 6 } },
1003 {
ISD::SHL, MVT::v4i64, { 4, 7, 5, 6 } },
1004 {
ISD::SRL, MVT::v4i64, { 6, 7, 5, 6 } },
1005 {
ISD::SRA, MVT::v4i64, { 6, 7, 5, 6 } },
1015 if (
const auto *Entry =
1017 if (
auto KindCost = Entry->Cost[
CostKind])
1018 return LT.first * *KindCost;
1025 if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->
hasSSE2()) ||
1026 ((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->
hasAVX()))
1031 {
ISD::FDIV, MVT::f32, { 18, 19, 1, 1 } },
1032 {
ISD::FDIV, MVT::v4f32, { 35, 36, 1, 1 } },
1033 {
ISD::FDIV, MVT::f64, { 33, 34, 1, 1 } },
1034 {
ISD::FDIV, MVT::v2f64, { 65, 66, 1, 1 } },
1037 if (ST->useGLMDivSqrtCosts())
1038 if (
const auto *Entry =
CostTableLookup(GLMCostTable, ISD, LT.second))
1039 if (
auto KindCost = Entry->Cost[
CostKind])
1040 return LT.first * *KindCost;
1043 {
ISD::MUL, MVT::v4i32, { 11, 11, 1, 7 } },
1044 {
ISD::MUL, MVT::v8i16, { 2, 5, 1, 1 } },
1045 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1046 {
ISD::FMUL, MVT::f32, { 1, 4, 1, 1 } },
1047 {
ISD::FMUL, MVT::v2f64, { 4, 7, 1, 1 } },
1048 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1049 {
ISD::FDIV, MVT::f32, { 17, 19, 1, 1 } },
1050 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 6 } },
1051 {
ISD::FDIV, MVT::f64, { 32, 34, 1, 1 } },
1052 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 6 } },
1053 {
ISD::FADD, MVT::v2f64, { 2, 4, 1, 1 } },
1054 {
ISD::FSUB, MVT::v2f64, { 2, 4, 1, 1 } },
1060 {
ISD::MUL, MVT::v2i64, { 17, 22, 9, 9 } },
1062 {
ISD::ADD, MVT::v2i64, { 4, 2, 1, 2 } },
1063 {
ISD::SUB, MVT::v2i64, { 4, 2, 1, 2 } },
1066 if (ST->useSLMArithCosts())
1067 if (
const auto *Entry =
CostTableLookup(SLMCostTable, ISD, LT.second))
1068 if (
auto KindCost = Entry->Cost[
CostKind])
1069 return LT.first * *KindCost;
1072 {
ISD::SHL, MVT::v16i8, { 6, 21,11,16 } },
1073 {
ISD::SHL, MVT::v32i8, { 6, 23,11,22 } },
1074 {
ISD::SHL, MVT::v8i16, { 5, 18, 5,10 } },
1075 {
ISD::SHL, MVT::v16i16, { 8, 10,10,14 } },
1077 {
ISD::SRL, MVT::v16i8, { 6, 27,12,18 } },
1078 {
ISD::SRL, MVT::v32i8, { 8, 30,12,24 } },
1079 {
ISD::SRL, MVT::v8i16, { 5, 11, 5,10 } },
1080 {
ISD::SRL, MVT::v16i16, { 8, 10,10,14 } },
1082 {
ISD::SRA, MVT::v16i8, { 17, 17,24,30 } },
1083 {
ISD::SRA, MVT::v32i8, { 18, 20,24,43 } },
1084 {
ISD::SRA, MVT::v8i16, { 5, 11, 5,10 } },
1085 {
ISD::SRA, MVT::v16i16, { 8, 10,10,14 } },
1086 {
ISD::SRA, MVT::v2i64, { 4, 5, 5, 5 } },
1087 {
ISD::SRA, MVT::v4i64, { 8, 8, 5, 9 } },
1089 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 2 } },
1090 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 2 } },
1091 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 2 } },
1092 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 2 } },
1093 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 2 } },
1094 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 2 } },
1095 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 2 } },
1096 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 2 } },
1098 {
ISD::MUL, MVT::v16i8, { 5, 18, 6,12 } },
1099 {
ISD::MUL, MVT::v32i8, { 6, 11,10,19 } },
1100 {
ISD::MUL, MVT::v16i16, { 2, 5, 1, 2 } },
1101 {
ISD::MUL, MVT::v8i32, { 4, 10, 1, 2 } },
1102 {
ISD::MUL, MVT::v4i32, { 2, 10, 1, 2 } },
1103 {
ISD::MUL, MVT::v4i64, { 6, 10, 8,13 } },
1104 {
ISD::MUL, MVT::v2i64, { 6, 10, 8, 8 } },
1108 {
ISD::FNEG, MVT::v4f64, { 1, 1, 1, 2 } },
1109 {
ISD::FNEG, MVT::v8f32, { 1, 1, 1, 2 } },
1111 {
ISD::FADD, MVT::f64, { 1, 4, 1, 1 } },
1112 {
ISD::FADD, MVT::f32, { 1, 4, 1, 1 } },
1113 {
ISD::FADD, MVT::v2f64, { 1, 4, 1, 1 } },
1114 {
ISD::FADD, MVT::v4f32, { 1, 4, 1, 1 } },
1115 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 2 } },
1116 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 2 } },
1118 {
ISD::FSUB, MVT::f64, { 1, 4, 1, 1 } },
1119 {
ISD::FSUB, MVT::f32, { 1, 4, 1, 1 } },
1120 {
ISD::FSUB, MVT::v2f64, { 1, 4, 1, 1 } },
1121 {
ISD::FSUB, MVT::v4f32, { 1, 4, 1, 1 } },
1122 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 2 } },
1123 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 2 } },
1125 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1126 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1127 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1128 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1129 {
ISD::FMUL, MVT::v4f64, { 1, 5, 1, 2 } },
1130 {
ISD::FMUL, MVT::v8f32, { 1, 5, 1, 2 } },
1132 {
ISD::FDIV, MVT::f32, { 7, 13, 1, 1 } },
1133 {
ISD::FDIV, MVT::v4f32, { 7, 13, 1, 1 } },
1134 {
ISD::FDIV, MVT::v8f32, { 14, 21, 1, 3 } },
1135 {
ISD::FDIV, MVT::f64, { 14, 20, 1, 1 } },
1136 {
ISD::FDIV, MVT::v2f64, { 14, 20, 1, 1 } },
1137 {
ISD::FDIV, MVT::v4f64, { 28, 35, 1, 3 } },
1142 if (
const auto *Entry =
CostTableLookup(AVX2CostTable, ISD, LT.second))
1143 if (
auto KindCost = Entry->Cost[
CostKind])
1144 return LT.first * *KindCost;
1150 {
ISD::MUL, MVT::v32i8, { 12, 13, 22, 23 } },
1151 {
ISD::MUL, MVT::v16i16, { 4, 8, 5, 6 } },
1152 {
ISD::MUL, MVT::v8i32, { 5, 8, 5, 10 } },
1153 {
ISD::MUL, MVT::v4i32, { 2, 5, 1, 3 } },
1154 {
ISD::MUL, MVT::v4i64, { 12, 15, 19, 20 } },
1156 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 2 } },
1157 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 2 } },
1158 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 2 } },
1159 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 2 } },
1161 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 2 } },
1162 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 2 } },
1163 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 2 } },
1164 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 2 } },
1166 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 2 } },
1167 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 2 } },
1168 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 2 } },
1169 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 2 } },
1171 {
ISD::SUB, MVT::v32i8, { 4, 2, 5, 6 } },
1172 {
ISD::ADD, MVT::v32i8, { 4, 2, 5, 6 } },
1173 {
ISD::SUB, MVT::v16i16, { 4, 2, 5, 6 } },
1174 {
ISD::ADD, MVT::v16i16, { 4, 2, 5, 6 } },
1175 {
ISD::SUB, MVT::v8i32, { 4, 2, 5, 6 } },
1176 {
ISD::ADD, MVT::v8i32, { 4, 2, 5, 6 } },
1177 {
ISD::SUB, MVT::v4i64, { 4, 2, 5, 6 } },
1178 {
ISD::ADD, MVT::v4i64, { 4, 2, 5, 6 } },
1179 {
ISD::SUB, MVT::v2i64, { 1, 1, 1, 1 } },
1180 {
ISD::ADD, MVT::v2i64, { 1, 1, 1, 1 } },
1182 {
ISD::SHL, MVT::v16i8, { 10, 21,11,17 } },
1183 {
ISD::SHL, MVT::v32i8, { 22, 22,27,40 } },
1184 {
ISD::SHL, MVT::v8i16, { 6, 9,11,11 } },
1185 {
ISD::SHL, MVT::v16i16, { 13, 16,24,25 } },
1186 {
ISD::SHL, MVT::v4i32, { 3, 11, 4, 6 } },
1187 {
ISD::SHL, MVT::v8i32, { 9, 11,12,17 } },
1188 {
ISD::SHL, MVT::v2i64, { 2, 4, 4, 6 } },
1189 {
ISD::SHL, MVT::v4i64, { 6, 7,11,15 } },
1191 {
ISD::SRL, MVT::v16i8, { 11, 27,12,18 } },
1192 {
ISD::SRL, MVT::v32i8, { 23, 23,30,43 } },
1193 {
ISD::SRL, MVT::v8i16, { 13, 16,14,22 } },
1194 {
ISD::SRL, MVT::v16i16, { 28, 30,31,48 } },
1195 {
ISD::SRL, MVT::v4i32, { 6, 7,12,16 } },
1196 {
ISD::SRL, MVT::v8i32, { 14, 14,26,34 } },
1197 {
ISD::SRL, MVT::v2i64, { 2, 4, 4, 6 } },
1198 {
ISD::SRL, MVT::v4i64, { 6, 7,11,15 } },
1200 {
ISD::SRA, MVT::v16i8, { 21, 22,24,36 } },
1201 {
ISD::SRA, MVT::v32i8, { 44, 45,51,76 } },
1202 {
ISD::SRA, MVT::v8i16, { 13, 16,14,22 } },
1203 {
ISD::SRA, MVT::v16i16, { 28, 30,31,48 } },
1204 {
ISD::SRA, MVT::v4i32, { 6, 7,12,16 } },
1205 {
ISD::SRA, MVT::v8i32, { 14, 14,26,34 } },
1206 {
ISD::SRA, MVT::v2i64, { 5, 6,10,14 } },
1207 {
ISD::SRA, MVT::v4i64, { 12, 12,22,30 } },
1209 {
ISD::FNEG, MVT::v4f64, { 2, 2, 1, 2 } },
1210 {
ISD::FNEG, MVT::v8f32, { 2, 2, 1, 2 } },
1212 {
ISD::FADD, MVT::f64, { 1, 5, 1, 1 } },
1213 {
ISD::FADD, MVT::f32, { 1, 5, 1, 1 } },
1214 {
ISD::FADD, MVT::v2f64, { 1, 5, 1, 1 } },
1215 {
ISD::FADD, MVT::v4f32, { 1, 5, 1, 1 } },
1216 {
ISD::FADD, MVT::v4f64, { 2, 5, 1, 2 } },
1217 {
ISD::FADD, MVT::v8f32, { 2, 5, 1, 2 } },
1219 {
ISD::FSUB, MVT::f64, { 1, 5, 1, 1 } },
1220 {
ISD::FSUB, MVT::f32, { 1, 5, 1, 1 } },
1221 {
ISD::FSUB, MVT::v2f64, { 1, 5, 1, 1 } },
1222 {
ISD::FSUB, MVT::v4f32, { 1, 5, 1, 1 } },
1223 {
ISD::FSUB, MVT::v4f64, { 2, 5, 1, 2 } },
1224 {
ISD::FSUB, MVT::v8f32, { 2, 5, 1, 2 } },
1226 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1227 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1228 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1229 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1230 {
ISD::FMUL, MVT::v4f64, { 4, 5, 1, 2 } },
1231 {
ISD::FMUL, MVT::v8f32, { 2, 5, 1, 2 } },
1233 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1234 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1235 {
ISD::FDIV, MVT::v8f32, { 28, 29, 1, 3 } },
1236 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1237 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1238 {
ISD::FDIV, MVT::v4f64, { 44, 45, 1, 3 } },
1242 if (
const auto *Entry =
CostTableLookup(AVX1CostTable, ISD, LT.second))
1243 if (
auto KindCost = Entry->Cost[
CostKind])
1244 return LT.first * *KindCost;
1247 {
ISD::FADD, MVT::f64, { 1, 3, 1, 1 } },
1248 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1249 {
ISD::FADD, MVT::v2f64, { 1, 3, 1, 1 } },
1250 {
ISD::FADD, MVT::v4f32, { 1, 3, 1, 1 } },
1252 {
ISD::FSUB, MVT::f64, { 1, 3, 1, 1 } },
1253 {
ISD::FSUB, MVT::f32 , { 1, 3, 1, 1 } },
1254 {
ISD::FSUB, MVT::v2f64, { 1, 3, 1, 1 } },
1255 {
ISD::FSUB, MVT::v4f32, { 1, 3, 1, 1 } },
1257 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1258 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1259 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1260 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1262 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1263 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1264 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1265 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1267 {
ISD::MUL, MVT::v2i64, { 6, 10,10,10 } }
1271 if (
const auto *Entry =
CostTableLookup(SSE42CostTable, ISD, LT.second))
1272 if (
auto KindCost = Entry->Cost[
CostKind])
1273 return LT.first * *KindCost;
1276 {
ISD::SHL, MVT::v16i8, { 15, 24,17,22 } },
1277 {
ISD::SHL, MVT::v8i16, { 11, 14,11,11 } },
1278 {
ISD::SHL, MVT::v4i32, { 14, 20, 4,10 } },
1280 {
ISD::SRL, MVT::v16i8, { 16, 27,18,24 } },
1281 {
ISD::SRL, MVT::v8i16, { 22, 26,23,27 } },
1282 {
ISD::SRL, MVT::v4i32, { 16, 17,15,19 } },
1283 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1285 {
ISD::SRA, MVT::v16i8, { 38, 41,30,36 } },
1286 {
ISD::SRA, MVT::v8i16, { 22, 26,23,27 } },
1287 {
ISD::SRA, MVT::v4i32, { 16, 17,15,19 } },
1288 {
ISD::SRA, MVT::v2i64, { 8, 17, 5, 7 } },
1290 {
ISD::MUL, MVT::v16i8, { 5, 18,10,12 } },
1291 {
ISD::MUL, MVT::v4i32, { 2, 11, 1, 1 } }
1295 if (
const auto *Entry =
CostTableLookup(SSE41CostTable, ISD, LT.second))
1296 if (
auto KindCost = Entry->Cost[
CostKind])
1297 return LT.first * *KindCost;
1302 {
ISD::SHL, MVT::v16i8, { 13, 21,26,28 } },
1303 {
ISD::SHL, MVT::v8i16, { 24, 27,16,20 } },
1304 {
ISD::SHL, MVT::v4i32, { 17, 19,10,12 } },
1305 {
ISD::SHL, MVT::v2i64, { 4, 6, 5, 7 } },
1307 {
ISD::SRL, MVT::v16i8, { 14, 28,27,30 } },
1308 {
ISD::SRL, MVT::v8i16, { 16, 19,31,31 } },
1309 {
ISD::SRL, MVT::v4i32, { 12, 12,15,19 } },
1310 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1312 {
ISD::SRA, MVT::v16i8, { 27, 30,54,54 } },
1313 {
ISD::SRA, MVT::v8i16, { 16, 19,31,31 } },
1314 {
ISD::SRA, MVT::v4i32, { 12, 12,15,19 } },
1315 {
ISD::SRA, MVT::v2i64, { 8, 11,12,16 } },
1317 {
ISD::AND, MVT::v16i8, { 1, 1, 1, 1 } },
1318 {
ISD::AND, MVT::v8i16, { 1, 1, 1, 1 } },
1319 {
ISD::AND, MVT::v4i32, { 1, 1, 1, 1 } },
1320 {
ISD::AND, MVT::v2i64, { 1, 1, 1, 1 } },
1322 {
ISD::OR, MVT::v16i8, { 1, 1, 1, 1 } },
1323 {
ISD::OR, MVT::v8i16, { 1, 1, 1, 1 } },
1324 {
ISD::OR, MVT::v4i32, { 1, 1, 1, 1 } },
1325 {
ISD::OR, MVT::v2i64, { 1, 1, 1, 1 } },
1327 {
ISD::XOR, MVT::v16i8, { 1, 1, 1, 1 } },
1328 {
ISD::XOR, MVT::v8i16, { 1, 1, 1, 1 } },
1329 {
ISD::XOR, MVT::v4i32, { 1, 1, 1, 1 } },
1330 {
ISD::XOR, MVT::v2i64, { 1, 1, 1, 1 } },
1332 {
ISD::ADD, MVT::v2i64, { 1, 2, 1, 2 } },
1333 {
ISD::SUB, MVT::v2i64, { 1, 2, 1, 2 } },
1335 {
ISD::MUL, MVT::v16i8, { 5, 18,12,12 } },
1336 {
ISD::MUL, MVT::v8i16, { 1, 5, 1, 1 } },
1337 {
ISD::MUL, MVT::v4i32, { 6, 8, 7, 7 } },
1338 {
ISD::MUL, MVT::v2i64, { 7, 10,10,10 } },
1342 {
ISD::FDIV, MVT::f32, { 23, 23, 1, 1 } },
1343 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 1 } },
1344 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1345 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 1 } },
1347 {
ISD::FNEG, MVT::f32, { 1, 1, 1, 1 } },
1348 {
ISD::FNEG, MVT::f64, { 1, 1, 1, 1 } },
1349 {
ISD::FNEG, MVT::v4f32, { 1, 1, 1, 1 } },
1350 {
ISD::FNEG, MVT::v2f64, { 1, 1, 1, 1 } },
1352 {
ISD::FADD, MVT::f32, { 2, 3, 1, 1 } },
1353 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1354 {
ISD::FADD, MVT::v2f64, { 2, 3, 1, 1 } },
1356 {
ISD::FSUB, MVT::f32, { 2, 3, 1, 1 } },
1357 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1358 {
ISD::FSUB, MVT::v2f64, { 2, 3, 1, 1 } },
1360 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1361 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1365 if (
const auto *Entry =
CostTableLookup(SSE2CostTable, ISD, LT.second))
1366 if (
auto KindCost = Entry->Cost[
CostKind])
1367 return LT.first * *KindCost;
1370 {
ISD::FDIV, MVT::f32, { 17, 18, 1, 1 } },
1371 {
ISD::FDIV, MVT::v4f32, { 34, 48, 1, 1 } },
1373 {
ISD::FNEG, MVT::f32, { 2, 2, 1, 2 } },
1374 {
ISD::FNEG, MVT::v4f32, { 2, 2, 1, 2 } },
1376 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1377 {
ISD::FADD, MVT::v4f32, { 2, 3, 1, 1 } },
1379 {
ISD::FSUB, MVT::f32, { 1, 3, 1, 1 } },
1380 {
ISD::FSUB, MVT::v4f32, { 2, 3, 1, 1 } },
1382 {
ISD::FMUL, MVT::f32, { 2, 5, 1, 1 } },
1383 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1387 if (
const auto *Entry =
CostTableLookup(SSE1CostTable, ISD, LT.second))
1388 if (
auto KindCost = Entry->Cost[
CostKind])
1389 return LT.first * *KindCost;
1394 {
ISD::MUL, MVT::i64, { 2, 6, 1, 2 } },
1399 if (
auto KindCost = Entry->Cost[
CostKind])
1400 return LT.first * *KindCost;
1411 {
ISD::MUL, MVT::i8, { 3, 4, 1, 1 } },
1412 {
ISD::MUL, MVT::i16, { 2, 4, 1, 1 } },
1413 {
ISD::MUL, MVT::i32, { 1, 4, 1, 1 } },
1415 {
ISD::FNEG, MVT::f64, { 2, 2, 1, 3 } },
1416 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1417 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1418 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1419 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1423 if (
auto KindCost = Entry->Cost[
CostKind])
1424 return LT.first * *KindCost;
1438 return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost;
1487 CostKind, Mask.size() / 2, BaseTp);
1500 using namespace PatternMatch;
1503 (ST->
hasAVX() && LT.second.getScalarSizeInBits() >= 32)))
1508 if (LT.second.isVector() && LT.second.getScalarType() == MVT::bf16)
1509 LT.second = LT.second.changeVectorElementType(MVT::f16);
1514 int NumElts = LT.second.getVectorNumElements();
1515 if ((
Index % NumElts) == 0)
1518 if (SubLT.second.isVector()) {
1519 int NumSubElts = SubLT.second.getVectorNumElements();
1520 if ((
Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1528 int OrigSubElts = cast<FixedVectorType>(SubTp)->getNumElements();
1529 if (NumSubElts > OrigSubElts && (
Index % OrigSubElts) == 0 &&
1530 (NumSubElts % OrigSubElts) == 0 &&
1531 LT.second.getVectorElementType() ==
1532 SubLT.second.getVectorElementType() &&
1533 LT.second.getVectorElementType().getSizeInBits() ==
1535 assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
1536 "Unexpected number of elements!");
1538 LT.second.getVectorNumElements());
1540 SubLT.second.getVectorNumElements());
1549 return ExtractCost + 1;
1552 "Unexpected vector size");
1554 return ExtractCost + 2;
1565 int NumElts = LT.second.getVectorNumElements();
1567 if (SubLT.second.isVector()) {
1568 int NumSubElts = SubLT.second.getVectorNumElements();
1569 if ((
Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1582 static const CostTblEntry SSE2SubVectorShuffleTbl[] = {
1613 if (
const auto *Entry =
1622 MVT LegalVT = LT.second;
1627 cast<FixedVectorType>(BaseTp)->getNumElements()) {
1631 unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
1638 if (!Mask.empty() && NumOfDests.
isValid()) {
1656 unsigned E = *NumOfDests.
getValue();
1657 unsigned NormalizedVF =
1663 unsigned PrevSrcReg = 0;
1667 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
1668 [
this, SingleOpTy,
CostKind, &PrevSrcReg, &PrevRegMask,
1673 if (PrevRegMask.
empty() || PrevSrcReg != SrcReg ||
1674 PrevRegMask != RegMask)
1682 if (SrcReg != DestReg &&
1687 PrevSrcReg = SrcReg;
1688 PrevRegMask = RegMask;
1701 std::nullopt,
CostKind, 0,
nullptr);
1712 LT.first = NumOfDests * NumOfShufflesPerDest;
1728 if (
const auto *Entry =
1730 return LT.first * Entry->Cost;
1763 if (
const auto *Entry =
1765 return LT.first * Entry->Cost;
1842 if (
const auto *Entry =
CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
1843 if (
auto KindCost = Entry->Cost[
CostKind])
1844 return LT.first * *KindCost;
1897 if (
const auto *Entry =
CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
1898 return LT.first * Entry->Cost;
1919 if (
const auto *Entry =
CostTableLookup(XOPShuffleTbl, Kind, LT.second))
1920 return LT.first * Entry->Cost;
1982 if (
const auto *Entry =
CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
1983 return LT.first * Entry->Cost;
1996 if (
const auto *Entry =
CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
1997 return LT.first * Entry->Cost;
2028 if (
const auto *Entry =
CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
2029 return LT.first * Entry->Cost;
2085 llvm::any_of(Args, [](
const auto &V) {
return isa<LoadInst>(V); });
2087 if (
const auto *Entry =
2090 LT.second.getVectorElementCount()) &&
2091 "Table entry missing from isLegalBroadcastLoad()");
2092 return LT.first * Entry->Cost;
2095 if (
const auto *Entry =
CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
2096 return LT.first * Entry->Cost;
2109 if (
const auto *Entry =
CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
2110 return LT.first * Entry->Cost;
2121 assert(ISD &&
"Invalid opcode");
2127 return Cost == 0 ? 0 :
N;
2942 AVX512BWConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2943 return AdjustCost(Entry->Cost);
2947 AVX512DQConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2948 return AdjustCost(Entry->Cost);
2952 AVX512FConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2953 return AdjustCost(Entry->Cost);
2958 AVX512BWVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2959 return AdjustCost(Entry->Cost);
2963 AVX512DQVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2964 return AdjustCost(Entry->Cost);
2968 SimpleDstTy, SimpleSrcTy))
2969 return AdjustCost(Entry->Cost);
2973 SimpleDstTy, SimpleSrcTy))
2974 return AdjustCost(Entry->Cost);
2979 SimpleDstTy, SimpleSrcTy))
2980 return AdjustCost(Entry->Cost);
2985 SimpleDstTy, SimpleSrcTy))
2986 return AdjustCost(Entry->Cost);
2991 SimpleDstTy, SimpleSrcTy))
2992 return AdjustCost(Entry->Cost);
3007 AVX512BWConversionTbl, ISD, LTDest.second, LTSrc.second))
3008 return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first));
3012 AVX512DQConversionTbl, ISD, LTDest.second, LTSrc.second))
3013 return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first));
3017 AVX512FConversionTbl, ISD, LTDest.second, LTSrc.second))
3018 return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first));
3023 LTDest.second, LTSrc.second))
3024 return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first));
3028 LTDest.second, LTSrc.second))
3029 return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first));
3033 LTDest.second, LTSrc.second))
3034 return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first));
3038 LTDest.second, LTSrc.second))
3039 return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first));
3043 LTDest.second, LTSrc.second))
3044 return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first));
3048 LTDest.second, LTSrc.second))
3049 return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first));
3053 LTDest.second, LTSrc.second))
3054 return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first));
3059 1 < Src->getScalarSizeInBits() && Src->getScalarSizeInBits() < 32) {
3060 Type *ExtSrc = Src->getWithNewBitWidth(32);
3066 if (!(Src->isIntegerTy() &&
I && isa<LoadInst>(
I->getOperand(0))))
3076 1 < Dst->getScalarSizeInBits() && Dst->getScalarSizeInBits() < 32) {
3077 Type *TruncDst = Dst->getWithNewBitWidth(32);
3100 MVT MTy = LT.second;
3103 assert(ISD &&
"Invalid opcode");
3106 if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
3119 Pred = cast<CmpInst>(
I)->getPredicate();
3121 bool CmpWithConstant =
false;
3122 if (
auto *CmpInstr = dyn_cast_or_null<CmpInst>(
I))
3123 CmpWithConstant = isa<Constant>(CmpInstr->getOperand(1));
3128 ExtraCost = CmpWithConstant ? 0 : 1;
3133 ExtraCost = CmpWithConstant ? 0 : 1;
3139 ExtraCost = CmpWithConstant ? 1 : 2;
3150 ExtraCost = CmpWithConstant ? 2 : 3;
3157 if (CondTy && !ST->
hasAVX())
3326 if (ST->useSLMArithCosts())
3328 if (
auto KindCost = Entry->Cost[
CostKind])
3329 return LT.first * (ExtraCost + *KindCost);
3333 if (
auto KindCost = Entry->Cost[
CostKind])
3334 return LT.first * (ExtraCost + *KindCost);
3338 if (
auto KindCost = Entry->Cost[
CostKind])
3339 return LT.first * (ExtraCost + *KindCost);
3343 if (
auto KindCost = Entry->Cost[
CostKind])
3344 return LT.first * (ExtraCost + *KindCost);
3348 if (
auto KindCost = Entry->Cost[
CostKind])
3349 return LT.first * (ExtraCost + *KindCost);
3353 if (
auto KindCost = Entry->Cost[
CostKind])
3354 return LT.first * (ExtraCost + *KindCost);
3358 if (
auto KindCost = Entry->Cost[
CostKind])
3359 return LT.first * (ExtraCost + *KindCost);
3363 if (
auto KindCost = Entry->Cost[
CostKind])
3364 return LT.first * (ExtraCost + *KindCost);
3368 if (
auto KindCost = Entry->Cost[
CostKind])
3369 return LT.first * (ExtraCost + *KindCost);
3373 if (
auto KindCost = Entry->Cost[
CostKind])
3374 return LT.first * (ExtraCost + *KindCost);
3399 {
ISD::FSHL, MVT::v8i64, { 1, 1, 1, 1 } },
3400 {
ISD::FSHL, MVT::v4i64, { 1, 1, 1, 1 } },
3401 {
ISD::FSHL, MVT::v2i64, { 1, 1, 1, 1 } },
3402 {
ISD::FSHL, MVT::v16i32, { 1, 1, 1, 1 } },
3403 {
ISD::FSHL, MVT::v8i32, { 1, 1, 1, 1 } },
3404 {
ISD::FSHL, MVT::v4i32, { 1, 1, 1, 1 } },
3405 {
ISD::FSHL, MVT::v32i16, { 1, 1, 1, 1 } },
3406 {
ISD::FSHL, MVT::v16i16, { 1, 1, 1, 1 } },
3407 {
ISD::FSHL, MVT::v8i16, { 1, 1, 1, 1 } },
3408 {
ISD::ROTL, MVT::v32i16, { 1, 1, 1, 1 } },
3409 {
ISD::ROTL, MVT::v16i16, { 1, 1, 1, 1 } },
3410 {
ISD::ROTL, MVT::v8i16, { 1, 1, 1, 1 } },
3411 {
ISD::ROTR, MVT::v32i16, { 1, 1, 1, 1 } },
3412 {
ISD::ROTR, MVT::v16i16, { 1, 1, 1, 1 } },
3413 {
ISD::ROTR, MVT::v8i16, { 1, 1, 1, 1 } },
3435 {
ISD::CTLZ, MVT::v8i64, { 1, 5, 1, 1 } },
3436 {
ISD::CTLZ, MVT::v16i32, { 1, 5, 1, 1 } },
3437 {
ISD::CTLZ, MVT::v32i16, { 18, 27, 23, 27 } },
3438 {
ISD::CTLZ, MVT::v64i8, { 3, 16, 9, 11 } },
3439 {
ISD::CTLZ, MVT::v4i64, { 1, 5, 1, 1 } },
3440 {
ISD::CTLZ, MVT::v8i32, { 1, 5, 1, 1 } },
3441 {
ISD::CTLZ, MVT::v16i16, { 8, 19, 11, 13 } },
3442 {
ISD::CTLZ, MVT::v32i8, { 2, 11, 9, 10 } },
3443 {
ISD::CTLZ, MVT::v2i64, { 1, 5, 1, 1 } },
3444 {
ISD::CTLZ, MVT::v4i32, { 1, 5, 1, 1 } },
3445 {
ISD::CTLZ, MVT::v8i16, { 3, 15, 4, 6 } },
3446 {
ISD::CTLZ, MVT::v16i8, { 2, 10, 9, 10 } },
3448 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3449 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3450 {
ISD::CTTZ, MVT::v4i64, { 1, 8, 6, 6 } },
3451 {
ISD::CTTZ, MVT::v8i32, { 1, 8, 6, 6 } },
3452 {
ISD::CTTZ, MVT::v2i64, { 1, 8, 6, 6 } },
3453 {
ISD::CTTZ, MVT::v4i32, { 1, 8, 6, 6 } },
3456 {
ISD::ABS, MVT::v32i16, { 1, 1, 1, 1 } },
3457 {
ISD::ABS, MVT::v64i8, { 1, 1, 1, 1 } },
3479 {
ISD::CTLZ, MVT::v8i64, { 8, 22, 23, 23 } },
3480 {
ISD::CTLZ, MVT::v16i32, { 8, 23, 25, 25 } },
3481 {
ISD::CTLZ, MVT::v32i16, { 4, 15, 15, 16 } },
3482 {
ISD::CTLZ, MVT::v64i8, { 3, 12, 10, 9 } },
3483 {
ISD::CTPOP, MVT::v2i64, { 3, 7, 10, 10 } },
3484 {
ISD::CTPOP, MVT::v4i64, { 3, 7, 10, 10 } },
3485 {
ISD::CTPOP, MVT::v8i64, { 3, 8, 10, 12 } },
3486 {
ISD::CTPOP, MVT::v4i32, { 7, 11, 14, 14 } },
3487 {
ISD::CTPOP, MVT::v8i32, { 7, 11, 14, 14 } },
3488 {
ISD::CTPOP, MVT::v16i32, { 7, 12, 14, 16 } },
3489 {
ISD::CTPOP, MVT::v8i16, { 2, 7, 11, 11 } },
3490 {
ISD::CTPOP, MVT::v16i16, { 2, 7, 11, 11 } },
3491 {
ISD::CTPOP, MVT::v32i16, { 3, 7, 11, 13 } },
3495 {
ISD::CTTZ, MVT::v8i16, { 3, 9, 14, 14 } },
3496 {
ISD::CTTZ, MVT::v16i16, { 3, 9, 14, 14 } },
3497 {
ISD::CTTZ, MVT::v32i16, { 3, 10, 14, 16 } },
3498 {
ISD::CTTZ, MVT::v16i8, { 2, 6, 11, 11 } },
3499 {
ISD::CTTZ, MVT::v32i8, { 2, 6, 11, 11 } },
3500 {
ISD::CTTZ, MVT::v64i8, { 3, 7, 11, 13 } },
3501 {
ISD::ROTL, MVT::v32i16, { 2, 8, 6, 8 } },
3502 {
ISD::ROTL, MVT::v16i16, { 2, 8, 6, 7 } },
3503 {
ISD::ROTL, MVT::v8i16, { 2, 7, 6, 7 } },
3504 {
ISD::ROTL, MVT::v64i8, { 5, 6, 11, 12 } },
3505 {
ISD::ROTL, MVT::v32i8, { 5, 15, 7, 10 } },
3506 {
ISD::ROTL, MVT::v16i8, { 5, 15, 7, 10 } },
3507 {
ISD::ROTR, MVT::v32i16, { 2, 8, 6, 8 } },
3508 {
ISD::ROTR, MVT::v16i16, { 2, 8, 6, 7 } },
3509 {
ISD::ROTR, MVT::v8i16, { 2, 7, 6, 7 } },
3510 {
ISD::ROTR, MVT::v64i8, { 5, 6, 12, 14 } },
3511 {
ISD::ROTR, MVT::v32i8, { 5, 14, 6, 9 } },
3512 {
ISD::ROTR, MVT::v16i8, { 5, 14, 6, 9 } },
3521 {
ISD::SMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3522 {
ISD::SMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3523 {
ISD::SMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3524 {
ISD::SMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3529 {
ISD::UMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3530 {
ISD::UMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3531 {
ISD::UMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3532 {
ISD::UMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3537 {
ISD::ABS, MVT::v8i64, { 1, 1, 1, 1 } },
3538 {
ISD::ABS, MVT::v4i64, { 1, 1, 1, 1 } },
3539 {
ISD::ABS, MVT::v2i64, { 1, 1, 1, 1 } },
3540 {
ISD::ABS, MVT::v16i32, { 1, 1, 1, 1 } },
3541 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 1 } },
3542 {
ISD::ABS, MVT::v32i16, { 2, 7, 4, 4 } },
3543 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 1 } },
3544 {
ISD::ABS, MVT::v64i8, { 2, 7, 4, 4 } },
3545 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 1 } },
3553 {
ISD::CTLZ, MVT::v8i64, { 10, 28, 32, 32 } },
3554 {
ISD::CTLZ, MVT::v16i32, { 12, 30, 38, 38 } },
3555 {
ISD::CTLZ, MVT::v32i16, { 8, 15, 29, 29 } },
3556 {
ISD::CTLZ, MVT::v64i8, { 6, 11, 19, 19 } },
3557 {
ISD::CTPOP, MVT::v8i64, { 16, 16, 19, 19 } },
3558 {
ISD::CTPOP, MVT::v16i32, { 24, 19, 27, 27 } },
3559 {
ISD::CTPOP, MVT::v32i16, { 18, 15, 22, 22 } },
3560 {
ISD::CTPOP, MVT::v64i8, { 12, 11, 16, 16 } },
3561 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3562 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3563 {
ISD::CTTZ, MVT::v32i16, { 7, 17, 27, 27 } },
3564 {
ISD::CTTZ, MVT::v64i8, { 6, 13, 21, 21 } },
3565 {
ISD::ROTL, MVT::v8i64, { 1, 1, 1, 1 } },
3566 {
ISD::ROTL, MVT::v4i64, { 1, 1, 1, 1 } },
3567 {
ISD::ROTL, MVT::v2i64, { 1, 1, 1, 1 } },
3568 {
ISD::ROTL, MVT::v16i32, { 1, 1, 1, 1 } },
3569 {
ISD::ROTL, MVT::v8i32, { 1, 1, 1, 1 } },
3570 {
ISD::ROTL, MVT::v4i32, { 1, 1, 1, 1 } },
3571 {
ISD::ROTR, MVT::v8i64, { 1, 1, 1, 1 } },
3572 {
ISD::ROTR, MVT::v4i64, { 1, 1, 1, 1 } },
3573 {
ISD::ROTR, MVT::v2i64, { 1, 1, 1, 1 } },
3574 {
ISD::ROTR, MVT::v16i32, { 1, 1, 1, 1 } },
3575 {
ISD::ROTR, MVT::v8i32, { 1, 1, 1, 1 } },
3576 {
ISD::ROTR, MVT::v4i32, { 1, 1, 1, 1 } },
3583 {
ISD::SMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3584 {
ISD::SMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3585 {
ISD::SMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3586 {
ISD::SMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3587 {
ISD::SMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3588 {
ISD::SMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3589 {
ISD::SMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3590 {
ISD::SMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3591 {
ISD::SMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3592 {
ISD::SMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3593 {
ISD::SMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3594 {
ISD::SMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3595 {
ISD::UMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3596 {
ISD::UMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3597 {
ISD::UMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3598 {
ISD::UMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3599 {
ISD::UMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3600 {
ISD::UMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3601 {
ISD::UMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3602 {
ISD::UMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3603 {
ISD::UMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3604 {
ISD::UMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3605 {
ISD::UMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3606 {
ISD::UMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3634 {
ISD::FSQRT, MVT::v16f32, { 12, 20, 1, 3 } },
3637 {
ISD::FSQRT, MVT::v4f64, { 12, 18, 1, 1 } },
3638 {
ISD::FSQRT, MVT::v8f64, { 24, 32, 1, 3 } },
3654 {
ISD::ROTL, MVT::v4i64, { 4, 7, 5, 6 } },
3655 {
ISD::ROTL, MVT::v8i32, { 4, 7, 5, 6 } },
3656 {
ISD::ROTL, MVT::v16i16, { 4, 7, 5, 6 } },
3657 {
ISD::ROTL, MVT::v32i8, { 4, 7, 5, 6 } },
3658 {
ISD::ROTL, MVT::v2i64, { 1, 3, 1, 1 } },
3659 {
ISD::ROTL, MVT::v4i32, { 1, 3, 1, 1 } },
3660 {
ISD::ROTL, MVT::v8i16, { 1, 3, 1, 1 } },
3661 {
ISD::ROTL, MVT::v16i8, { 1, 3, 1, 1 } },
3662 {
ISD::ROTR, MVT::v4i64, { 4, 7, 8, 9 } },
3663 {
ISD::ROTR, MVT::v8i32, { 4, 7, 8, 9 } },
3664 {
ISD::ROTR, MVT::v16i16, { 4, 7, 8, 9 } },
3665 {
ISD::ROTR, MVT::v32i8, { 4, 7, 8, 9 } },
3666 {
ISD::ROTR, MVT::v2i64, { 1, 3, 3, 3 } },
3667 {
ISD::ROTR, MVT::v4i32, { 1, 3, 3, 3 } },
3668 {
ISD::ROTR, MVT::v8i16, { 1, 3, 3, 3 } },
3669 {
ISD::ROTR, MVT::v16i8, { 1, 3, 3, 3 } },
3680 {
ISD::ABS, MVT::v2i64, { 2, 4, 3, 5 } },
3681 {
ISD::ABS, MVT::v4i64, { 2, 4, 3, 5 } },
3682 {
ISD::ABS, MVT::v4i32, { 1, 1, 1, 1 } },
3683 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 2 } },
3684 {
ISD::ABS, MVT::v8i16, { 1, 1, 1, 1 } },
3685 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 2 } },
3686 {
ISD::ABS, MVT::v16i8, { 1, 1, 1, 1 } },
3687 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 2 } },
3702 {
ISD::CTLZ, MVT::v2i64, { 7, 18, 24, 25 } },
3703 {
ISD::CTLZ, MVT::v4i64, { 14, 18, 24, 44 } },
3704 {
ISD::CTLZ, MVT::v4i32, { 5, 16, 19, 20 } },
3705 {
ISD::CTLZ, MVT::v8i32, { 10, 16, 19, 34 } },
3706 {
ISD::CTLZ, MVT::v8i16, { 4, 13, 14, 15 } },
3707 {
ISD::CTLZ, MVT::v16i16, { 6, 14, 14, 24 } },
3708 {
ISD::CTLZ, MVT::v16i8, { 3, 12, 9, 10 } },
3709 {
ISD::CTLZ, MVT::v32i8, { 4, 12, 9, 14 } },
3710 {
ISD::CTPOP, MVT::v2i64, { 3, 9, 10, 10 } },
3711 {
ISD::CTPOP, MVT::v4i64, { 4, 9, 10, 14 } },
3712 {
ISD::CTPOP, MVT::v4i32, { 7, 12, 14, 14 } },
3713 {
ISD::CTPOP, MVT::v8i32, { 7, 12, 14, 18 } },
3714 {
ISD::CTPOP, MVT::v8i16, { 3, 7, 11, 11 } },
3715 {
ISD::CTPOP, MVT::v16i16, { 6, 8, 11, 18 } },
3718 {
ISD::CTTZ, MVT::v2i64, { 4, 11, 13, 13 } },
3719 {
ISD::CTTZ, MVT::v4i64, { 5, 11, 13, 20 } },
3720 {
ISD::CTTZ, MVT::v4i32, { 7, 14, 17, 17 } },
3721 {
ISD::CTTZ, MVT::v8i32, { 7, 15, 17, 24 } },
3722 {
ISD::CTTZ, MVT::v8i16, { 4, 9, 14, 14 } },
3723 {
ISD::CTTZ, MVT::v16i16, { 6, 9, 14, 24 } },
3724 {
ISD::CTTZ, MVT::v16i8, { 3, 7, 11, 11 } },
3725 {
ISD::CTTZ, MVT::v32i8, { 5, 7, 11, 18 } },
3728 {
ISD::SMAX, MVT::v2i64, { 2, 7, 2, 3 } },
3729 {
ISD::SMAX, MVT::v4i64, { 2, 7, 2, 3 } },
3730 {
ISD::SMAX, MVT::v8i32, { 1, 1, 1, 2 } },
3731 {
ISD::SMAX, MVT::v16i16, { 1, 1, 1, 2 } },
3732 {
ISD::SMAX, MVT::v32i8, { 1, 1, 1, 2 } },
3733 {
ISD::SMIN, MVT::v2i64, { 2, 7, 2, 3 } },
3734 {
ISD::SMIN, MVT::v4i64, { 2, 7, 2, 3 } },
3735 {
ISD::SMIN, MVT::v8i32, { 1, 1, 1, 2 } },
3736 {
ISD::SMIN, MVT::v16i16, { 1, 1, 1, 2 } },
3737 {
ISD::SMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3743 {
ISD::UMAX, MVT::v2i64, { 2, 8, 5, 6 } },
3744 {
ISD::UMAX, MVT::v4i64, { 2, 8, 5, 8 } },
3745 {
ISD::UMAX, MVT::v8i32, { 1, 1, 1, 2 } },
3746 {
ISD::UMAX, MVT::v16i16, { 1, 1, 1, 2 } },
3747 {
ISD::UMAX, MVT::v32i8, { 1, 1, 1, 2 } },
3748 {
ISD::UMIN, MVT::v2i64, { 2, 8, 5, 6 } },
3749 {
ISD::UMIN, MVT::v4i64, { 2, 8, 5, 8 } },
3750 {
ISD::UMIN, MVT::v8i32, { 1, 1, 1, 2 } },
3751 {
ISD::UMIN, MVT::v16i16, { 1, 1, 1, 2 } },
3752 {
ISD::UMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3764 {
ISD::FSQRT, MVT::v8f32, { 14, 21, 1, 3 } },
3766 {
ISD::FSQRT, MVT::v2f64, { 14, 21, 1, 1 } },
3767 {
ISD::FSQRT, MVT::v4f64, { 28, 35, 1, 3 } },
3770 {
ISD::ABS, MVT::v4i64, { 6, 8, 6, 12 } },
3771 {
ISD::ABS, MVT::v8i32, { 3, 6, 4, 5 } },
3772 {
ISD::ABS, MVT::v16i16, { 3, 6, 4, 5 } },
3773 {
ISD::ABS, MVT::v32i8, { 3, 6, 4, 5 } },
3786 {
ISD::BSWAP, MVT::v16i16, { 5, 6, 5, 10 } },
3788 {
ISD::CTLZ, MVT::v4i64, { 29, 33, 49, 58 } },
3789 {
ISD::CTLZ, MVT::v2i64, { 14, 24, 24, 28 } },
3790 {
ISD::CTLZ, MVT::v8i32, { 24, 28, 39, 48 } },
3791 {
ISD::CTLZ, MVT::v4i32, { 12, 20, 19, 23 } },
3792 {
ISD::CTLZ, MVT::v16i16, { 19, 22, 29, 38 } },
3793 {
ISD::CTLZ, MVT::v8i16, { 9, 16, 14, 18 } },
3794 {
ISD::CTLZ, MVT::v32i8, { 14, 15, 19, 28 } },
3795 {
ISD::CTLZ, MVT::v16i8, { 7, 12, 9, 13 } },
3796 {
ISD::CTPOP, MVT::v4i64, { 14, 18, 19, 28 } },
3797 {
ISD::CTPOP, MVT::v2i64, { 7, 14, 10, 14 } },
3798 {
ISD::CTPOP, MVT::v8i32, { 18, 24, 27, 36 } },
3799 {
ISD::CTPOP, MVT::v4i32, { 9, 20, 14, 18 } },
3800 {
ISD::CTPOP, MVT::v16i16, { 16, 21, 22, 31 } },
3801 {
ISD::CTPOP, MVT::v8i16, { 8, 18, 11, 15 } },
3802 {
ISD::CTPOP, MVT::v32i8, { 13, 15, 16, 25 } },
3803 {
ISD::CTPOP, MVT::v16i8, { 6, 12, 8, 12 } },
3804 {
ISD::CTTZ, MVT::v4i64, { 17, 22, 24, 33 } },
3805 {
ISD::CTTZ, MVT::v2i64, { 9, 19, 13, 17 } },
3806 {
ISD::CTTZ, MVT::v8i32, { 21, 27, 32, 41 } },
3807 {
ISD::CTTZ, MVT::v4i32, { 11, 24, 17, 21 } },
3808 {
ISD::CTTZ, MVT::v16i16, { 18, 24, 27, 36 } },
3809 {
ISD::CTTZ, MVT::v8i16, { 9, 21, 14, 18 } },
3810 {
ISD::CTTZ, MVT::v32i8, { 15, 18, 21, 30 } },
3811 {
ISD::CTTZ, MVT::v16i8, { 8, 16, 11, 15 } },
3814 {
ISD::SMAX, MVT::v4i64, { 6, 9, 6, 12 } },
3815 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 4 } },
3816 {
ISD::SMAX, MVT::v8i32, { 4, 6, 5, 6 } },
3817 {
ISD::SMAX, MVT::v16i16, { 4, 6, 5, 6 } },
3818 {
ISD::SMAX, MVT::v32i8, { 4, 6, 5, 6 } },
3819 {
ISD::SMIN, MVT::v4i64, { 6, 9, 6, 12 } },
3820 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
3821 {
ISD::SMIN, MVT::v8i32, { 4, 6, 5, 6 } },
3822 {
ISD::SMIN, MVT::v16i16, { 4, 6, 5, 6 } },
3823 {
ISD::SMIN, MVT::v32i8, { 4, 6, 5, 6 } },
3829 {
ISD::UMAX, MVT::v4i64, { 9, 10, 11, 17 } },
3830 {
ISD::UMAX, MVT::v2i64, { 4, 8, 5, 7 } },
3831 {
ISD::UMAX, MVT::v8i32, { 4, 6, 5, 6 } },
3832 {
ISD::UMAX, MVT::v16i16, { 4, 6, 5, 6 } },
3833 {
ISD::UMAX, MVT::v32i8, { 4, 6, 5, 6 } },
3834 {
ISD::UMIN, MVT::v4i64, { 9, 10, 11, 17 } },
3835 {
ISD::UMIN, MVT::v2i64, { 4, 8, 5, 7 } },
3836 {
ISD::UMIN, MVT::v8i32, { 4, 6, 5, 6 } },
3837 {
ISD::UMIN, MVT::v16i16, { 4, 6, 5, 6 } },
3838 {
ISD::UMIN, MVT::v32i8, { 4, 6, 5, 6 } },
3849 {
ISD::FSQRT, MVT::v4f32, { 21, 21, 1, 1 } },
3850 {
ISD::FSQRT, MVT::v8f32, { 42, 42, 1, 3 } },
3852 {
ISD::FSQRT, MVT::v2f64, { 27, 27, 1, 1 } },
3853 {
ISD::FSQRT, MVT::v4f64, { 54, 54, 1, 3 } },
3875 {
ISD::FSQRT, MVT::v4f32, { 37, 41, 1, 5 } },
3877 {
ISD::FSQRT, MVT::v2f64, { 67, 71, 1, 5 } },
3884 {
ISD::FSQRT, MVT::v4f32, { 40, 41, 1, 5 } },
3886 {
ISD::FSQRT, MVT::v2f64, { 70, 71, 1, 5 } },
3896 {
ISD::FSQRT, MVT::v4f32, { 18, 18, 1, 1 } },
3899 {
ISD::ABS, MVT::v2i64, { 3, 4, 3, 5 } },
3900 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 3 } },
3901 {
ISD::SMAX, MVT::v4i32, { 1, 1, 1, 1 } },
3902 {
ISD::SMAX, MVT::v16i8, { 1, 1, 1, 1 } },
3903 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
3904 {
ISD::SMIN, MVT::v4i32, { 1, 1, 1, 1 } },
3905 {
ISD::SMIN, MVT::v16i8, { 1, 1, 1, 1 } },
3906 {
ISD::UMAX, MVT::v2i64, { 2, 11, 6, 7 } },
3907 {
ISD::UMAX, MVT::v4i32, { 1, 1, 1, 1 } },
3908 {
ISD::UMAX, MVT::v8i16, { 1, 1, 1, 1 } },
3909 {
ISD::UMIN, MVT::v2i64, { 2, 11, 6, 7 } },
3910 {
ISD::UMIN, MVT::v4i32, { 1, 1, 1, 1 } },
3911 {
ISD::UMIN, MVT::v8i16, { 1, 1, 1, 1 } },
3914 {
ISD::ABS, MVT::v4i32, { 1, 2, 1, 1 } },
3915 {
ISD::ABS, MVT::v8i16, { 1, 2, 1, 1 } },
3916 {
ISD::ABS, MVT::v16i8, { 1, 2, 1, 1 } },
3924 {
ISD::CTLZ, MVT::v2i64, { 18, 28, 28, 35 } },
3925 {
ISD::CTLZ, MVT::v4i32, { 15, 20, 22, 28 } },
3926 {
ISD::CTLZ, MVT::v8i16, { 13, 17, 16, 22 } },
3927 {
ISD::CTLZ, MVT::v16i8, { 11, 15, 10, 16 } },
3928 {
ISD::CTPOP, MVT::v2i64, { 13, 19, 12, 18 } },
3929 {
ISD::CTPOP, MVT::v4i32, { 18, 24, 16, 22 } },
3930 {
ISD::CTPOP, MVT::v8i16, { 13, 18, 14, 20 } },
3931 {
ISD::CTPOP, MVT::v16i8, { 11, 12, 10, 16 } },
3932 {
ISD::CTTZ, MVT::v2i64, { 13, 25, 15, 22 } },
3933 {
ISD::CTTZ, MVT::v4i32, { 18, 26, 19, 25 } },
3934 {
ISD::CTTZ, MVT::v8i16, { 13, 20, 17, 23 } },
3935 {
ISD::CTTZ, MVT::v16i8, { 11, 16, 13, 19 } }
3938 {
ISD::ABS, MVT::v2i64, { 3, 6, 5, 5 } },
3939 {
ISD::ABS, MVT::v4i32, { 1, 4, 4, 4 } },
3940 {
ISD::ABS, MVT::v8i16, { 1, 2, 3, 3 } },
3941 {
ISD::ABS, MVT::v16i8, { 1, 2, 3, 3 } },
3946 {
ISD::BSWAP, MVT::v2i64, { 5, 6, 11, 11 } },
3949 {
ISD::CTLZ, MVT::v2i64, { 10, 45, 36, 38 } },
3950 {
ISD::CTLZ, MVT::v4i32, { 10, 45, 38, 40 } },
3951 {
ISD::CTLZ, MVT::v8i16, { 9, 38, 32, 34 } },
3952 {
ISD::CTLZ, MVT::v16i8, { 8, 39, 29, 32 } },
3953 {
ISD::CTPOP, MVT::v2i64, { 12, 26, 16, 18 } },
3954 {
ISD::CTPOP, MVT::v4i32, { 15, 29, 21, 23 } },
3955 {
ISD::CTPOP, MVT::v8i16, { 13, 25, 18, 20 } },
3956 {
ISD::CTPOP, MVT::v16i8, { 10, 21, 14, 16 } },
3957 {
ISD::CTTZ, MVT::v2i64, { 14, 28, 19, 21 } },
3958 {
ISD::CTTZ, MVT::v4i32, { 18, 31, 24, 26 } },
3959 {
ISD::CTTZ, MVT::v8i16, { 16, 27, 21, 23 } },
3960 {
ISD::CTTZ, MVT::v16i8, { 13, 23, 17, 19 } },
3963 {
ISD::SMAX, MVT::v2i64, { 4, 8, 15, 15 } },
3964 {
ISD::SMAX, MVT::v4i32, { 2, 4, 5, 5 } },
3965 {
ISD::SMAX, MVT::v8i16, { 1, 1, 1, 1 } },
3966 {
ISD::SMAX, MVT::v16i8, { 2, 4, 5, 5 } },
3967 {
ISD::SMIN, MVT::v2i64, { 4, 8, 15, 15 } },
3968 {
ISD::SMIN, MVT::v4i32, { 2, 4, 5, 5 } },
3969 {
ISD::SMIN, MVT::v8i16, { 1, 1, 1, 1 } },
3970 {
ISD::SMIN, MVT::v16i8, { 2, 4, 5, 5 } },
3975 {
ISD::UMAX, MVT::v2i64, { 4, 8, 15, 15 } },
3976 {
ISD::UMAX, MVT::v4i32, { 2, 5, 8, 8 } },
3977 {
ISD::UMAX, MVT::v8i16, { 1, 3, 3, 3 } },
3978 {
ISD::UMAX, MVT::v16i8, { 1, 1, 1, 1 } },
3979 {
ISD::UMIN, MVT::v2i64, { 4, 8, 15, 15 } },
3980 {
ISD::UMIN, MVT::v4i32, { 2, 5, 8, 8 } },
3981 {
ISD::UMIN, MVT::v8i16, { 1, 3, 3, 3 } },
3982 {
ISD::UMIN, MVT::v16i8, { 1, 1, 1, 1 } },
3988 {
ISD::FSQRT, MVT::v2f64, { 32, 32, 1, 1 } },
3994 {
ISD::FSQRT, MVT::v4f32, { 56, 56, 1, 2 } },
4021 {
ISD::ABS, MVT::i64, { 1, 2, 3, 4 } },
4029 {
ISD::ROTL, MVT::i64, { 2, 3, 1, 3 } },
4030 {
ISD::ROTR, MVT::i64, { 2, 3, 1, 3 } },
4032 {
ISD::FSHL, MVT::i64, { 4, 4, 1, 4 } },
4033 {
ISD::SMAX, MVT::i64, { 1, 3, 2, 3 } },
4034 {
ISD::SMIN, MVT::i64, { 1, 3, 2, 3 } },
4035 {
ISD::UMAX, MVT::i64, { 1, 3, 2, 3 } },
4036 {
ISD::UMIN, MVT::i64, { 1, 3, 2, 3 } },
4042 {
ISD::ABS, MVT::i32, { 1, 2, 3, 4 } },
4043 {
ISD::ABS, MVT::i16, { 2, 2, 3, 4 } },
4044 {
ISD::ABS, MVT::i8, { 2, 4, 4, 4 } },
4065 {
ISD::ROTL, MVT::i32, { 2, 3, 1, 3 } },
4066 {
ISD::ROTL, MVT::i16, { 2, 3, 1, 3 } },
4068 {
ISD::ROTR, MVT::i32, { 2, 3, 1, 3 } },
4069 {
ISD::ROTR, MVT::i16, { 2, 3, 1, 3 } },
4074 {
ISD::FSHL, MVT::i32, { 4, 4, 1, 4 } },
4075 {
ISD::FSHL, MVT::i16, { 4, 4, 2, 5 } },
4077 {
ISD::SMAX, MVT::i32, { 1, 2, 2, 3 } },
4078 {
ISD::SMAX, MVT::i16, { 1, 4, 2, 4 } },
4080 {
ISD::SMIN, MVT::i32, { 1, 2, 2, 3 } },
4081 {
ISD::SMIN, MVT::i16, { 1, 4, 2, 4 } },
4083 {
ISD::UMAX, MVT::i32, { 1, 2, 2, 3 } },
4084 {
ISD::UMAX, MVT::i16, { 1, 4, 2, 4 } },
4086 {
ISD::UMIN, MVT::i32, { 1, 2, 2, 3 } },
4087 {
ISD::UMIN, MVT::i16, { 1, 4, 2, 4 } },
4107 case Intrinsic::abs:
4110 case Intrinsic::bitreverse:
4113 case Intrinsic::bswap:
4116 case Intrinsic::ctlz:
4119 case Intrinsic::ctpop:
4122 case Intrinsic::cttz:
4125 case Intrinsic::fshl:
4129 if (Args[0] == Args[1]) {
4140 case Intrinsic::fshr:
4145 if (Args[0] == Args[1]) {
4156 case Intrinsic::lrint:
4157 case Intrinsic::llrint:
4166 case Intrinsic::maxnum:
4167 case Intrinsic::minnum:
4171 case Intrinsic::sadd_sat:
4174 case Intrinsic::smax:
4177 case Intrinsic::smin:
4180 case Intrinsic::ssub_sat:
4183 case Intrinsic::uadd_sat:
4186 case Intrinsic::umax:
4189 case Intrinsic::umin:
4192 case Intrinsic::usub_sat:
4195 case Intrinsic::sqrt:
4198 case Intrinsic::sadd_with_overflow:
4199 case Intrinsic::ssub_with_overflow:
4202 OpTy =
RetTy->getContainedType(0);
4204 case Intrinsic::uadd_with_overflow:
4205 case Intrinsic::usub_with_overflow:
4208 OpTy =
RetTy->getContainedType(0);
4210 case Intrinsic::umul_with_overflow:
4211 case Intrinsic::smul_with_overflow:
4214 OpTy =
RetTy->getContainedType(0);
4221 MVT MTy = LT.second;
4224 if (((ISD ==
ISD::CTTZ && !ST->hasBMI()) ||
4225 (ISD ==
ISD::CTLZ && !ST->hasLZCNT())) &&
4228 if (
auto *Cst = dyn_cast<ConstantInt>(Args[1]))
4229 if (Cst->isAllOnesValue())
4237 auto adjustTableCost = [](
int ISD,
unsigned Cost,
4245 return LegalizationCost * 1;
4247 return LegalizationCost * (int)
Cost;
4250 if (ST->useGLMDivSqrtCosts())
4252 if (
auto KindCost = Entry->Cost[
CostKind])
4253 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4256 if (ST->useSLMArithCosts())
4258 if (
auto KindCost = Entry->Cost[
CostKind])
4259 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4263 if (
const auto *Entry =
CostTableLookup(AVX512VBMI2CostTbl, ISD, MTy))
4264 if (
auto KindCost = Entry->Cost[
CostKind])
4265 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4268 if (ST->hasBITALG())
4269 if (
const auto *Entry =
CostTableLookup(AVX512BITALGCostTbl, ISD, MTy))
4270 if (
auto KindCost = Entry->Cost[
CostKind])
4271 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4274 if (ST->hasVPOPCNTDQ())
4275 if (
const auto *Entry =
CostTableLookup(AVX512VPOPCNTDQCostTbl, ISD, MTy))
4276 if (
auto KindCost = Entry->Cost[
CostKind])
4277 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4282 if (
auto KindCost = Entry->Cost[
CostKind])
4283 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4288 if (
auto KindCost = Entry->Cost[
CostKind])
4289 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4294 if (
auto KindCost = Entry->Cost[
CostKind])
4295 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4300 if (
auto KindCost = Entry->Cost[
CostKind])
4301 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4306 if (
auto KindCost = Entry->Cost[
CostKind])
4307 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4312 if (
auto KindCost = Entry->Cost[
CostKind])
4313 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4318 if (
auto KindCost = Entry->Cost[
CostKind])
4319 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4324 if (
auto KindCost = Entry->Cost[
CostKind])
4325 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4330 if (
auto KindCost = Entry->Cost[
CostKind])
4331 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4336 if (
auto KindCost = Entry->Cost[
CostKind])
4337 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4342 if (
auto KindCost = Entry->Cost[
CostKind])
4343 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4348 if (
auto KindCost = Entry->Cost[
CostKind])
4349 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4355 if (
auto KindCost = Entry->Cost[
CostKind])
4356 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4360 if (
auto KindCost = Entry->Cost[
CostKind])
4361 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4365 if (ST->hasLZCNT()) {
4368 if (
auto KindCost = Entry->Cost[
CostKind])
4369 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4373 if (
auto KindCost = Entry->Cost[
CostKind])
4374 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4378 if (ST->hasPOPCNT()) {
4381 if (
auto KindCost = Entry->Cost[
CostKind])
4382 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4386 if (
auto KindCost = Entry->Cost[
CostKind])
4387 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4391 if (ISD ==
ISD::BSWAP && ST->hasMOVBE() && ST->hasFastMOVBE()) {
4393 if (II->hasOneUse() && isa<StoreInst>(II->user_back()))
4395 if (
auto *LI = dyn_cast<LoadInst>(II->getOperand(0))) {
4396 if (LI->hasOneUse())
4404 if (
auto KindCost = Entry->Cost[
CostKind])
4405 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4409 if (
auto KindCost = Entry->Cost[
CostKind])
4410 return adjustTableCost(Entry->ISD, *KindCost, LT.first, ICA.
getFlags());
4433 if (
Index == -1U && (Opcode == Instruction::ExtractElement ||
4434 Opcode == Instruction::InsertElement)) {
4439 assert(isa<FixedVectorType>(Val) &&
"Fixed vector type expected");
4444 if (Opcode == Instruction::ExtractElement) {
4450 if (Opcode == Instruction::InsertElement) {
4458 if (
Index != -1U && (Opcode == Instruction::ExtractElement ||
4459 Opcode == Instruction::InsertElement)) {
4461 if (Opcode == Instruction::ExtractElement &&
4463 cast<FixedVectorType>(Val)->getNumElements() > 1)
4470 if (!LT.second.isVector())
4474 unsigned SizeInBits = LT.second.getSizeInBits();
4475 unsigned NumElts = LT.second.getVectorNumElements();
4476 unsigned SubNumElts = NumElts;
4481 if (SizeInBits > 128) {
4482 assert((SizeInBits % 128) == 0 &&
"Illegal vector");
4483 unsigned NumSubVecs = SizeInBits / 128;
4484 SubNumElts = NumElts / NumSubVecs;
4485 if (SubNumElts <=
Index) {
4486 RegisterFileMoveCost += (Opcode == Instruction::InsertElement ? 2 : 1);
4487 Index %= SubNumElts;
4491 MVT MScalarTy = LT.second.getScalarType();
4492 auto IsCheapPInsrPExtrInsertPS = [&]() {
4495 return (MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4497 (MScalarTy == MVT::f32 && ST->
hasSSE41() &&
4498 Opcode == Instruction::InsertElement);
4506 (Opcode != Instruction::InsertElement || !Op0 ||
4507 isa<UndefValue>(Op0)))
4508 return RegisterFileMoveCost;
4510 if (Opcode == Instruction::InsertElement &&
4511 isa_and_nonnull<UndefValue>(Op0)) {
4513 if (isa_and_nonnull<LoadInst>(Op1))
4514 return RegisterFileMoveCost;
4515 if (!IsCheapPInsrPExtrInsertPS()) {
4518 return 2 + RegisterFileMoveCost;
4520 return 1 + RegisterFileMoveCost;
4525 if (ScalarType->
isIntegerTy() && Opcode == Instruction::ExtractElement)
4526 return 1 + RegisterFileMoveCost;
4530 assert(ISD &&
"Unexpected vector opcode");
4531 if (ST->useSLMArithCosts())
4533 return Entry->Cost + RegisterFileMoveCost;
4536 if (IsCheapPInsrPExtrInsertPS())
4537 return 1 + RegisterFileMoveCost;
4546 if (Opcode == Instruction::InsertElement) {
4547 auto *SubTy = cast<VectorType>(Val);
4555 return ShuffleCost + IntOrFpCost + RegisterFileMoveCost;
4559 RegisterFileMoveCost;
4564 bool Insert,
bool Extract,
4567 cast<FixedVectorType>(Ty)->getNumElements() &&
4568 "Vector size mismatch");
4571 MVT MScalarTy = LT.second.getScalarType();
4572 unsigned LegalVectorBitWidth = LT.second.getSizeInBits();
4575 constexpr unsigned LaneBitWidth = 128;
4576 assert((LegalVectorBitWidth < LaneBitWidth ||
4577 (LegalVectorBitWidth % LaneBitWidth) == 0) &&
4580 const int NumLegalVectors = *LT.first.getValue();
4581 assert(NumLegalVectors >= 0 &&
"Negative cost!");
4586 if ((MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4588 (MScalarTy == MVT::f32 && ST->
hasSSE41())) {
4591 if (LegalVectorBitWidth <= LaneBitWidth) {
4607 assert((LegalVectorBitWidth % LaneBitWidth) == 0 &&
"Illegal vector");
4608 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
4609 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
4610 unsigned NumLegalElts =
4611 LT.second.getVectorNumElements() * NumLegalVectors;
4613 "Vector has been legalized to smaller element count");
4614 assert((NumLegalElts % NumLanesTotal) == 0 &&
4615 "Unexpected elts per lane");
4616 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
4618 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
4622 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
4624 NumEltsPerLane, NumEltsPerLane *
I);
4625 if (LaneEltMask.
isZero())
4636 APInt AffectedLanes =
4639 AffectedLanes, NumLegalVectors,
true);
4640 for (
int LegalVec = 0; LegalVec != NumLegalVectors; ++LegalVec) {
4641 for (
unsigned Lane = 0; Lane != NumLegalLanes; ++Lane) {
4642 unsigned I = NumLegalLanes * LegalVec + Lane;
4645 if (!AffectedLanes[
I] ||
4646 (Lane == 0 && FullyAffectedLegalVectors[LegalVec]))
4653 }
else if (LT.second.isVector()) {
4664 unsigned NumElts = LT.second.getVectorNumElements();
4666 PowerOf2Ceil(cast<FixedVectorType>(Ty)->getNumElements());
4667 Cost += (std::min<unsigned>(NumElts, Pow2Elts) - 1) * LT.first;
4676 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
4677 unsigned MaxElts = ST->
hasAVX2() ? 32 : 16;
4678 unsigned MOVMSKCost = (NumElts + MaxElts - 1) / MaxElts;
4682 if (LT.second.isVector()) {
4683 unsigned NumLegalElts =
4684 LT.second.getVectorNumElements() * NumLegalVectors;
4686 "Vector has been legalized to smaller element count");
4690 if (LegalVectorBitWidth > LaneBitWidth) {
4691 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
4692 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
4693 assert((NumLegalElts % NumLanesTotal) == 0 &&
4694 "Unexpected elts per lane");
4695 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
4699 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
4703 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
4705 NumEltsPerLane,
I * NumEltsPerLane);
4706 if (LaneEltMask.
isZero())
4711 LaneTy, LaneEltMask,
false, Extract,
CostKind);
4728 int VF,
const APInt &DemandedDstElts,
4734 auto bailout = [&]() {
4744 unsigned PromEltTyBits = EltTyBits;
4745 switch (EltTyBits) {
4776 int NumDstElements = VF * ReplicationFactor;
4790 if (PromEltTyBits != EltTyBits) {
4796 Instruction::SExt, PromSrcVecTy, SrcVecTy,
4803 ReplicationFactor, VF,
4809 "We expect that the legalization doesn't affect the element width, "
4810 "doesn't coalesce/split elements.");
4813 unsigned NumDstVectors =
4814 divideCeil(DstVecTy->getNumElements(), NumEltsPerDstVec);
4823 DemandedDstElts.
zext(NumDstVectors * NumEltsPerDstVec), NumDstVectors);
4824 unsigned NumDstVectorsDemanded = DemandedDstVectors.
popcount();
4829 return NumDstVectorsDemanded * SingleShuffleCost;
4840 if (
auto *SI = dyn_cast_or_null<StoreInst>(
I)) {
4843 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(SI->getPointerOperand())) {
4844 if (!
all_of(
GEP->indices(), [](
Value *V) { return isa<Constant>(V); }))
4851 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
4861 auto *VTy = dyn_cast<FixedVectorType>(Src);
4866 if (Opcode == Instruction::Store && OpInfo.
isConstant())
4872 if (!VTy || !LT.second.isVector()) {
4874 return (LT.second.isFloatingPoint() ?
Cost : 0) + LT.first * 1;
4877 bool IsLoad = Opcode == Instruction::Load;
4879 Type *EltTy = VTy->getElementType();
4884 const unsigned SrcNumElt = VTy->getNumElements();
4887 int NumEltRemaining = SrcNumElt;
4889 auto NumEltDone = [&]() {
return SrcNumElt - NumEltRemaining; };
4891 const int MaxLegalOpSizeBytes =
divideCeil(LT.second.getSizeInBits(), 8);
4894 const unsigned XMMBits = 128;
4895 if (XMMBits % EltTyBits != 0)
4899 const int NumEltPerXMM = XMMBits / EltTyBits;
4903 for (
int CurrOpSizeBytes = MaxLegalOpSizeBytes, SubVecEltsLeft = 0;
4904 NumEltRemaining > 0; CurrOpSizeBytes /= 2) {
4906 if ((8 * CurrOpSizeBytes) % EltTyBits != 0)
4910 int CurrNumEltPerOp = (8 * CurrOpSizeBytes) / EltTyBits;
4912 assert(CurrOpSizeBytes > 0 && CurrNumEltPerOp > 0 &&
"How'd we get here?");
4913 assert((((NumEltRemaining * EltTyBits) < (2 * 8 * CurrOpSizeBytes)) ||
4914 (CurrOpSizeBytes == MaxLegalOpSizeBytes)) &&
4915 "Unless we haven't halved the op size yet, "
4916 "we have less than two op's sized units of work left.");
4918 auto *CurrVecTy = CurrNumEltPerOp > NumEltPerXMM
4922 assert(CurrVecTy->getNumElements() % CurrNumEltPerOp == 0 &&
4923 "After halving sizes, the vector elt count is no longer a multiple "
4924 "of number of elements per operation?");
4925 auto *CoalescedVecTy =
4926 CurrNumEltPerOp == 1
4930 EltTyBits * CurrNumEltPerOp),
4931 CurrVecTy->getNumElements() / CurrNumEltPerOp);
4934 "coalesciing elements doesn't change vector width.");
4936 while (NumEltRemaining > 0) {
4937 assert(SubVecEltsLeft >= 0 &&
"Subreg element count overconsumtion?");
4941 if (NumEltRemaining < CurrNumEltPerOp &&
4942 (!IsLoad || Alignment.
valueOrOne() < CurrOpSizeBytes) &&
4943 CurrOpSizeBytes != 1)
4946 bool Is0thSubVec = (NumEltDone() % LT.second.getVectorNumElements()) == 0;
4949 if (SubVecEltsLeft == 0) {
4950 SubVecEltsLeft += CurrVecTy->getNumElements();
4955 VTy, std::nullopt,
CostKind, NumEltDone(),
4963 if (CurrOpSizeBytes <= 32 / 8 && !Is0thSubVec) {
4964 int NumEltDoneInCurrXMM = NumEltDone() % NumEltPerXMM;
4965 assert(NumEltDoneInCurrXMM % CurrNumEltPerOp == 0 &&
"");
4966 int CoalescedVecEltIdx = NumEltDoneInCurrXMM / CurrNumEltPerOp;
4967 APInt DemandedElts =
4969 CoalescedVecEltIdx, CoalescedVecEltIdx + 1);
4970 assert(DemandedElts.
popcount() == 1 &&
"Inserting single value");
4980 if (CurrOpSizeBytes == 32 && ST->isUnalignedMem32Slow())
4982 else if (CurrOpSizeBytes < 4)
4987 SubVecEltsLeft -= CurrNumEltPerOp;
4988 NumEltRemaining -= CurrNumEltPerOp;
4993 assert(NumEltRemaining <= 0 &&
"Should have processed all the elements.");
5002 bool IsLoad = (Instruction::Load == Opcode);
5003 bool IsStore = (Instruction::Store == Opcode);
5005 auto *SrcVTy = dyn_cast<FixedVectorType>(SrcTy);
5010 unsigned NumElem = SrcVTy->getNumElements();
5018 MaskTy, DemandedElts,
false,
true,
CostKind);
5023 InstructionCost MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
5025 SrcVTy, DemandedElts, IsLoad, IsStore,
CostKind);
5029 return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
5036 if (VT.isSimple() && LT.second != VT.getSimpleVT() &&
5037 LT.second.getVectorNumElements() == NumElem)
5044 else if (LT.first * LT.second.getVectorNumElements() > NumElem) {
5046 LT.second.getVectorNumElements());
5054 return Cost + LT.first * (IsLoad ? 2 : 8);
5057 return Cost + LT.first;
5065 if (
Info.isSameBase() &&
Info.isKnownStride()) {
5069 if (
const auto *BaseGEP = dyn_cast<GetElementPtrInst>(
Base)) {
5071 return getGEPCost(BaseGEP->getSourceElementType(),
5072 BaseGEP->getPointerOperand(), Indices,
nullptr,
5087 const unsigned NumVectorInstToHideOverhead = 10;
5100 return NumVectorInstToHideOverhead;
5110 std::optional<FastMathFlags> FMF,
5151 assert(ISD &&
"Invalid opcode");
5159 if (ST->useSLMArithCosts())
5174 MVT MTy = LT.second;
5176 auto *ValVTy = cast<FixedVectorType>(ValTy);
5189 if (LT.first != 1 && MTy.
isVector() &&
5195 ArithmeticCost *= LT.first - 1;
5198 if (ST->useSLMArithCosts())
5200 return ArithmeticCost + Entry->Cost;
5204 return ArithmeticCost + Entry->Cost;
5208 return ArithmeticCost + Entry->Cost;
5257 if (ValVTy->getElementType()->isIntegerTy(1)) {
5259 if (LT.first != 1 && MTy.
isVector() &&
5265 ArithmeticCost *= LT.first - 1;
5269 if (
const auto *Entry =
CostTableLookup(AVX512BoolReduction, ISD, MTy))
5270 return ArithmeticCost + Entry->Cost;
5273 return ArithmeticCost + Entry->Cost;
5276 return ArithmeticCost + Entry->Cost;
5279 return ArithmeticCost + Entry->Cost;
5284 unsigned NumVecElts = ValVTy->getNumElements();
5285 unsigned ScalarSize = ValVTy->getScalarSizeInBits();
5295 if (LT.first != 1 && MTy.
isVector() &&
5301 ReductionCost *= LT.first - 1;
5307 while (NumVecElts > 1) {
5309 unsigned Size = NumVecElts * ScalarSize;
5318 }
else if (
Size == 128) {
5321 if (ValVTy->isFloatingPointTy())
5328 std::nullopt,
CostKind, 0,
nullptr);
5329 }
else if (
Size == 64) {
5332 if (ValVTy->isFloatingPointTy())
5339 std::nullopt,
CostKind, 0,
nullptr);
5345 Instruction::LShr, ShiftTy,
CostKind,
5372 MVT MTy = LT.second;
5376 ISD = (IID == Intrinsic::umin || IID == Intrinsic::umax) ?
ISD::UMIN
5380 "Expected float point or integer vector type.");
5381 ISD = (IID == Intrinsic::minnum || IID == Intrinsic::maxnum)
5449 auto *ValVTy = cast<FixedVectorType>(ValTy);
5450 unsigned NumVecElts = ValVTy->getNumElements();
5454 if (LT.first != 1 && MTy.
isVector() &&
5460 MinMaxCost *= LT.first - 1;
5466 return MinMaxCost + Entry->Cost;
5470 return MinMaxCost + Entry->Cost;
5474 return MinMaxCost + Entry->Cost;
5478 return MinMaxCost + Entry->Cost;
5490 while (NumVecElts > 1) {
5492 unsigned Size = NumVecElts * ScalarSize;
5500 }
else if (
Size == 128) {
5509 std::nullopt,
CostKind, 0,
nullptr);
5510 }
else if (
Size == 64) {
5518 std::nullopt,
CostKind, 0,
nullptr);
5571 if (BitSize % 64 != 0)
5572 ImmVal = Imm.sext(
alignTo(BitSize, 64));
5577 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
5583 return std::max<InstructionCost>(1,
Cost);
5598 unsigned ImmIdx = ~0U;
5602 case Instruction::GetElementPtr:
5609 case Instruction::Store:
5612 case Instruction::ICmp:
5618 if (
Idx == 1 && Imm.getBitWidth() == 64) {
5619 uint64_t ImmVal = Imm.getZExtValue();
5620 if (ImmVal == 0x100000000ULL || ImmVal == 0xffffffff)
5625 case Instruction::And:
5629 if (
Idx == 1 && Imm.getBitWidth() == 64 && Imm.isIntN(32))
5633 case Instruction::Add:
5634 case Instruction::Sub:
5636 if (
Idx == 1 && Imm.getBitWidth() == 64 && Imm.getZExtValue() == 0x80000000)
5640 case Instruction::UDiv:
5641 case Instruction::SDiv:
5642 case Instruction::URem:
5643 case Instruction::SRem:
5648 case Instruction::Mul:
5649 case Instruction::Or:
5650 case Instruction::Xor:
5654 case Instruction::Shl:
5655 case Instruction::LShr:
5656 case Instruction::AShr:
5660 case Instruction::Trunc:
5661 case Instruction::ZExt:
5662 case Instruction::SExt:
5663 case Instruction::IntToPtr:
5664 case Instruction::PtrToInt:
5665 case Instruction::BitCast:
5666 case Instruction::PHI:
5667 case Instruction::Call:
5668 case Instruction::Select:
5669 case Instruction::Ret:
5670 case Instruction::Load:
5674 if (
Idx == ImmIdx) {
5699 case Intrinsic::sadd_with_overflow:
5700 case Intrinsic::uadd_with_overflow:
5701 case Intrinsic::ssub_with_overflow:
5702 case Intrinsic::usub_with_overflow:
5703 case Intrinsic::smul_with_overflow:
5704 case Intrinsic::umul_with_overflow:
5705 if ((
Idx == 1) && Imm.getBitWidth() <= 64 && Imm.isSignedIntN(32))
5708 case Intrinsic::experimental_stackmap:
5709 if ((
Idx < 2) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
5712 case Intrinsic::experimental_patchpoint_void:
5713 case Intrinsic::experimental_patchpoint:
5714 if ((
Idx < 4) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
5725 return Opcode == Instruction::PHI ? 0 : 1;
5730int X86TTIImpl::getGatherOverhead()
const {
5743int X86TTIImpl::getScatterOverhead()
const {
5758 assert(isa<VectorType>(SrcVTy) &&
"Unexpected type in getGSVectorCost");
5759 unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
5769 if (IndexSize < 64 || !
GEP)
5772 unsigned NumOfVarIndices = 0;
5773 const Value *Ptrs =
GEP->getPointerOperand();
5776 for (
unsigned I = 1, E =
GEP->getNumOperands();
I != E; ++
I) {
5777 if (isa<Constant>(
GEP->getOperand(
I)))
5779 Type *IndxTy =
GEP->getOperand(
I)->getType();
5780 if (
auto *IndexVTy = dyn_cast<VectorType>(IndxTy))
5781 IndxTy = IndexVTy->getElementType();
5783 !isa<SExtInst>(
GEP->getOperand(
I))) ||
5784 ++NumOfVarIndices > 1)
5787 return (
unsigned)32;
5792 unsigned IndexSize = (ST->
hasAVX512() && VF >= 16)
5793 ? getIndexSizeInBits(
Ptr,
DL)
5801 *std::max(IdxsLT.first, SrcLT.first).getValue();
5802 if (SplitFactor > 1) {
5806 return SplitFactor * getGSVectorCost(Opcode,
CostKind, SplitSrcTy,
Ptr,
5812 const int GSOverhead = (Opcode == Instruction::Load)
5813 ? getGatherOverhead()
5814 : getScatterOverhead();
5830 Type *SrcVTy,
bool VariableMask,
5834 unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
5842 MaskTy, DemandedElts,
false,
true,
CostKind);
5847 MaskUnpackCost += VF * (BranchCost + ScalarCompareCost);
5852 DemandedElts,
false,
true,
CostKind);
5862 cast<FixedVectorType>(SrcVTy), DemandedElts,
5863 Opcode == Instruction::Load,
5864 Opcode == Instruction::Store,
CostKind);
5866 return AddressUnpackCost + MemoryOpCost + MaskUnpackCost + InsertExtractCost;
5871 unsigned Opcode,
Type *SrcVTy,
const Value *
Ptr,
bool VariableMask,
5875 if ((Opcode == Instruction::Load &&
5878 Align(Alignment))) ||
5879 (Opcode == Instruction::Store &&
5890 if (!PtrTy &&
Ptr->getType()->isVectorTy())
5891 PtrTy = dyn_cast<PointerType>(
5892 cast<VectorType>(
Ptr->getType())->getElementType());
5893 assert(PtrTy &&
"Unexpected type for Ptr argument");
5896 if ((Opcode == Instruction::Load &&
5899 Align(Alignment)))) ||
5900 (Opcode == Instruction::Store &&
5903 Align(Alignment)))))
5904 return getGSScalarCost(Opcode,
CostKind, SrcVTy, VariableMask, Alignment,
5907 return getGSVectorCost(Opcode,
CostKind, SrcVTy,
Ptr, Alignment,
5923 return ST->hasMacroFusion() || ST->hasBranchFusion();
5931 if (isa<VectorType>(DataTy) &&
5932 cast<FixedVectorType>(DataTy)->getNumElements() == 1)
5942 if (ScalarTy->
isHalfTy() && ST->hasBWI())
5952 return IntWidth == 32 || IntWidth == 64 ||
5953 ((IntWidth == 8 || IntWidth == 16) && ST->hasBWI());
5965 if (Alignment >= DataSize && (DataSize == 16 || DataSize == 32))
5982 if (Alignment < DataSize || DataSize < 4 || DataSize > 32 ||
6004 if (!isa<VectorType>(DataTy))
6011 if (cast<FixedVectorType>(DataTy)->getNumElements() == 1)
6014 Type *ScalarTy = cast<VectorType>(DataTy)->getElementType();
6023 return IntWidth == 32 || IntWidth == 64 ||
6024 ((IntWidth == 8 || IntWidth == 16) && ST->hasVBMI2());
6031bool X86TTIImpl::supportsGather()
const {
6045 unsigned NumElts = cast<FixedVectorType>(VTy)->getNumElements();
6046 return NumElts == 1 ||
6047 (ST->
hasAVX512() && (NumElts == 2 || (NumElts == 4 && !ST->hasVLX())));
6062 return IntWidth == 32 || IntWidth == 64;
6066 if (!supportsGather() || !ST->preferGather())
6081 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
6082 assert(OpcodeMask.
size() == NumElements &&
"Mask and VecTy are incompatible");
6087 for (
int Lane : seq<int>(0, NumElements)) {
6088 unsigned Opc = OpcodeMask.
test(Lane) ? Opcode1 : Opcode0;
6090 if (Lane % 2 == 0 && Opc != Instruction::FSub)
6092 if (Lane % 2 == 1 && Opc != Instruction::FAdd)
6096 Type *ElemTy = cast<VectorType>(VecTy)->getElementType();
6098 return ST->
hasSSE3() && NumElements % 4 == 0;
6100 return ST->
hasSSE3() && NumElements % 2 == 0;
6106 if (!ST->
hasAVX512() || !ST->preferScatter())
6119 if (
I->getOpcode() == Instruction::FDiv)
6135 TM.getSubtargetImpl(*Caller)->getFeatureBits();
6137 TM.getSubtargetImpl(*Callee)->getFeatureBits();
6140 FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
6141 FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
6142 if (RealCallerBits == RealCalleeBits)
6147 if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
6151 if (
const auto *CB = dyn_cast<CallBase>(&
I)) {
6153 if (CB->isInlineAsm())
6157 for (
Value *Arg : CB->args())
6158 Types.push_back(Arg->getType());
6159 if (!CB->getType()->isVoidTy())
6160 Types.push_back(CB->getType());
6163 auto IsSimpleTy = [](
Type *Ty) {
6164 return !Ty->isVectorTy() && !Ty->isAggregateType();
6166 if (
all_of(Types, IsSimpleTy))
6169 if (
Function *NestedCallee = CB->getCalledFunction()) {
6171 if (NestedCallee->isIntrinsic())
6206 [](
Type *
T) {
return T->isVectorTy() ||
T->isAggregateType(); });
6215 Options.AllowOverlappingLoads =
true;
6220 if (PreferredWidth >= 512 && ST->
hasAVX512() && ST->hasEVEX512())
6221 Options.LoadSizes.push_back(64);
6222 if (PreferredWidth >= 256 && ST->
hasAVX())
Options.LoadSizes.push_back(32);
6223 if (PreferredWidth >= 128 && ST->
hasSSE2())
Options.LoadSizes.push_back(16);
6225 if (ST->is64Bit()) {
6226 Options.LoadSizes.push_back(8);
6228 Options.LoadSizes.push_back(4);
6229 Options.LoadSizes.push_back(2);
6230 Options.LoadSizes.push_back(1);
6235 return supportsGather();
6246 return !(ST->isAtom());
6266 unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
6272 bool UseMaskedMemOp = UseMaskForCond || UseMaskForGaps;
6284 if (UseMaskedMemOp) {
6286 for (
unsigned Index : Indices) {
6287 assert(
Index < Factor &&
"Invalid index for interleaved memory op");
6288 for (
unsigned Elm = 0; Elm < VF; Elm++)
6289 DemandedLoadStoreElts.
setBit(
Index + Elm * Factor);
6296 UseMaskForGaps ? DemandedLoadStoreElts
6305 if (UseMaskForGaps) {
6311 if (Opcode == Instruction::Load) {
6318 static const CostTblEntry AVX512InterleavedLoadTbl[] = {
6319 {3, MVT::v16i8, 12},
6320 {3, MVT::v32i8, 14},
6321 {3, MVT::v64i8, 22},
6324 if (
const auto *Entry =
6326 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6336 ShuffleKind, SingleMemOpTy, std::nullopt,
CostKind, 0,
nullptr);
6338 unsigned NumOfLoadsInInterleaveGrp =
6339 Indices.
size() ? Indices.
size() : Factor;
6348 unsigned NumOfUnfoldedLoads =
6349 UseMaskedMemOp || NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2;
6352 unsigned NumOfShufflesPerResult =
6353 std::max((
unsigned)1, (
unsigned)(NumOfMemOps - 1));
6360 NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;
6363 MaskCost + NumOfUnfoldedLoads * MemOpCost +
6370 assert(Opcode == Instruction::Store &&
6371 "Expected Store Instruction at this point");
6373 static const CostTblEntry AVX512InterleavedStoreTbl[] = {
6374 {3, MVT::v16i8, 12},
6375 {3, MVT::v32i8, 14},
6376 {3, MVT::v64i8, 26},
6379 {4, MVT::v16i8, 11},
6380 {4, MVT::v32i8, 14},
6384 if (
const auto *Entry =
6386 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6391 unsigned NumOfSources = Factor;
6394 unsigned NumOfShufflesPerStore = NumOfSources - 1;
6398 unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2;
6401 NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) +
6409 bool UseMaskForCond,
bool UseMaskForGaps) {
6410 auto *VecTy = cast<FixedVectorType>(
BaseTy);
6412 auto isSupportedOnAVX512 = [&](
Type *VecTy) {
6413 Type *EltTy = cast<VectorType>(VecTy)->getElementType();
6418 return ST->hasBWI();
6420 return ST->hasBF16();
6423 if (ST->
hasAVX512() && isSupportedOnAVX512(VecTy))
6425 Opcode, VecTy, Factor, Indices, Alignment,
6428 if (UseMaskForCond || UseMaskForGaps)
6431 UseMaskForCond, UseMaskForGaps);
6451 unsigned VF = VecTy->getNumElements() / Factor;
6452 Type *ScalarTy = VecTy->getElementType();
6484 {2, MVT::v16i16, 9},
6485 {2, MVT::v32i16, 18},
6488 {2, MVT::v16i32, 8},
6489 {2, MVT::v32i32, 16},
6493 {2, MVT::v16i64, 16},
6494 {2, MVT::v32i64, 32},
6499 {3, MVT::v16i8, 11},
6500 {3, MVT::v32i8, 14},
6505 {3, MVT::v16i16, 28},
6506 {3, MVT::v32i16, 56},
6511 {3, MVT::v16i32, 14},
6512 {3, MVT::v32i32, 32},
6516 {3, MVT::v8i64, 10},
6517 {3, MVT::v16i64, 20},
6522 {4, MVT::v16i8, 24},
6523 {4, MVT::v32i8, 56},
6526 {4, MVT::v4i16, 17},
6527 {4, MVT::v8i16, 33},
6528 {4, MVT::v16i16, 75},
6529 {4, MVT::v32i16, 150},
6533 {4, MVT::v8i32, 16},
6534 {4, MVT::v16i32, 32},
6535 {4, MVT::v32i32, 68},
6539 {4, MVT::v8i64, 20},
6540 {4, MVT::v16i64, 40},
6545 {6, MVT::v16i8, 43},
6546 {6, MVT::v32i8, 82},
6548 {6, MVT::v2i16, 13},
6550 {6, MVT::v8i16, 39},
6551 {6, MVT::v16i16, 106},
6552 {6, MVT::v32i16, 212},
6555 {6, MVT::v4i32, 15},
6556 {6, MVT::v8i32, 31},
6557 {6, MVT::v16i32, 64},
6560 {6, MVT::v4i64, 18},
6561 {6, MVT::v8i64, 36},
6566 static const CostTblEntry SSSE3InterleavedLoadTbl[] = {
6580 static const CostTblEntry AVX2InterleavedStoreTbl[] = {
6585 {2, MVT::v16i16, 4},
6586 {2, MVT::v32i16, 8},
6590 {2, MVT::v16i32, 8},
6591 {2, MVT::v32i32, 16},
6596 {2, MVT::v16i64, 16},
6597 {2, MVT::v32i64, 32},
6602 {3, MVT::v16i8, 11},
6603 {3, MVT::v32i8, 13},
6607 {3, MVT::v8i16, 12},
6608 {3, MVT::v16i16, 27},
6609 {3, MVT::v32i16, 54},
6613 {3, MVT::v8i32, 11},
6614 {3, MVT::v16i32, 22},
6615 {3, MVT::v32i32, 48},
6619 {3, MVT::v8i64, 12},
6620 {3, MVT::v16i64, 24},
6626 {4, MVT::v32i8, 12},
6630 {4, MVT::v8i16, 10},
6631 {4, MVT::v16i16, 32},
6632 {4, MVT::v32i16, 64},
6636 {4, MVT::v8i32, 16},
6637 {4, MVT::v16i32, 32},
6638 {4, MVT::v32i32, 64},
6642 {4, MVT::v8i64, 20},
6643 {4, MVT::v16i64, 40},
6648 {6, MVT::v16i8, 27},
6649 {6, MVT::v32i8, 90},
6651 {6, MVT::v2i16, 10},
6652 {6, MVT::v4i16, 15},
6653 {6, MVT::v8i16, 21},
6654 {6, MVT::v16i16, 58},
6655 {6, MVT::v32i16, 90},
6658 {6, MVT::v4i32, 12},
6659 {6, MVT::v8i32, 33},
6660 {6, MVT::v16i32, 66},
6663 {6, MVT::v4i64, 15},
6664 {6, MVT::v8i64, 30},
6667 static const CostTblEntry SSE2InterleavedStoreTbl[] = {
6678 if (Opcode == Instruction::Load) {
6679 auto GetDiscountedCost = [Factor, NumMembers = Indices.
size(),
6683 return MemOpCosts +
divideCeil(NumMembers * Entry->Cost, Factor);
6687 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedLoadTbl, Factor,
6689 return GetDiscountedCost(Entry);
6692 if (
const auto *Entry =
CostTableLookup(SSSE3InterleavedLoadTbl, Factor,
6694 return GetDiscountedCost(Entry);
6697 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedLoadTbl, Factor,
6699 return GetDiscountedCost(Entry);
6701 assert(Opcode == Instruction::Store &&
6702 "Expected Store Instruction at this point");
6704 "Interleaved store only supports fully-interleaved groups.");
6706 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedStoreTbl, Factor,
6708 return MemOpCosts + Entry->Cost;
6711 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedStoreTbl, Factor,
6713 return MemOpCosts + Entry->Cost;
6718 UseMaskForCond, UseMaskForGaps);
6723 bool HasBaseReg, int64_t Scale,
6724 unsigned AddrSpace)
const {
6751 return AM.
Scale != 0;
Expand Atomic instructions
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
const char LLVMTargetMachineRef TM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
APInt zext(unsigned width) const
Zero extend to a new width.
unsigned popcount() const
Count the number of bits set.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
Estimate the overhead of scalarizing an instruction.
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
A parsed version of the target data layout string in and methods for querying it.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
constexpr bool isScalar() const
Exactly one element.
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static InstructionCost getInvalid(CostType Val=0)
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
FastMathFlags getFlags() const
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
bool isTypeBasedOnly() const
bool is128BitVector() const
Return true if this is a 128-bit vector type.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
unsigned getAddressSpace() const
Return the address space of the Pointer type.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
size_type size() const
Returns the number of bits in this bitvector.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const TargetMachine & getTargetMachine() const
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Primary interface to the complete machine description for the target machine.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
static Type * getDoubleTy(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Base class of all SIMD vector types.
static VectorType * getExtendedElementVectorType(VectorType *VTy)
This static method is like getInteger except that the element types are twice as wide as the elements in the input type.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector type.
static VectorType * getDoubleElementsVectorType(VectorType *VTy)
This static method returns a VectorType with twice as many elements as the input type and the same element type.
Type * getElementType() const
bool useAVX512Regs() const
unsigned getPreferVectorWidth() const
InstructionCost getInterleavedMemoryOpCostAVX512(unsigned Opcode, FixedVectorType *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
bool isLegalMaskedGather(Type *DataType, Align Alignment)
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const
std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool isLegalNTStore(Type *DataType, Align Alignment)
bool enableInterleavedAccessVectorization()
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
bool isLegalNTLoad(Type *DataType, Align Alignment)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment)
bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment)
bool isLegalMaskedLoad(Type *DataType, Align Alignment)
bool supportsEfficientVectorElementLoadStore() const
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
bool prefersVectorizedAddressing() const
unsigned getLoadStoreVecRegBitWidth(unsigned AS) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment)
std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const override
bool isLegalMaskedStore(Type *DataType, Align Alignment)
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
Calculate the cost of Gather / Scatter operation.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
unsigned getMaxInterleaveFactor(ElementCount VF)
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
unsigned getNumberOfRegisters(unsigned ClassID) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
unsigned getAtomicMemIntrinsicMaxElementSize() const
bool isLegalMaskedScatter(Type *DataType, Align Alignment)
InstructionCost getIntImmCost(int64_t)
Calculate the cost of materializing a 64-bit value.
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target, for a load/store of the specified type.
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isExpensiveToSpeculativelyExecute(const Instruction *I)
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
InstructionCost getMinMaxCost(Intrinsic::ID IID, Type *Ty, TTI::TargetCostKind CostKind, FastMathFlags FMF)
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Type) const
bool hasDivRemOp(Type *DataType, bool IsSigned)
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
@ BSWAP
Byte Swap and Counting operators.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.
@ FADD
Simple binary floating point operators.
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ SIGN_EXTEND
Conversion operators.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width (W).
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
@ SHL
Shift and rotation operations.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially variable) element number IDX.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0.0.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W).
bool match(Val *V, const Pattern &P)
apint_match m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
OneUse_match< T > m_OneUse(const T &SubPattern)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
constexpr int PoisonMaskElem
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
OutputIt copy(R &&Range, OutputIt Out)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
unsigned RecipThroughputCost
std::optional< unsigned > operator[](TargetTransformInfo::TargetCostKind Kind) const
unsigned SizeAndLatencyCost
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.
Type Conversion Cost Table.