63#define DEBUG_TYPE "x86tti"
79 std::optional<unsigned>
164 bool Vector = (ClassID == 1);
183 if (ST->
hasAVX512() && ST->hasEVEX512() && PreferVectorWidth >= 512)
185 if (ST->
hasAVX() && PreferVectorWidth >= 256)
187 if (ST->
hasSSE1() && PreferVectorWidth >= 128)
228 if (Opcode == Instruction::Mul && Ty->
isVectorTy() &&
245 assert(ISD &&
"Invalid opcode");
247 if (ISD ==
ISD::MUL && Args.size() == 2 && LT.second.isVector() &&
248 (LT.second.getScalarType() == MVT::i32 ||
249 LT.second.getScalarType() == MVT::i64)) {
251 bool Op1Signed =
false, Op2Signed =
false;
254 unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
255 bool SignedMode = Op1Signed || Op2Signed;
260 if (OpMinSize <= 15 && !ST->isPMADDWDSlow() &&
261 LT.second.getScalarType() == MVT::i32) {
263 isa<ConstantDataVector>(Args[0]) || isa<ConstantVector>(Args[0]);
265 isa<ConstantDataVector>(Args[1]) || isa<ConstantVector>(Args[1]);
266 bool Op1Sext = isa<SExtInst>(Args[0]) &&
267 (Op1MinSize == 15 || (Op1MinSize < 15 && !ST->
hasSSE41()));
268 bool Op2Sext = isa<SExtInst>(Args[1]) &&
269 (Op2MinSize == 15 || (Op2MinSize < 15 && !ST->
hasSSE41()));
271 bool IsZeroExtended = !Op1Signed || !Op2Signed;
272 bool IsConstant = Op1Constant || Op2Constant;
273 bool IsSext = Op1Sext || Op2Sext;
274 if (IsConstant || IsZeroExtended || IsSext)
282 if (ST->useSLMArithCosts() && LT.second == MVT::v4i32) {
285 if (!SignedMode && OpMinSize <= 8)
289 if (!SignedMode && OpMinSize <= 16)
296 if (!SignedMode && OpMinSize <= 32 && LT.second.getScalarType() == MVT::i64)
349 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
350 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
351 {
ISD::SRA, MVT::v16i8, { 1, 8, 4, 5 } },
352 {
ISD::SHL, MVT::v32i8, { 1, 8, 2, 3 } },
353 {
ISD::SRL, MVT::v32i8, { 1, 8, 2, 3 } },
354 {
ISD::SRA, MVT::v32i8, { 1, 9, 4, 5 } },
355 {
ISD::SHL, MVT::v64i8, { 1, 8, 2, 3 } },
356 {
ISD::SRL, MVT::v64i8, { 1, 8, 2, 3 } },
357 {
ISD::SRA, MVT::v64i8, { 1, 9, 4, 6 } },
359 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
360 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
361 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
362 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
363 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
364 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
368 if (
const auto *Entry =
370 if (
auto KindCost = Entry->Cost[
CostKind])
371 return LT.first * *KindCost;
374 {
ISD::SHL, MVT::v64i8, { 2, 12, 5, 6 } },
375 {
ISD::SRL, MVT::v64i8, { 2, 12, 5, 6 } },
376 {
ISD::SRA, MVT::v64i8, { 3, 10, 12, 12 } },
378 {
ISD::SHL, MVT::v16i16, { 2, 7, 4, 4 } },
379 {
ISD::SRL, MVT::v16i16, { 2, 7, 4, 4 } },
380 {
ISD::SRA, MVT::v16i16, { 2, 7, 4, 4 } },
382 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
383 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
384 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
385 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
386 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
387 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
389 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
390 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
391 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
392 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
393 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
394 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
395 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
404 if (
const auto *Entry =
406 if (
auto KindCost = Entry->Cost[
CostKind])
407 return LT.first * *KindCost;
410 {
ISD::SHL, MVT::v16i8, { 1, 8, 2, 3 } },
411 {
ISD::SRL, MVT::v16i8, { 1, 8, 2, 3 } },
412 {
ISD::SRA, MVT::v16i8, { 2, 10, 5, 6 } },
413 {
ISD::SHL, MVT::v32i8, { 2, 8, 2, 4 } },
414 {
ISD::SRL, MVT::v32i8, { 2, 8, 2, 4 } },
415 {
ISD::SRA, MVT::v32i8, { 3, 10, 5, 9 } },
417 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
418 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
419 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
420 {
ISD::SHL, MVT::v16i16,{ 2, 2, 1, 2 } },
421 {
ISD::SRL, MVT::v16i16,{ 2, 2, 1, 2 } },
422 {
ISD::SRA, MVT::v16i16,{ 2, 2, 1, 2 } },
424 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
425 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
426 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
427 {
ISD::SHL, MVT::v8i32, { 2, 2, 1, 2 } },
428 {
ISD::SRL, MVT::v8i32, { 2, 2, 1, 2 } },
429 {
ISD::SRA, MVT::v8i32, { 2, 2, 1, 2 } },
431 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
432 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
433 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
434 {
ISD::SHL, MVT::v4i64, { 2, 2, 1, 2 } },
435 {
ISD::SRL, MVT::v4i64, { 2, 2, 1, 2 } },
436 {
ISD::SRA, MVT::v4i64, { 4, 4, 3, 6 } },
445 if (
const auto *Entry =
447 if (
auto KindCost = Entry->Cost[
CostKind])
448 return LT.first * *KindCost;
451 {
ISD::SHL, MVT::v16i8, { 2, 7, 2, 3 } },
452 {
ISD::SRL, MVT::v16i8, { 2, 7, 2, 3 } },
453 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
454 {
ISD::SHL, MVT::v32i8, { 4, 7, 7, 8 } },
455 {
ISD::SRL, MVT::v32i8, { 4, 7, 7, 8 } },
456 {
ISD::SRA, MVT::v32i8, { 7, 7, 12, 13 } },
458 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 1 } },
459 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 1 } },
460 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 1 } },
461 {
ISD::SHL, MVT::v16i16,{ 3, 6, 4, 5 } },
462 {
ISD::SRL, MVT::v16i16,{ 3, 6, 4, 5 } },
463 {
ISD::SRA, MVT::v16i16,{ 3, 6, 4, 5 } },
465 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 1 } },
466 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 1 } },
467 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 1 } },
468 {
ISD::SHL, MVT::v8i32, { 3, 6, 4, 5 } },
469 {
ISD::SRL, MVT::v8i32, { 3, 6, 4, 5 } },
470 {
ISD::SRA, MVT::v8i32, { 3, 6, 4, 5 } },
472 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 1 } },
473 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 1 } },
474 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
475 {
ISD::SHL, MVT::v4i64, { 3, 6, 4, 5 } },
476 {
ISD::SRL, MVT::v4i64, { 3, 6, 4, 5 } },
477 {
ISD::SRA, MVT::v4i64, { 5, 7, 8, 9 } },
487 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
488 if (
const auto *Entry =
490 if (
auto KindCost = Entry->Cost[
CostKind])
491 return LT.first * *KindCost;
494 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
495 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
496 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
498 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
499 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
500 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
502 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
503 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
504 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
506 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
507 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
508 {
ISD::SRA, MVT::v2i64, { 3, 5, 6, 6 } },
518 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
519 if (
const auto *Entry =
521 if (
auto KindCost = Entry->Cost[
CostKind])
522 return LT.first * *KindCost;
537 if (
const auto *Entry =
539 if (
auto KindCost = Entry->Cost[
CostKind])
540 return LT.first * *KindCost;
560 if (
const auto *Entry =
562 if (
auto KindCost = Entry->Cost[
CostKind])
563 return LT.first * *KindCost;
583 if (
const auto *Entry =
CostTableLookup(AVX2ConstCostTable, ISD, LT.second))
584 if (
auto KindCost = Entry->Cost[
CostKind])
585 return LT.first * *KindCost;
605 if (
const auto *Entry =
CostTableLookup(AVXConstCostTable, ISD, LT.second))
606 if (
auto KindCost = Entry->Cost[
CostKind])
607 return LT.first * *KindCost;
615 if (
const auto *Entry =
617 if (
auto KindCost = Entry->Cost[
CostKind])
618 return LT.first * *KindCost;
638 if (
const auto *Entry =
CostTableLookup(SSE2ConstCostTable, ISD, LT.second))
639 if (
auto KindCost = Entry->Cost[
CostKind])
640 return LT.first * *KindCost;
643 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
644 {
ISD::SRL, MVT::v16i8, { 3,10, 5, 8 } },
645 {
ISD::SRA, MVT::v16i8, { 4,12, 8,12 } },
646 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
647 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
648 {
ISD::SRA, MVT::v32i8, { 5,10,10,13 } },
649 {
ISD::SHL, MVT::v64i8, { 4, 7, 6, 8 } },
650 {
ISD::SRL, MVT::v64i8, { 4, 8, 7,10 } },
651 {
ISD::SRA, MVT::v64i8, { 5,10,10,15 } },
653 {
ISD::SHL, MVT::v32i16, { 2, 4, 2, 3 } },
654 {
ISD::SRL, MVT::v32i16, { 2, 4, 2, 3 } },
655 {
ISD::SRA, MVT::v32i16, { 2, 4, 2, 3 } },
659 if (
const auto *Entry =
661 if (
auto KindCost = Entry->Cost[
CostKind])
662 return LT.first * *KindCost;
665 {
ISD::SHL, MVT::v32i16, { 5,10, 5, 7 } },
666 {
ISD::SRL, MVT::v32i16, { 5,10, 5, 7 } },
667 {
ISD::SRA, MVT::v32i16, { 5,10, 5, 7 } },
669 {
ISD::SHL, MVT::v16i32, { 2, 4, 2, 3 } },
670 {
ISD::SRL, MVT::v16i32, { 2, 4, 2, 3 } },
671 {
ISD::SRA, MVT::v16i32, { 2, 4, 2, 3 } },
673 {
ISD::SRA, MVT::v2i64, { 1, 2, 1, 2 } },
674 {
ISD::SHL, MVT::v4i64, { 1, 4, 1, 2 } },
675 {
ISD::SRL, MVT::v4i64, { 1, 4, 1, 2 } },
676 {
ISD::SRA, MVT::v4i64, { 1, 4, 1, 2 } },
677 {
ISD::SHL, MVT::v8i64, { 1, 4, 1, 2 } },
678 {
ISD::SRL, MVT::v8i64, { 1, 4, 1, 2 } },
679 {
ISD::SRA, MVT::v8i64, { 1, 4, 1, 2 } },
683 if (
const auto *Entry =
685 if (
auto KindCost = Entry->Cost[
CostKind])
686 return LT.first * *KindCost;
690 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
691 {
ISD::SRL, MVT::v16i8, { 3, 9, 5, 8 } },
692 {
ISD::SRA, MVT::v16i8, { 4, 5, 9,13 } },
693 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
694 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
695 {
ISD::SRA, MVT::v32i8, { 6, 9,11,16 } },
697 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 2 } },
698 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 2 } },
699 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 2 } },
700 {
ISD::SHL, MVT::v16i16, { 2, 4, 2, 3 } },
701 {
ISD::SRL, MVT::v16i16, { 2, 4, 2, 3 } },
702 {
ISD::SRA, MVT::v16i16, { 2, 4, 2, 3 } },
704 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 2 } },
705 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 2 } },
706 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 2 } },
707 {
ISD::SHL, MVT::v8i32, { 2, 4, 2, 3 } },
708 {
ISD::SRL, MVT::v8i32, { 2, 4, 2, 3 } },
709 {
ISD::SRA, MVT::v8i32, { 2, 4, 2, 3 } },
711 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 2 } },
712 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 2 } },
713 {
ISD::SRA, MVT::v2i64, { 2, 4, 5, 7 } },
714 {
ISD::SHL, MVT::v4i64, { 2, 4, 1, 2 } },
715 {
ISD::SRL, MVT::v4i64, { 2, 4, 1, 2 } },
716 {
ISD::SRA, MVT::v4i64, { 4, 6, 5, 9 } },
720 if (
const auto *Entry =
722 if (
auto KindCost = Entry->Cost[
CostKind])
723 return LT.first * *KindCost;
726 {
ISD::SHL, MVT::v16i8, { 4, 4, 6, 8 } },
727 {
ISD::SRL, MVT::v16i8, { 4, 8, 5, 8 } },
728 {
ISD::SRA, MVT::v16i8, { 6, 6, 9,13 } },
729 {
ISD::SHL, MVT::v32i8, { 7, 8,11,14 } },
730 {
ISD::SRL, MVT::v32i8, { 7, 9,10,14 } },
731 {
ISD::SRA, MVT::v32i8, { 10,11,16,21 } },
733 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 2 } },
734 {
ISD::SRL, MVT::v8i16, { 1, 3, 1, 2 } },
735 {
ISD::SRA, MVT::v8i16, { 1, 3, 1, 2 } },
736 {
ISD::SHL, MVT::v16i16, { 3, 7, 5, 7 } },
737 {
ISD::SRL, MVT::v16i16, { 3, 7, 5, 7 } },
738 {
ISD::SRA, MVT::v16i16, { 3, 7, 5, 7 } },
740 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 2 } },
741 {
ISD::SRL, MVT::v4i32, { 1, 3, 1, 2 } },
742 {
ISD::SRA, MVT::v4i32, { 1, 3, 1, 2 } },
743 {
ISD::SHL, MVT::v8i32, { 3, 7, 5, 7 } },
744 {
ISD::SRL, MVT::v8i32, { 3, 7, 5, 7 } },
745 {
ISD::SRA, MVT::v8i32, { 3, 7, 5, 7 } },
747 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 2 } },
748 {
ISD::SRL, MVT::v2i64, { 1, 3, 1, 2 } },
749 {
ISD::SRA, MVT::v2i64, { 3, 4, 5, 7 } },
750 {
ISD::SHL, MVT::v4i64, { 3, 7, 4, 6 } },
751 {
ISD::SRL, MVT::v4i64, { 3, 7, 4, 6 } },
752 {
ISD::SRA, MVT::v4i64, { 6, 7,10,13 } },
757 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
758 if (
const auto *Entry =
760 if (
auto KindCost = Entry->Cost[
CostKind])
761 return LT.first * *KindCost;
765 {
ISD::SHL, MVT::v16i8, { 9, 10, 6, 9 } },
766 {
ISD::SRL, MVT::v16i8, { 9, 13, 5, 9 } },
767 {
ISD::SRA, MVT::v16i8, { 11, 15, 9,13 } },
769 {
ISD::SHL, MVT::v8i16, { 2, 2, 1, 2 } },
770 {
ISD::SRL, MVT::v8i16, { 2, 2, 1, 2 } },
771 {
ISD::SRA, MVT::v8i16, { 2, 2, 1, 2 } },
773 {
ISD::SHL, MVT::v4i32, { 2, 2, 1, 2 } },
774 {
ISD::SRL, MVT::v4i32, { 2, 2, 1, 2 } },
775 {
ISD::SRA, MVT::v4i32, { 2, 2, 1, 2 } },
777 {
ISD::SHL, MVT::v2i64, { 2, 2, 1, 2 } },
778 {
ISD::SRL, MVT::v2i64, { 2, 2, 1, 2 } },
779 {
ISD::SRA, MVT::v2i64, { 5, 9, 5, 7 } },
783 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
784 if (
const auto *Entry =
786 if (
auto KindCost = Entry->Cost[
CostKind])
787 return LT.first * *KindCost;
790 {
ISD::MUL, MVT::v2i64, { 2, 15, 1, 3 } },
791 {
ISD::MUL, MVT::v4i64, { 2, 15, 1, 3 } },
792 {
ISD::MUL, MVT::v8i64, { 3, 15, 1, 3 } }
797 if (
const auto *Entry =
CostTableLookup(AVX512DQCostTable, ISD, LT.second))
798 if (
auto KindCost = Entry->Cost[
CostKind])
799 return LT.first * *KindCost;
802 {
ISD::SHL, MVT::v16i8, { 4, 8, 4, 5 } },
803 {
ISD::SRL, MVT::v16i8, { 4, 8, 4, 5 } },
804 {
ISD::SRA, MVT::v16i8, { 4, 8, 4, 5 } },
805 {
ISD::SHL, MVT::v32i8, { 4, 23,11,16 } },
806 {
ISD::SRL, MVT::v32i8, { 4, 30,12,18 } },
807 {
ISD::SRA, MVT::v32i8, { 6, 13,24,30 } },
808 {
ISD::SHL, MVT::v64i8, { 6, 19,13,15 } },
809 {
ISD::SRL, MVT::v64i8, { 7, 27,15,18 } },
810 {
ISD::SRA, MVT::v64i8, { 15, 15,30,30 } },
812 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
813 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
814 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
815 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
816 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
817 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
818 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
819 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
820 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
822 {
ISD::ADD, MVT::v64i8, { 1, 1, 1, 1 } },
823 {
ISD::ADD, MVT::v32i16, { 1, 1, 1, 1 } },
825 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 1 } },
826 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 1 } },
827 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 1 } },
828 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 1 } },
830 {
ISD::SUB, MVT::v64i8, { 1, 1, 1, 1 } },
831 {
ISD::SUB, MVT::v32i16, { 1, 1, 1, 1 } },
833 {
ISD::MUL, MVT::v64i8, { 5, 10,10,11 } },
834 {
ISD::MUL, MVT::v32i16, { 1, 5, 1, 1 } },
836 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 1 } },
837 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 1 } },
838 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 1 } },
839 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 1 } },
844 if (
const auto *Entry =
CostTableLookup(AVX512BWCostTable, ISD, LT.second))
845 if (
auto KindCost = Entry->Cost[
CostKind])
846 return LT.first * *KindCost;
849 {
ISD::SHL, MVT::v64i8, { 15, 19,27,33 } },
850 {
ISD::SRL, MVT::v64i8, { 15, 19,30,36 } },
851 {
ISD::SRA, MVT::v64i8, { 37, 37,51,63 } },
853 {
ISD::SHL, MVT::v32i16, { 11, 16,11,15 } },
854 {
ISD::SRL, MVT::v32i16, { 11, 16,11,15 } },
855 {
ISD::SRA, MVT::v32i16, { 11, 16,11,15 } },
857 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
858 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
859 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
860 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
861 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
862 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
863 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
864 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
865 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
867 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
868 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
869 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
870 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
871 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
872 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
873 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
874 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
875 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
877 {
ISD::ADD, MVT::v64i8, { 3, 7, 5, 5 } },
878 {
ISD::ADD, MVT::v32i16, { 3, 7, 5, 5 } },
880 {
ISD::SUB, MVT::v64i8, { 3, 7, 5, 5 } },
881 {
ISD::SUB, MVT::v32i16, { 3, 7, 5, 5 } },
883 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 1 } },
884 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 1 } },
885 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 1 } },
886 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 1 } },
888 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 1 } },
889 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 1 } },
890 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 1 } },
891 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 1 } },
893 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 1 } },
894 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 1 } },
895 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 1 } },
896 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 1 } },
898 {
ISD::MUL, MVT::v16i32, { 1, 10, 1, 2 } },
899 {
ISD::MUL, MVT::v8i32, { 1, 10, 1, 2 } },
900 {
ISD::MUL, MVT::v4i32, { 1, 10, 1, 2 } },
901 {
ISD::MUL, MVT::v8i64, { 6, 9, 8, 8 } },
906 {
ISD::FNEG, MVT::v8f64, { 1, 1, 1, 2 } },
907 {
ISD::FADD, MVT::v8f64, { 1, 4, 1, 1 } },
908 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 1 } },
909 {
ISD::FSUB, MVT::v8f64, { 1, 4, 1, 1 } },
910 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 1 } },
911 {
ISD::FMUL, MVT::v8f64, { 1, 4, 1, 1 } },
912 {
ISD::FMUL, MVT::v4f64, { 1, 4, 1, 1 } },
913 {
ISD::FMUL, MVT::v2f64, { 1, 4, 1, 1 } },
916 {
ISD::FDIV, MVT::f64, { 4, 14, 1, 1 } },
917 {
ISD::FDIV, MVT::v2f64, { 4, 14, 1, 1 } },
918 {
ISD::FDIV, MVT::v4f64, { 8, 14, 1, 1 } },
919 {
ISD::FDIV, MVT::v8f64, { 16, 23, 1, 3 } },
921 {
ISD::FNEG, MVT::v16f32, { 1, 1, 1, 2 } },
922 {
ISD::FADD, MVT::v16f32, { 1, 4, 1, 1 } },
923 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 1 } },
924 {
ISD::FSUB, MVT::v16f32, { 1, 4, 1, 1 } },
925 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 1 } },
926 {
ISD::FMUL, MVT::v16f32, { 1, 4, 1, 1 } },
927 {
ISD::FMUL, MVT::v8f32, { 1, 4, 1, 1 } },
928 {
ISD::FMUL, MVT::v4f32, { 1, 4, 1, 1 } },
931 {
ISD::FDIV, MVT::f32, { 3, 11, 1, 1 } },
932 {
ISD::FDIV, MVT::v4f32, { 3, 11, 1, 1 } },
933 {
ISD::FDIV, MVT::v8f32, { 5, 11, 1, 1 } },
934 {
ISD::FDIV, MVT::v16f32, { 10, 18, 1, 3 } },
938 if (
const auto *Entry =
CostTableLookup(AVX512CostTable, ISD, LT.second))
939 if (
auto KindCost = Entry->Cost[
CostKind])
940 return LT.first * *KindCost;
945 {
ISD::SHL, MVT::v4i32, { 2, 3, 1, 3 } },
946 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 3 } },
947 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 3 } },
948 {
ISD::SHL, MVT::v8i32, { 4, 4, 1, 3 } },
949 {
ISD::SRL, MVT::v8i32, { 4, 4, 1, 3 } },
950 {
ISD::SRA, MVT::v8i32, { 4, 4, 1, 3 } },
951 {
ISD::SHL, MVT::v2i64, { 2, 3, 1, 1 } },
952 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
953 {
ISD::SHL, MVT::v4i64, { 4, 4, 1, 2 } },
954 {
ISD::SRL, MVT::v4i64, { 4, 4, 1, 2 } },
966 if (ST->
hasAVX2() && !(ST->hasXOP() && LT.second == MVT::v4i32)) {
967 if (ISD ==
ISD::SHL && LT.second == MVT::v16i16 &&
974 if (
const auto *Entry =
CostTableLookup(AVX2ShiftCostTable, ISD, LT.second))
975 if (
auto KindCost = Entry->Cost[
CostKind])
976 return LT.first * *KindCost;
981 {
ISD::SHL, MVT::v16i8, { 1, 3, 1, 1 } },
982 {
ISD::SRL, MVT::v16i8, { 2, 3, 1, 1 } },
983 {
ISD::SRA, MVT::v16i8, { 2, 3, 1, 1 } },
984 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 1 } },
985 {
ISD::SRL, MVT::v8i16, { 2, 3, 1, 1 } },
986 {
ISD::SRA, MVT::v8i16, { 2, 3, 1, 1 } },
987 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 1 } },
988 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 1 } },
989 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 1 } },
990 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 1 } },
991 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
992 {
ISD::SRA, MVT::v2i64, { 2, 3, 1, 1 } },
994 {
ISD::SHL, MVT::v32i8, { 4, 7, 5, 6 } },
995 {
ISD::SRL, MVT::v32i8, { 6, 7, 5, 6 } },
996 {
ISD::SRA, MVT::v32i8, { 6, 7, 5, 6 } },
997 {
ISD::SHL, MVT::v16i16, { 4, 7, 5, 6 } },
998 {
ISD::SRL, MVT::v16i16, { 6, 7, 5, 6 } },
999 {
ISD::SRA, MVT::v16i16, { 6, 7, 5, 6 } },
1000 {
ISD::SHL, MVT::v8i32, { 4, 7, 5, 6 } },
1001 {
ISD::SRL, MVT::v8i32, { 6, 7, 5, 6 } },
1002 {
ISD::SRA, MVT::v8i32, { 6, 7, 5, 6 } },
1003 {
ISD::SHL, MVT::v4i64, { 4, 7, 5, 6 } },
1004 {
ISD::SRL, MVT::v4i64, { 6, 7, 5, 6 } },
1005 {
ISD::SRA, MVT::v4i64, { 6, 7, 5, 6 } },
1015 if (
const auto *Entry =
1017 if (
auto KindCost = Entry->Cost[
CostKind])
1018 return LT.first * *KindCost;
1025 if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->
hasSSE2()) ||
1026 ((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->
hasAVX()))
1031 {
ISD::FDIV, MVT::f32, { 18, 19, 1, 1 } },
1032 {
ISD::FDIV, MVT::v4f32, { 35, 36, 1, 1 } },
1033 {
ISD::FDIV, MVT::f64, { 33, 34, 1, 1 } },
1034 {
ISD::FDIV, MVT::v2f64, { 65, 66, 1, 1 } },
1037 if (ST->useGLMDivSqrtCosts())
1038 if (
const auto *Entry =
CostTableLookup(GLMCostTable, ISD, LT.second))
1039 if (
auto KindCost = Entry->Cost[
CostKind])
1040 return LT.first * *KindCost;
1043 {
ISD::MUL, MVT::v4i32, { 11, 11, 1, 7 } },
1044 {
ISD::MUL, MVT::v8i16, { 2, 5, 1, 1 } },
1045 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1046 {
ISD::FMUL, MVT::f32, { 1, 4, 1, 1 } },
1047 {
ISD::FMUL, MVT::v2f64, { 4, 7, 1, 1 } },
1048 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1049 {
ISD::FDIV, MVT::f32, { 17, 19, 1, 1 } },
1050 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 6 } },
1051 {
ISD::FDIV, MVT::f64, { 32, 34, 1, 1 } },
1052 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 6 } },
1053 {
ISD::FADD, MVT::v2f64, { 2, 4, 1, 1 } },
1054 {
ISD::FSUB, MVT::v2f64, { 2, 4, 1, 1 } },
1060 {
ISD::MUL, MVT::v2i64, { 17, 22, 9, 9 } },
1062 {
ISD::ADD, MVT::v2i64, { 4, 2, 1, 2 } },
1063 {
ISD::SUB, MVT::v2i64, { 4, 2, 1, 2 } },
1066 if (ST->useSLMArithCosts())
1067 if (
const auto *Entry =
CostTableLookup(SLMCostTable, ISD, LT.second))
1068 if (
auto KindCost = Entry->Cost[
CostKind])
1069 return LT.first * *KindCost;
1072 {
ISD::SHL, MVT::v16i8, { 6, 21,11,16 } },
1073 {
ISD::SHL, MVT::v32i8, { 6, 23,11,22 } },
1074 {
ISD::SHL, MVT::v8i16, { 5, 18, 5,10 } },
1075 {
ISD::SHL, MVT::v16i16, { 8, 10,10,14 } },
1077 {
ISD::SRL, MVT::v16i8, { 6, 27,12,18 } },
1078 {
ISD::SRL, MVT::v32i8, { 8, 30,12,24 } },
1079 {
ISD::SRL, MVT::v8i16, { 5, 11, 5,10 } },
1080 {
ISD::SRL, MVT::v16i16, { 8, 10,10,14 } },
1082 {
ISD::SRA, MVT::v16i8, { 17, 17,24,30 } },
1083 {
ISD::SRA, MVT::v32i8, { 18, 20,24,43 } },
1084 {
ISD::SRA, MVT::v8i16, { 5, 11, 5,10 } },
1085 {
ISD::SRA, MVT::v16i16, { 8, 10,10,14 } },
1086 {
ISD::SRA, MVT::v2i64, { 4, 5, 5, 5 } },
1087 {
ISD::SRA, MVT::v4i64, { 8, 8, 5, 9 } },
1089 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 2 } },
1090 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 2 } },
1091 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 2 } },
1092 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 2 } },
1093 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 2 } },
1094 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 2 } },
1095 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 2 } },
1096 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 2 } },
1098 {
ISD::MUL, MVT::v16i8, { 5, 18, 6,12 } },
1099 {
ISD::MUL, MVT::v32i8, { 6, 11,10,19 } },
1100 {
ISD::MUL, MVT::v16i16, { 2, 5, 1, 2 } },
1101 {
ISD::MUL, MVT::v8i32, { 4, 10, 1, 2 } },
1102 {
ISD::MUL, MVT::v4i32, { 2, 10, 1, 2 } },
1103 {
ISD::MUL, MVT::v4i64, { 6, 10, 8,13 } },
1104 {
ISD::MUL, MVT::v2i64, { 6, 10, 8, 8 } },
1108 {
ISD::FNEG, MVT::v4f64, { 1, 1, 1, 2 } },
1109 {
ISD::FNEG, MVT::v8f32, { 1, 1, 1, 2 } },
1111 {
ISD::FADD, MVT::f64, { 1, 4, 1, 1 } },
1112 {
ISD::FADD, MVT::f32, { 1, 4, 1, 1 } },
1113 {
ISD::FADD, MVT::v2f64, { 1, 4, 1, 1 } },
1114 {
ISD::FADD, MVT::v4f32, { 1, 4, 1, 1 } },
1115 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 2 } },
1116 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 2 } },
1118 {
ISD::FSUB, MVT::f64, { 1, 4, 1, 1 } },
1119 {
ISD::FSUB, MVT::f32, { 1, 4, 1, 1 } },
1120 {
ISD::FSUB, MVT::v2f64, { 1, 4, 1, 1 } },
1121 {
ISD::FSUB, MVT::v4f32, { 1, 4, 1, 1 } },
1122 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 2 } },
1123 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 2 } },
1125 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1126 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1127 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1128 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1129 {
ISD::FMUL, MVT::v4f64, { 1, 5, 1, 2 } },
1130 {
ISD::FMUL, MVT::v8f32, { 1, 5, 1, 2 } },
1132 {
ISD::FDIV, MVT::f32, { 7, 13, 1, 1 } },
1133 {
ISD::FDIV, MVT::v4f32, { 7, 13, 1, 1 } },
1134 {
ISD::FDIV, MVT::v8f32, { 14, 21, 1, 3 } },
1135 {
ISD::FDIV, MVT::f64, { 14, 20, 1, 1 } },
1136 {
ISD::FDIV, MVT::v2f64, { 14, 20, 1, 1 } },
1137 {
ISD::FDIV, MVT::v4f64, { 28, 35, 1, 3 } },
1142 if (
const auto *Entry =
CostTableLookup(AVX2CostTable, ISD, LT.second))
1143 if (
auto KindCost = Entry->Cost[
CostKind])
1144 return LT.first * *KindCost;
1150 {
ISD::MUL, MVT::v32i8, { 12, 13, 22, 23 } },
1151 {
ISD::MUL, MVT::v16i16, { 4, 8, 5, 6 } },
1152 {
ISD::MUL, MVT::v8i32, { 5, 8, 5, 10 } },
1153 {
ISD::MUL, MVT::v4i32, { 2, 5, 1, 3 } },
1154 {
ISD::MUL, MVT::v4i64, { 12, 15, 19, 20 } },
1156 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 2 } },
1157 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 2 } },
1158 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 2 } },
1159 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 2 } },
1161 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 2 } },
1162 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 2 } },
1163 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 2 } },
1164 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 2 } },
1166 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 2 } },
1167 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 2 } },
1168 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 2 } },
1169 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 2 } },
1171 {
ISD::SUB, MVT::v32i8, { 4, 2, 5, 6 } },
1172 {
ISD::ADD, MVT::v32i8, { 4, 2, 5, 6 } },
1173 {
ISD::SUB, MVT::v16i16, { 4, 2, 5, 6 } },
1174 {
ISD::ADD, MVT::v16i16, { 4, 2, 5, 6 } },
1175 {
ISD::SUB, MVT::v8i32, { 4, 2, 5, 6 } },
1176 {
ISD::ADD, MVT::v8i32, { 4, 2, 5, 6 } },
1177 {
ISD::SUB, MVT::v4i64, { 4, 2, 5, 6 } },
1178 {
ISD::ADD, MVT::v4i64, { 4, 2, 5, 6 } },
1179 {
ISD::SUB, MVT::v2i64, { 1, 1, 1, 1 } },
1180 {
ISD::ADD, MVT::v2i64, { 1, 1, 1, 1 } },
1182 {
ISD::SHL, MVT::v16i8, { 10, 21,11,17 } },
1183 {
ISD::SHL, MVT::v32i8, { 22, 22,27,40 } },
1184 {
ISD::SHL, MVT::v8i16, { 6, 9,11,11 } },
1185 {
ISD::SHL, MVT::v16i16, { 13, 16,24,25 } },
1186 {
ISD::SHL, MVT::v4i32, { 3, 11, 4, 6 } },
1187 {
ISD::SHL, MVT::v8i32, { 9, 11,12,17 } },
1188 {
ISD::SHL, MVT::v2i64, { 2, 4, 4, 6 } },
1189 {
ISD::SHL, MVT::v4i64, { 6, 7,11,15 } },
1191 {
ISD::SRL, MVT::v16i8, { 11, 27,12,18 } },
1192 {
ISD::SRL, MVT::v32i8, { 23, 23,30,43 } },
1193 {
ISD::SRL, MVT::v8i16, { 13, 16,14,22 } },
1194 {
ISD::SRL, MVT::v16i16, { 28, 30,31,48 } },
1195 {
ISD::SRL, MVT::v4i32, { 6, 7,12,16 } },
1196 {
ISD::SRL, MVT::v8i32, { 14, 14,26,34 } },
1197 {
ISD::SRL, MVT::v2i64, { 2, 4, 4, 6 } },
1198 {
ISD::SRL, MVT::v4i64, { 6, 7,11,15 } },
1200 {
ISD::SRA, MVT::v16i8, { 21, 22,24,36 } },
1201 {
ISD::SRA, MVT::v32i8, { 44, 45,51,76 } },
1202 {
ISD::SRA, MVT::v8i16, { 13, 16,14,22 } },
1203 {
ISD::SRA, MVT::v16i16, { 28, 30,31,48 } },
1204 {
ISD::SRA, MVT::v4i32, { 6, 7,12,16 } },
1205 {
ISD::SRA, MVT::v8i32, { 14, 14,26,34 } },
1206 {
ISD::SRA, MVT::v2i64, { 5, 6,10,14 } },
1207 {
ISD::SRA, MVT::v4i64, { 12, 12,22,30 } },
1209 {
ISD::FNEG, MVT::v4f64, { 2, 2, 1, 2 } },
1210 {
ISD::FNEG, MVT::v8f32, { 2, 2, 1, 2 } },
1212 {
ISD::FADD, MVT::f64, { 1, 5, 1, 1 } },
1213 {
ISD::FADD, MVT::f32, { 1, 5, 1, 1 } },
1214 {
ISD::FADD, MVT::v2f64, { 1, 5, 1, 1 } },
1215 {
ISD::FADD, MVT::v4f32, { 1, 5, 1, 1 } },
1216 {
ISD::FADD, MVT::v4f64, { 2, 5, 1, 2 } },
1217 {
ISD::FADD, MVT::v8f32, { 2, 5, 1, 2 } },
1219 {
ISD::FSUB, MVT::f64, { 1, 5, 1, 1 } },
1220 {
ISD::FSUB, MVT::f32, { 1, 5, 1, 1 } },
1221 {
ISD::FSUB, MVT::v2f64, { 1, 5, 1, 1 } },
1222 {
ISD::FSUB, MVT::v4f32, { 1, 5, 1, 1 } },
1223 {
ISD::FSUB, MVT::v4f64, { 2, 5, 1, 2 } },
1224 {
ISD::FSUB, MVT::v8f32, { 2, 5, 1, 2 } },
1226 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1227 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1228 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1229 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1230 {
ISD::FMUL, MVT::v4f64, { 4, 5, 1, 2 } },
1231 {
ISD::FMUL, MVT::v8f32, { 2, 5, 1, 2 } },
1233 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1234 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1235 {
ISD::FDIV, MVT::v8f32, { 28, 29, 1, 3 } },
1236 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1237 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1238 {
ISD::FDIV, MVT::v4f64, { 44, 45, 1, 3 } },
1242 if (
const auto *Entry =
CostTableLookup(AVX1CostTable, ISD, LT.second))
1243 if (
auto KindCost = Entry->Cost[
CostKind])
1244 return LT.first * *KindCost;
1247 {
ISD::FADD, MVT::f64, { 1, 3, 1, 1 } },
1248 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1249 {
ISD::FADD, MVT::v2f64, { 1, 3, 1, 1 } },
1250 {
ISD::FADD, MVT::v4f32, { 1, 3, 1, 1 } },
1252 {
ISD::FSUB, MVT::f64, { 1, 3, 1, 1 } },
1253 {
ISD::FSUB, MVT::f32 , { 1, 3, 1, 1 } },
1254 {
ISD::FSUB, MVT::v2f64, { 1, 3, 1, 1 } },
1255 {
ISD::FSUB, MVT::v4f32, { 1, 3, 1, 1 } },
1257 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1258 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1259 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1260 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1262 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1263 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1264 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1265 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1267 {
ISD::MUL, MVT::v2i64, { 6, 10,10,10 } }
1271 if (
const auto *Entry =
CostTableLookup(SSE42CostTable, ISD, LT.second))
1272 if (
auto KindCost = Entry->Cost[
CostKind])
1273 return LT.first * *KindCost;
1276 {
ISD::SHL, MVT::v16i8, { 15, 24,17,22 } },
1277 {
ISD::SHL, MVT::v8i16, { 11, 14,11,11 } },
1278 {
ISD::SHL, MVT::v4i32, { 14, 20, 4,10 } },
1280 {
ISD::SRL, MVT::v16i8, { 16, 27,18,24 } },
1281 {
ISD::SRL, MVT::v8i16, { 22, 26,23,27 } },
1282 {
ISD::SRL, MVT::v4i32, { 16, 17,15,19 } },
1283 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1285 {
ISD::SRA, MVT::v16i8, { 38, 41,30,36 } },
1286 {
ISD::SRA, MVT::v8i16, { 22, 26,23,27 } },
1287 {
ISD::SRA, MVT::v4i32, { 16, 17,15,19 } },
1288 {
ISD::SRA, MVT::v2i64, { 8, 17, 5, 7 } },
1290 {
ISD::MUL, MVT::v16i8, { 5, 18,10,12 } },
1291 {
ISD::MUL, MVT::v4i32, { 2, 11, 1, 1 } }
1295 if (
const auto *Entry =
CostTableLookup(SSE41CostTable, ISD, LT.second))
1296 if (
auto KindCost = Entry->Cost[
CostKind])
1297 return LT.first * *KindCost;
1302 {
ISD::SHL, MVT::v16i8, { 13, 21,26,28 } },
1303 {
ISD::SHL, MVT::v8i16, { 24, 27,16,20 } },
1304 {
ISD::SHL, MVT::v4i32, { 17, 19,10,12 } },
1305 {
ISD::SHL, MVT::v2i64, { 4, 6, 5, 7 } },
1307 {
ISD::SRL, MVT::v16i8, { 14, 28,27,30 } },
1308 {
ISD::SRL, MVT::v8i16, { 16, 19,31,31 } },
1309 {
ISD::SRL, MVT::v4i32, { 12, 12,15,19 } },
1310 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1312 {
ISD::SRA, MVT::v16i8, { 27, 30,54,54 } },
1313 {
ISD::SRA, MVT::v8i16, { 16, 19,31,31 } },
1314 {
ISD::SRA, MVT::v4i32, { 12, 12,15,19 } },
1315 {
ISD::SRA, MVT::v2i64, { 8, 11,12,16 } },
1317 {
ISD::AND, MVT::v16i8, { 1, 1, 1, 1 } },
1318 {
ISD::AND, MVT::v8i16, { 1, 1, 1, 1 } },
1319 {
ISD::AND, MVT::v4i32, { 1, 1, 1, 1 } },
1320 {
ISD::AND, MVT::v2i64, { 1, 1, 1, 1 } },
1322 {
ISD::OR, MVT::v16i8, { 1, 1, 1, 1 } },
1323 {
ISD::OR, MVT::v8i16, { 1, 1, 1, 1 } },
1324 {
ISD::OR, MVT::v4i32, { 1, 1, 1, 1 } },
1325 {
ISD::OR, MVT::v2i64, { 1, 1, 1, 1 } },
1327 {
ISD::XOR, MVT::v16i8, { 1, 1, 1, 1 } },
1328 {
ISD::XOR, MVT::v8i16, { 1, 1, 1, 1 } },
1329 {
ISD::XOR, MVT::v4i32, { 1, 1, 1, 1 } },
1330 {
ISD::XOR, MVT::v2i64, { 1, 1, 1, 1 } },
1332 {
ISD::ADD, MVT::v2i64, { 1, 2, 1, 2 } },
1333 {
ISD::SUB, MVT::v2i64, { 1, 2, 1, 2 } },
1335 {
ISD::MUL, MVT::v16i8, { 5, 18,12,12 } },
1336 {
ISD::MUL, MVT::v8i16, { 1, 5, 1, 1 } },
1337 {
ISD::MUL, MVT::v4i32, { 6, 8, 7, 7 } },
1338 {
ISD::MUL, MVT::v2i64, { 7, 10,10,10 } },
1342 {
ISD::FDIV, MVT::f32, { 23, 23, 1, 1 } },
1343 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 1 } },
1344 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1345 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 1 } },
1347 {
ISD::FNEG, MVT::f32, { 1, 1, 1, 1 } },
1348 {
ISD::FNEG, MVT::f64, { 1, 1, 1, 1 } },
1349 {
ISD::FNEG, MVT::v4f32, { 1, 1, 1, 1 } },
1350 {
ISD::FNEG, MVT::v2f64, { 1, 1, 1, 1 } },
1352 {
ISD::FADD, MVT::f32, { 2, 3, 1, 1 } },
1353 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1354 {
ISD::FADD, MVT::v2f64, { 2, 3, 1, 1 } },
1356 {
ISD::FSUB, MVT::f32, { 2, 3, 1, 1 } },
1357 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1358 {
ISD::FSUB, MVT::v2f64, { 2, 3, 1, 1 } },
1360 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1361 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1365 if (
const auto *Entry =
CostTableLookup(SSE2CostTable, ISD, LT.second))
1366 if (
auto KindCost = Entry->Cost[
CostKind])
1367 return LT.first * *KindCost;
1370 {
ISD::FDIV, MVT::f32, { 17, 18, 1, 1 } },
1371 {
ISD::FDIV, MVT::v4f32, { 34, 48, 1, 1 } },
1373 {
ISD::FNEG, MVT::f32, { 2, 2, 1, 2 } },
1374 {
ISD::FNEG, MVT::v4f32, { 2, 2, 1, 2 } },
1376 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1377 {
ISD::FADD, MVT::v4f32, { 2, 3, 1, 1 } },
1379 {
ISD::FSUB, MVT::f32, { 1, 3, 1, 1 } },
1380 {
ISD::FSUB, MVT::v4f32, { 2, 3, 1, 1 } },
1382 {
ISD::FMUL, MVT::f32, { 2, 5, 1, 1 } },
1383 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1387 if (
const auto *Entry =
CostTableLookup(SSE1CostTable, ISD, LT.second))
1388 if (
auto KindCost = Entry->Cost[
CostKind])
1389 return LT.first * *KindCost;
1394 {
ISD::MUL, MVT::i64, { 2, 6, 1, 2 } },
1399 if (
auto KindCost = Entry->Cost[
CostKind])
1400 return LT.first * *KindCost;
1411 {
ISD::MUL, MVT::i8, { 3, 4, 1, 1 } },
1412 {
ISD::MUL, MVT::i16, { 2, 4, 1, 1 } },
1413 {
ISD::MUL, MVT::i32, { 1, 4, 1, 1 } },
1415 {
ISD::FNEG, MVT::f64, { 2, 2, 1, 3 } },
1416 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1417 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1418 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1419 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1423 if (
auto KindCost = Entry->Cost[
CostKind])
1424 return LT.first * *KindCost;
1438 return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost;
1489 CostKind, Mask.size() / 2, BaseTp);
1502 if (LT.second.isVector() && LT.second.getScalarType() == MVT::bf16)
1503 LT.second = LT.second.changeVectorElementType(MVT::f16);
1508 int NumElts = LT.second.getVectorNumElements();
1509 if ((
Index % NumElts) == 0)
1512 if (SubLT.second.isVector()) {
1513 int NumSubElts = SubLT.second.getVectorNumElements();
1514 if ((
Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1522 int OrigSubElts = cast<FixedVectorType>(SubTp)->getNumElements();
1523 if (NumSubElts > OrigSubElts && (
Index % OrigSubElts) == 0 &&
1524 (NumSubElts % OrigSubElts) == 0 &&
1525 LT.second.getVectorElementType() ==
1526 SubLT.second.getVectorElementType() &&
1527 LT.second.getVectorElementType().getSizeInBits() ==
1529 assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
1530 "Unexpected number of elements!");
1532 LT.second.getVectorNumElements());
1534 SubLT.second.getVectorNumElements());
1543 return ExtractCost + 1;
1546 "Unexpected vector size");
1548 return ExtractCost + 2;
1559 int NumElts = LT.second.getVectorNumElements();
1561 if (SubLT.second.isVector()) {
1562 int NumSubElts = SubLT.second.getVectorNumElements();
1563 if ((
Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1576 static const CostTblEntry SSE2SubVectorShuffleTbl[] = {
1607 if (
const auto *Entry =
1616 MVT LegalVT = LT.second;
1621 cast<FixedVectorType>(BaseTp)->getNumElements()) {
1625 unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
1632 if (!Mask.empty() && NumOfDests.
isValid()) {
1650 unsigned E = *NumOfDests.
getValue();
1651 unsigned NormalizedVF =
1657 unsigned PrevSrcReg = 0;
1661 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
1662 [
this, SingleOpTy,
CostKind, &PrevSrcReg, &PrevRegMask,
1667 if (PrevRegMask.
empty() || PrevSrcReg != SrcReg ||
1668 PrevRegMask != RegMask)
1676 if (SrcReg != DestReg &&
1681 PrevSrcReg = SrcReg;
1682 PrevRegMask = RegMask;
1695 std::nullopt,
CostKind, 0,
nullptr);
1706 LT.first = NumOfDests * NumOfShufflesPerDest;
1722 if (
const auto *Entry =
1724 return LT.first * Entry->Cost;
1757 if (
const auto *Entry =
1759 return LT.first * Entry->Cost;
1836 if (
const auto *Entry =
CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
1837 if (
auto KindCost = Entry->Cost[
CostKind])
1838 return LT.first * *KindCost;
1891 if (
const auto *Entry =
CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
1892 return LT.first * Entry->Cost;
1913 if (
const auto *Entry =
CostTableLookup(XOPShuffleTbl, Kind, LT.second))
1914 return LT.first * Entry->Cost;
1976 if (
const auto *Entry =
CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
1977 return LT.first * Entry->Cost;
1990 if (
const auto *Entry =
CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
1991 return LT.first * Entry->Cost;
2022 if (
const auto *Entry =
CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
2023 return LT.first * Entry->Cost;
2079 llvm::any_of(Args, [](
const auto &V) {
return isa<LoadInst>(V); });
2081 if (
const auto *Entry =
2084 LT.second.getVectorElementCount()) &&
2085 "Table entry missing from isLegalBroadcastLoad()");
2086 return LT.first * Entry->Cost;
2089 if (
const auto *Entry =
CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
2090 return LT.first * Entry->Cost;
2103 if (
const auto *Entry =
CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
2104 return LT.first * Entry->Cost;
2115 assert(ISD &&
"Invalid opcode");
2120 return Cost == 0 ? 0 : 1;
2935 AVX512BWConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2936 return AdjustCost(Entry->Cost);
2940 AVX512DQConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2941 return AdjustCost(Entry->Cost);
2945 AVX512FConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2946 return AdjustCost(Entry->Cost);
2951 AVX512BWVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2952 return AdjustCost(Entry->Cost);
2956 AVX512DQVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2957 return AdjustCost(Entry->Cost);
2961 SimpleDstTy, SimpleSrcTy))
2962 return AdjustCost(Entry->Cost);
2966 SimpleDstTy, SimpleSrcTy))
2967 return AdjustCost(Entry->Cost);
2972 SimpleDstTy, SimpleSrcTy))
2973 return AdjustCost(Entry->Cost);
2978 SimpleDstTy, SimpleSrcTy))
2979 return AdjustCost(Entry->Cost);
2984 SimpleDstTy, SimpleSrcTy))
2985 return AdjustCost(Entry->Cost);
3000 AVX512BWConversionTbl, ISD, LTDest.second, LTSrc.second))
3001 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
3005 AVX512DQConversionTbl, ISD, LTDest.second, LTSrc.second))
3006 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
3010 AVX512FConversionTbl, ISD, LTDest.second, LTSrc.second))
3011 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
3016 LTDest.second, LTSrc.second))
3017 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
3021 LTDest.second, LTSrc.second))
3022 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
3026 LTDest.second, LTSrc.second))
3027 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
3031 LTDest.second, LTSrc.second))
3032 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
3036 LTDest.second, LTSrc.second))
3037 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
3041 LTDest.second, LTSrc.second))
3042 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
3046 LTDest.second, LTSrc.second))
3047 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
3052 1 < Src->getScalarSizeInBits() && Src->getScalarSizeInBits() < 32) {
3053 Type *ExtSrc = Src->getWithNewBitWidth(32);
3059 if (!(Src->isIntegerTy() &&
I && isa<LoadInst>(
I->getOperand(0))))
3069 1 < Dst->getScalarSizeInBits() && Dst->getScalarSizeInBits() < 32) {
3070 Type *TruncDst = Dst->getWithNewBitWidth(32);
3093 MVT MTy = LT.second;
3096 assert(ISD &&
"Invalid opcode");
3099 if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
3112 Pred = cast<CmpInst>(
I)->getPredicate();
3114 bool CmpWithConstant =
false;
3115 if (
auto *CmpInstr = dyn_cast_or_null<CmpInst>(
I))
3116 CmpWithConstant = isa<Constant>(CmpInstr->getOperand(1));
3121 ExtraCost = CmpWithConstant ? 0 : 1;
3126 ExtraCost = CmpWithConstant ? 0 : 1;
3132 ExtraCost = CmpWithConstant ? 1 : 2;
3143 ExtraCost = CmpWithConstant ? 2 : 3;
3150 if (CondTy && !ST->
hasAVX())
3319 if (ST->useSLMArithCosts())
3321 if (
auto KindCost = Entry->Cost[
CostKind])
3322 return LT.first * (ExtraCost + *KindCost);
3326 if (
auto KindCost = Entry->Cost[
CostKind])
3327 return LT.first * (ExtraCost + *KindCost);
3331 if (
auto KindCost = Entry->Cost[
CostKind])
3332 return LT.first * (ExtraCost + *KindCost);
3336 if (
auto KindCost = Entry->Cost[
CostKind])
3337 return LT.first * (ExtraCost + *KindCost);
3341 if (
auto KindCost = Entry->Cost[
CostKind])
3342 return LT.first * (ExtraCost + *KindCost);
3346 if (
auto KindCost = Entry->Cost[
CostKind])
3347 return LT.first * (ExtraCost + *KindCost);
3351 if (
auto KindCost = Entry->Cost[
CostKind])
3352 return LT.first * (ExtraCost + *KindCost);
3356 if (
auto KindCost = Entry->Cost[
CostKind])
3357 return LT.first * (ExtraCost + *KindCost);
3361 if (
auto KindCost = Entry->Cost[
CostKind])
3362 return LT.first * (ExtraCost + *KindCost);
3366 if (
auto KindCost = Entry->Cost[
CostKind])
3367 return LT.first * (ExtraCost + *KindCost);
3392 {
ISD::FSHL, MVT::v8i64, { 1, 1, 1, 1 } },
3393 {
ISD::FSHL, MVT::v4i64, { 1, 1, 1, 1 } },
3394 {
ISD::FSHL, MVT::v2i64, { 1, 1, 1, 1 } },
3395 {
ISD::FSHL, MVT::v16i32, { 1, 1, 1, 1 } },
3396 {
ISD::FSHL, MVT::v8i32, { 1, 1, 1, 1 } },
3397 {
ISD::FSHL, MVT::v4i32, { 1, 1, 1, 1 } },
3398 {
ISD::FSHL, MVT::v32i16, { 1, 1, 1, 1 } },
3399 {
ISD::FSHL, MVT::v16i16, { 1, 1, 1, 1 } },
3400 {
ISD::FSHL, MVT::v8i16, { 1, 1, 1, 1 } },
3401 {
ISD::ROTL, MVT::v32i16, { 1, 1, 1, 1 } },
3402 {
ISD::ROTL, MVT::v16i16, { 1, 1, 1, 1 } },
3403 {
ISD::ROTL, MVT::v8i16, { 1, 1, 1, 1 } },
3404 {
ISD::ROTR, MVT::v32i16, { 1, 1, 1, 1 } },
3405 {
ISD::ROTR, MVT::v16i16, { 1, 1, 1, 1 } },
3406 {
ISD::ROTR, MVT::v8i16, { 1, 1, 1, 1 } },
3425 {
ISD::CTLZ, MVT::v8i64, { 1, 5, 1, 1 } },
3426 {
ISD::CTLZ, MVT::v16i32, { 1, 5, 1, 1 } },
3427 {
ISD::CTLZ, MVT::v32i16, { 18, 27, 23, 27 } },
3428 {
ISD::CTLZ, MVT::v64i8, { 3, 16, 9, 11 } },
3429 {
ISD::CTLZ, MVT::v4i64, { 1, 5, 1, 1 } },
3430 {
ISD::CTLZ, MVT::v8i32, { 1, 5, 1, 1 } },
3431 {
ISD::CTLZ, MVT::v16i16, { 8, 19, 11, 13 } },
3432 {
ISD::CTLZ, MVT::v32i8, { 2, 11, 9, 10 } },
3433 {
ISD::CTLZ, MVT::v2i64, { 1, 5, 1, 1 } },
3434 {
ISD::CTLZ, MVT::v4i32, { 1, 5, 1, 1 } },
3435 {
ISD::CTLZ, MVT::v8i16, { 3, 15, 4, 6 } },
3436 {
ISD::CTLZ, MVT::v16i8, { 2, 10, 9, 10 } },
3438 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3439 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3440 {
ISD::CTTZ, MVT::v4i64, { 1, 8, 6, 6 } },
3441 {
ISD::CTTZ, MVT::v8i32, { 1, 8, 6, 6 } },
3442 {
ISD::CTTZ, MVT::v2i64, { 1, 8, 6, 6 } },
3443 {
ISD::CTTZ, MVT::v4i32, { 1, 8, 6, 6 } },
3446 {
ISD::ABS, MVT::v32i16, { 1, 1, 1, 1 } },
3447 {
ISD::ABS, MVT::v64i8, { 1, 1, 1, 1 } },
3469 {
ISD::CTLZ, MVT::v8i64, { 8, 22, 23, 23 } },
3470 {
ISD::CTLZ, MVT::v16i32, { 8, 23, 25, 25 } },
3471 {
ISD::CTLZ, MVT::v32i16, { 4, 15, 15, 16 } },
3472 {
ISD::CTLZ, MVT::v64i8, { 3, 12, 10, 9 } },
3473 {
ISD::CTPOP, MVT::v2i64, { 3, 7, 10, 10 } },
3474 {
ISD::CTPOP, MVT::v4i64, { 3, 7, 10, 10 } },
3475 {
ISD::CTPOP, MVT::v8i64, { 3, 8, 10, 12 } },
3476 {
ISD::CTPOP, MVT::v4i32, { 7, 11, 14, 14 } },
3477 {
ISD::CTPOP, MVT::v8i32, { 7, 11, 14, 14 } },
3478 {
ISD::CTPOP, MVT::v16i32, { 7, 12, 14, 16 } },
3479 {
ISD::CTPOP, MVT::v8i16, { 2, 7, 11, 11 } },
3480 {
ISD::CTPOP, MVT::v16i16, { 2, 7, 11, 11 } },
3481 {
ISD::CTPOP, MVT::v32i16, { 3, 7, 11, 13 } },
3485 {
ISD::CTTZ, MVT::v8i16, { 3, 9, 14, 14 } },
3486 {
ISD::CTTZ, MVT::v16i16, { 3, 9, 14, 14 } },
3487 {
ISD::CTTZ, MVT::v32i16, { 3, 10, 14, 16 } },
3488 {
ISD::CTTZ, MVT::v16i8, { 2, 6, 11, 11 } },
3489 {
ISD::CTTZ, MVT::v32i8, { 2, 6, 11, 11 } },
3490 {
ISD::CTTZ, MVT::v64i8, { 3, 7, 11, 13 } },
3491 {
ISD::ROTL, MVT::v32i16, { 2, 8, 6, 8 } },
3492 {
ISD::ROTL, MVT::v16i16, { 2, 8, 6, 7 } },
3493 {
ISD::ROTL, MVT::v8i16, { 2, 7, 6, 7 } },
3494 {
ISD::ROTL, MVT::v64i8, { 5, 6, 11, 12 } },
3495 {
ISD::ROTL, MVT::v32i8, { 5, 15, 7, 10 } },
3496 {
ISD::ROTL, MVT::v16i8, { 5, 15, 7, 10 } },
3497 {
ISD::ROTR, MVT::v32i16, { 2, 8, 6, 8 } },
3498 {
ISD::ROTR, MVT::v16i16, { 2, 8, 6, 7 } },
3499 {
ISD::ROTR, MVT::v8i16, { 2, 7, 6, 7 } },
3500 {
ISD::ROTR, MVT::v64i8, { 5, 6, 12, 14 } },
3501 {
ISD::ROTR, MVT::v32i8, { 5, 14, 6, 9 } },
3502 {
ISD::ROTR, MVT::v16i8, { 5, 14, 6, 9 } },
3505 {
ISD::SMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3506 {
ISD::SMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3507 {
ISD::SMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3508 {
ISD::SMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3513 {
ISD::UMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3514 {
ISD::UMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3515 {
ISD::UMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3516 {
ISD::UMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3521 {
ISD::ABS, MVT::v8i64, { 1, 1, 1, 1 } },
3522 {
ISD::ABS, MVT::v4i64, { 1, 1, 1, 1 } },
3523 {
ISD::ABS, MVT::v2i64, { 1, 1, 1, 1 } },
3524 {
ISD::ABS, MVT::v16i32, { 1, 1, 1, 1 } },
3525 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 1 } },
3526 {
ISD::ABS, MVT::v32i16, { 2, 7, 4, 4 } },
3527 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 1 } },
3528 {
ISD::ABS, MVT::v64i8, { 2, 7, 4, 4 } },
3529 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 1 } },
3537 {
ISD::CTLZ, MVT::v8i64, { 10, 28, 32, 32 } },
3538 {
ISD::CTLZ, MVT::v16i32, { 12, 30, 38, 38 } },
3539 {
ISD::CTLZ, MVT::v32i16, { 8, 15, 29, 29 } },
3540 {
ISD::CTLZ, MVT::v64i8, { 6, 11, 19, 19 } },
3541 {
ISD::CTPOP, MVT::v8i64, { 16, 16, 19, 19 } },
3542 {
ISD::CTPOP, MVT::v16i32, { 24, 19, 27, 27 } },
3543 {
ISD::CTPOP, MVT::v32i16, { 18, 15, 22, 22 } },
3544 {
ISD::CTPOP, MVT::v64i8, { 12, 11, 16, 16 } },
3545 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3546 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3547 {
ISD::CTTZ, MVT::v32i16, { 7, 17, 27, 27 } },
3548 {
ISD::CTTZ, MVT::v64i8, { 6, 13, 21, 21 } },
3549 {
ISD::ROTL, MVT::v8i64, { 1, 1, 1, 1 } },
3550 {
ISD::ROTL, MVT::v4i64, { 1, 1, 1, 1 } },
3551 {
ISD::ROTL, MVT::v2i64, { 1, 1, 1, 1 } },
3552 {
ISD::ROTL, MVT::v16i32, { 1, 1, 1, 1 } },
3553 {
ISD::ROTL, MVT::v8i32, { 1, 1, 1, 1 } },
3554 {
ISD::ROTL, MVT::v4i32, { 1, 1, 1, 1 } },
3555 {
ISD::ROTR, MVT::v8i64, { 1, 1, 1, 1 } },
3556 {
ISD::ROTR, MVT::v4i64, { 1, 1, 1, 1 } },
3557 {
ISD::ROTR, MVT::v2i64, { 1, 1, 1, 1 } },
3558 {
ISD::ROTR, MVT::v16i32, { 1, 1, 1, 1 } },
3559 {
ISD::ROTR, MVT::v8i32, { 1, 1, 1, 1 } },
3560 {
ISD::ROTR, MVT::v4i32, { 1, 1, 1, 1 } },
3561 {
ISD::SMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3562 {
ISD::SMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3563 {
ISD::SMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3564 {
ISD::SMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3565 {
ISD::SMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3566 {
ISD::SMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3567 {
ISD::SMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3568 {
ISD::SMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3569 {
ISD::SMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3570 {
ISD::SMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3571 {
ISD::SMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3572 {
ISD::SMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3573 {
ISD::UMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3574 {
ISD::UMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3575 {
ISD::UMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3576 {
ISD::UMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3577 {
ISD::UMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3578 {
ISD::UMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3579 {
ISD::UMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3580 {
ISD::UMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3581 {
ISD::UMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3582 {
ISD::UMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3583 {
ISD::UMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3584 {
ISD::UMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3612 {
ISD::FSQRT, MVT::v16f32, { 12, 20, 1, 3 } },
3615 {
ISD::FSQRT, MVT::v4f64, { 12, 18, 1, 1 } },
3616 {
ISD::FSQRT, MVT::v8f64, { 24, 32, 1, 3 } },
3632 {
ISD::ROTL, MVT::v4i64, { 4, 7, 5, 6 } },
3633 {
ISD::ROTL, MVT::v8i32, { 4, 7, 5, 6 } },
3634 {
ISD::ROTL, MVT::v16i16, { 4, 7, 5, 6 } },
3635 {
ISD::ROTL, MVT::v32i8, { 4, 7, 5, 6 } },
3636 {
ISD::ROTL, MVT::v2i64, { 1, 3, 1, 1 } },
3637 {
ISD::ROTL, MVT::v4i32, { 1, 3, 1, 1 } },
3638 {
ISD::ROTL, MVT::v8i16, { 1, 3, 1, 1 } },
3639 {
ISD::ROTL, MVT::v16i8, { 1, 3, 1, 1 } },
3640 {
ISD::ROTR, MVT::v4i64, { 4, 7, 8, 9 } },
3641 {
ISD::ROTR, MVT::v8i32, { 4, 7, 8, 9 } },
3642 {
ISD::ROTR, MVT::v16i16, { 4, 7, 8, 9 } },
3643 {
ISD::ROTR, MVT::v32i8, { 4, 7, 8, 9 } },
3644 {
ISD::ROTR, MVT::v2i64, { 1, 3, 3, 3 } },
3645 {
ISD::ROTR, MVT::v4i32, { 1, 3, 3, 3 } },
3646 {
ISD::ROTR, MVT::v8i16, { 1, 3, 3, 3 } },
3647 {
ISD::ROTR, MVT::v16i8, { 1, 3, 3, 3 } }
3650 {
ISD::ABS, MVT::v2i64, { 2, 4, 3, 5 } },
3651 {
ISD::ABS, MVT::v4i64, { 2, 4, 3, 5 } },
3652 {
ISD::ABS, MVT::v4i32, { 1, 1, 1, 1 } },
3653 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 2 } },
3654 {
ISD::ABS, MVT::v8i16, { 1, 1, 1, 1 } },
3655 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 2 } },
3656 {
ISD::ABS, MVT::v16i8, { 1, 1, 1, 1 } },
3657 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 2 } },
3672 {
ISD::CTLZ, MVT::v2i64, { 7, 18, 24, 25 } },
3673 {
ISD::CTLZ, MVT::v4i64, { 14, 18, 24, 44 } },
3674 {
ISD::CTLZ, MVT::v4i32, { 5, 16, 19, 20 } },
3675 {
ISD::CTLZ, MVT::v8i32, { 10, 16, 19, 34 } },
3676 {
ISD::CTLZ, MVT::v8i16, { 4, 13, 14, 15 } },
3677 {
ISD::CTLZ, MVT::v16i16, { 6, 14, 14, 24 } },
3678 {
ISD::CTLZ, MVT::v16i8, { 3, 12, 9, 10 } },
3679 {
ISD::CTLZ, MVT::v32i8, { 4, 12, 9, 14 } },
3680 {
ISD::CTPOP, MVT::v2i64, { 3, 9, 10, 10 } },
3681 {
ISD::CTPOP, MVT::v4i64, { 4, 9, 10, 14 } },
3682 {
ISD::CTPOP, MVT::v4i32, { 7, 12, 14, 14 } },
3683 {
ISD::CTPOP, MVT::v8i32, { 7, 12, 14, 18 } },
3684 {
ISD::CTPOP, MVT::v8i16, { 3, 7, 11, 11 } },
3685 {
ISD::CTPOP, MVT::v16i16, { 6, 8, 11, 18 } },
3688 {
ISD::CTTZ, MVT::v2i64, { 4, 11, 13, 13 } },
3689 {
ISD::CTTZ, MVT::v4i64, { 5, 11, 13, 20 } },
3690 {
ISD::CTTZ, MVT::v4i32, { 7, 14, 17, 17 } },
3691 {
ISD::CTTZ, MVT::v8i32, { 7, 15, 17, 24 } },
3692 {
ISD::CTTZ, MVT::v8i16, { 4, 9, 14, 14 } },
3693 {
ISD::CTTZ, MVT::v16i16, { 6, 9, 14, 24 } },
3694 {
ISD::CTTZ, MVT::v16i8, { 3, 7, 11, 11 } },
3695 {
ISD::CTTZ, MVT::v32i8, { 5, 7, 11, 18 } },
3698 {
ISD::SMAX, MVT::v2i64, { 2, 7, 2, 3 } },
3699 {
ISD::SMAX, MVT::v4i64, { 2, 7, 2, 3 } },
3700 {
ISD::SMAX, MVT::v8i32, { 1, 1, 1, 2 } },
3701 {
ISD::SMAX, MVT::v16i16, { 1, 1, 1, 2 } },
3702 {
ISD::SMAX, MVT::v32i8, { 1, 1, 1, 2 } },
3703 {
ISD::SMIN, MVT::v2i64, { 2, 7, 2, 3 } },
3704 {
ISD::SMIN, MVT::v4i64, { 2, 7, 2, 3 } },
3705 {
ISD::SMIN, MVT::v8i32, { 1, 1, 1, 2 } },
3706 {
ISD::SMIN, MVT::v16i16, { 1, 1, 1, 2 } },
3707 {
ISD::SMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3713 {
ISD::UMAX, MVT::v2i64, { 2, 8, 5, 6 } },
3714 {
ISD::UMAX, MVT::v4i64, { 2, 8, 5, 8 } },
3715 {
ISD::UMAX, MVT::v8i32, { 1, 1, 1, 2 } },
3716 {
ISD::UMAX, MVT::v16i16, { 1, 1, 1, 2 } },
3717 {
ISD::UMAX, MVT::v32i8, { 1, 1, 1, 2 } },
3718 {
ISD::UMIN, MVT::v2i64, { 2, 8, 5, 6 } },
3719 {
ISD::UMIN, MVT::v4i64, { 2, 8, 5, 8 } },
3720 {
ISD::UMIN, MVT::v8i32, { 1, 1, 1, 2 } },
3721 {
ISD::UMIN, MVT::v16i16, { 1, 1, 1, 2 } },
3722 {
ISD::UMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3734 {
ISD::FSQRT, MVT::v8f32, { 14, 21, 1, 3 } },
3736 {
ISD::FSQRT, MVT::v2f64, { 14, 21, 1, 1 } },
3737 {
ISD::FSQRT, MVT::v4f64, { 28, 35, 1, 3 } },
3740 {
ISD::ABS, MVT::v4i64, { 6, 8, 6, 12 } },
3741 {
ISD::ABS, MVT::v8i32, { 3, 6, 4, 5 } },
3742 {
ISD::ABS, MVT::v16i16, { 3, 6, 4, 5 } },
3743 {
ISD::ABS, MVT::v32i8, { 3, 6, 4, 5 } },
3756 {
ISD::BSWAP, MVT::v16i16, { 5, 6, 5, 10 } },
3758 {
ISD::CTLZ, MVT::v4i64, { 29, 33, 49, 58 } },
3759 {
ISD::CTLZ, MVT::v2i64, { 14, 24, 24, 28 } },
3760 {
ISD::CTLZ, MVT::v8i32, { 24, 28, 39, 48 } },
3761 {
ISD::CTLZ, MVT::v4i32, { 12, 20, 19, 23 } },
3762 {
ISD::CTLZ, MVT::v16i16, { 19, 22, 29, 38 } },
3763 {
ISD::CTLZ, MVT::v8i16, { 9, 16, 14, 18 } },
3764 {
ISD::CTLZ, MVT::v32i8, { 14, 15, 19, 28 } },
3765 {
ISD::CTLZ, MVT::v16i8, { 7, 12, 9, 13 } },
3766 {
ISD::CTPOP, MVT::v4i64, { 14, 18, 19, 28 } },
3767 {
ISD::CTPOP, MVT::v2i64, { 7, 14, 10, 14 } },
3768 {
ISD::CTPOP, MVT::v8i32, { 18, 24, 27, 36 } },
3769 {
ISD::CTPOP, MVT::v4i32, { 9, 20, 14, 18 } },
3770 {
ISD::CTPOP, MVT::v16i16, { 16, 21, 22, 31 } },
3771 {
ISD::CTPOP, MVT::v8i16, { 8, 18, 11, 15 } },
3772 {
ISD::CTPOP, MVT::v32i8, { 13, 15, 16, 25 } },
3773 {
ISD::CTPOP, MVT::v16i8, { 6, 12, 8, 12 } },
3774 {
ISD::CTTZ, MVT::v4i64, { 17, 22, 24, 33 } },
3775 {
ISD::CTTZ, MVT::v2i64, { 9, 19, 13, 17 } },
3776 {
ISD::CTTZ, MVT::v8i32, { 21, 27, 32, 41 } },
3777 {
ISD::CTTZ, MVT::v4i32, { 11, 24, 17, 21 } },
3778 {
ISD::CTTZ, MVT::v16i16, { 18, 24, 27, 36 } },
3779 {
ISD::CTTZ, MVT::v8i16, { 9, 21, 14, 18 } },
3780 {
ISD::CTTZ, MVT::v32i8, { 15, 18, 21, 30 } },
3781 {
ISD::CTTZ, MVT::v16i8, { 8, 16, 11, 15 } },
3784 {
ISD::SMAX, MVT::v4i64, { 6, 9, 6, 12 } },
3785 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 4 } },
3786 {
ISD::SMAX, MVT::v8i32, { 4, 6, 5, 6 } },
3787 {
ISD::SMAX, MVT::v16i16, { 4, 6, 5, 6 } },
3788 {
ISD::SMAX, MVT::v32i8, { 4, 6, 5, 6 } },
3789 {
ISD::SMIN, MVT::v4i64, { 6, 9, 6, 12 } },
3790 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
3791 {
ISD::SMIN, MVT::v8i32, { 4, 6, 5, 6 } },
3792 {
ISD::SMIN, MVT::v16i16, { 4, 6, 5, 6 } },
3793 {
ISD::SMIN, MVT::v32i8, { 4, 6, 5, 6 } },
3799 {
ISD::UMAX, MVT::v4i64, { 9, 10, 11, 17 } },
3800 {
ISD::UMAX, MVT::v2i64, { 4, 8, 5, 7 } },
3801 {
ISD::UMAX, MVT::v8i32, { 4, 6, 5, 6 } },
3802 {
ISD::UMAX, MVT::v16i16, { 4, 6, 5, 6 } },
3803 {
ISD::UMAX, MVT::v32i8, { 4, 6, 5, 6 } },
3804 {
ISD::UMIN, MVT::v4i64, { 9, 10, 11, 17 } },
3805 {
ISD::UMIN, MVT::v2i64, { 4, 8, 5, 7 } },
3806 {
ISD::UMIN, MVT::v8i32, { 4, 6, 5, 6 } },
3807 {
ISD::UMIN, MVT::v16i16, { 4, 6, 5, 6 } },
3808 {
ISD::UMIN, MVT::v32i8, { 4, 6, 5, 6 } },
3819 {
ISD::FSQRT, MVT::v4f32, { 21, 21, 1, 1 } },
3820 {
ISD::FSQRT, MVT::v8f32, { 42, 42, 1, 3 } },
3822 {
ISD::FSQRT, MVT::v2f64, { 27, 27, 1, 1 } },
3823 {
ISD::FSQRT, MVT::v4f64, { 54, 54, 1, 3 } },
3827 {
ISD::FSQRT, MVT::v4f32, { 37, 41, 1, 5 } },
3829 {
ISD::FSQRT, MVT::v2f64, { 67, 71, 1, 5 } },
3836 {
ISD::FSQRT, MVT::v4f32, { 40, 41, 1, 5 } },
3838 {
ISD::FSQRT, MVT::v2f64, { 70, 71, 1, 5 } },
3848 {
ISD::FSQRT, MVT::v4f32, { 18, 18, 1, 1 } },
3851 {
ISD::ABS, MVT::v2i64, { 3, 4, 3, 5 } },
3852 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 3 } },
3853 {
ISD::SMAX, MVT::v4i32, { 1, 1, 1, 1 } },
3854 {
ISD::SMAX, MVT::v16i8, { 1, 1, 1, 1 } },
3855 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
3856 {
ISD::SMIN, MVT::v4i32, { 1, 1, 1, 1 } },
3857 {
ISD::SMIN, MVT::v16i8, { 1, 1, 1, 1 } },
3858 {
ISD::UMAX, MVT::v2i64, { 2, 11, 6, 7 } },
3859 {
ISD::UMAX, MVT::v4i32, { 1, 1, 1, 1 } },
3860 {
ISD::UMAX, MVT::v8i16, { 1, 1, 1, 1 } },
3861 {
ISD::UMIN, MVT::v2i64, { 2, 11, 6, 7 } },
3862 {
ISD::UMIN, MVT::v4i32, { 1, 1, 1, 1 } },
3863 {
ISD::UMIN, MVT::v8i16, { 1, 1, 1, 1 } },
3866 {
ISD::ABS, MVT::v4i32, { 1, 2, 1, 1 } },
3867 {
ISD::ABS, MVT::v8i16, { 1, 2, 1, 1 } },
3868 {
ISD::ABS, MVT::v16i8, { 1, 2, 1, 1 } },
3876 {
ISD::CTLZ, MVT::v2i64, { 18, 28, 28, 35 } },
3877 {
ISD::CTLZ, MVT::v4i32, { 15, 20, 22, 28 } },
3878 {
ISD::CTLZ, MVT::v8i16, { 13, 17, 16, 22 } },
3879 {
ISD::CTLZ, MVT::v16i8, { 11, 15, 10, 16 } },
3880 {
ISD::CTPOP, MVT::v2i64, { 13, 19, 12, 18 } },
3881 {
ISD::CTPOP, MVT::v4i32, { 18, 24, 16, 22 } },
3882 {
ISD::CTPOP, MVT::v8i16, { 13, 18, 14, 20 } },
3883 {
ISD::CTPOP, MVT::v16i8, { 11, 12, 10, 16 } },
3884 {
ISD::CTTZ, MVT::v2i64, { 13, 25, 15, 22 } },
3885 {
ISD::CTTZ, MVT::v4i32, { 18, 26, 19, 25 } },
3886 {
ISD::CTTZ, MVT::v8i16, { 13, 20, 17, 23 } },
3887 {
ISD::CTTZ, MVT::v16i8, { 11, 16, 13, 19 } }
3890 {
ISD::ABS, MVT::v2i64, { 3, 6, 5, 5 } },
3891 {
ISD::ABS, MVT::v4i32, { 1, 4, 4, 4 } },
3892 {
ISD::ABS, MVT::v8i16, { 1, 2, 3, 3 } },
3893 {
ISD::ABS, MVT::v16i8, { 1, 2, 3, 3 } },
3898 {
ISD::BSWAP, MVT::v2i64, { 5, 6, 11, 11 } },
3901 {
ISD::CTLZ, MVT::v2i64, { 10, 45, 36, 38 } },
3902 {
ISD::CTLZ, MVT::v4i32, { 10, 45, 38, 40 } },
3903 {
ISD::CTLZ, MVT::v8i16, { 9, 38, 32, 34 } },
3904 {
ISD::CTLZ, MVT::v16i8, { 8, 39, 29, 32 } },
3905 {
ISD::CTPOP, MVT::v2i64, { 12, 26, 16, 18 } },
3906 {
ISD::CTPOP, MVT::v4i32, { 15, 29, 21, 23 } },
3907 {
ISD::CTPOP, MVT::v8i16, { 13, 25, 18, 20 } },
3908 {
ISD::CTPOP, MVT::v16i8, { 10, 21, 14, 16 } },
3909 {
ISD::CTTZ, MVT::v2i64, { 14, 28, 19, 21 } },
3910 {
ISD::CTTZ, MVT::v4i32, { 18, 31, 24, 26 } },
3911 {
ISD::CTTZ, MVT::v8i16, { 16, 27, 21, 23 } },
3912 {
ISD::CTTZ, MVT::v16i8, { 13, 23, 17, 19 } },
3915 {
ISD::SMAX, MVT::v2i64, { 4, 8, 15, 15 } },
3916 {
ISD::SMAX, MVT::v4i32, { 2, 4, 5, 5 } },
3917 {
ISD::SMAX, MVT::v8i16, { 1, 1, 1, 1 } },
3918 {
ISD::SMAX, MVT::v16i8, { 2, 4, 5, 5 } },
3919 {
ISD::SMIN, MVT::v2i64, { 4, 8, 15, 15 } },
3920 {
ISD::SMIN, MVT::v4i32, { 2, 4, 5, 5 } },
3921 {
ISD::SMIN, MVT::v8i16, { 1, 1, 1, 1 } },
3922 {
ISD::SMIN, MVT::v16i8, { 2, 4, 5, 5 } },
3927 {
ISD::UMAX, MVT::v2i64, { 4, 8, 15, 15 } },
3928 {
ISD::UMAX, MVT::v4i32, { 2, 5, 8, 8 } },
3929 {
ISD::UMAX, MVT::v8i16, { 1, 3, 3, 3 } },
3930 {
ISD::UMAX, MVT::v16i8, { 1, 1, 1, 1 } },
3931 {
ISD::UMIN, MVT::v2i64, { 4, 8, 15, 15 } },
3932 {
ISD::UMIN, MVT::v4i32, { 2, 5, 8, 8 } },
3933 {
ISD::UMIN, MVT::v8i16, { 1, 3, 3, 3 } },
3934 {
ISD::UMIN, MVT::v16i8, { 1, 1, 1, 1 } },
3940 {
ISD::FSQRT, MVT::v2f64, { 32, 32, 1, 1 } },
3946 {
ISD::FSQRT, MVT::v4f32, { 56, 56, 1, 2 } },
3973 {
ISD::ABS, MVT::i64, { 1, 2, 3, 4 } },
3981 {
ISD::ROTL, MVT::i64, { 2, 3, 1, 3 } },
3982 {
ISD::ROTR, MVT::i64, { 2, 3, 1, 3 } },
3984 {
ISD::FSHL, MVT::i64, { 4, 4, 1, 4 } },
3985 {
ISD::SMAX, MVT::i64, { 1, 3, 2, 3 } },
3986 {
ISD::SMIN, MVT::i64, { 1, 3, 2, 3 } },
3987 {
ISD::UMAX, MVT::i64, { 1, 3, 2, 3 } },
3988 {
ISD::UMIN, MVT::i64, { 1, 3, 2, 3 } },
3994 {
ISD::ABS, MVT::i32, { 1, 2, 3, 4 } },
3995 {
ISD::ABS, MVT::i16, { 2, 2, 3, 4 } },
3996 {
ISD::ABS, MVT::i8, { 2, 4, 4, 4 } },
4017 {
ISD::ROTL, MVT::i32, { 2, 3, 1, 3 } },
4018 {
ISD::ROTL, MVT::i16, { 2, 3, 1, 3 } },
4020 {
ISD::ROTR, MVT::i32, { 2, 3, 1, 3 } },
4021 {
ISD::ROTR, MVT::i16, { 2, 3, 1, 3 } },
4026 {
ISD::FSHL, MVT::i32, { 4, 4, 1, 4 } },
4027 {
ISD::FSHL, MVT::i16, { 4, 4, 2, 5 } },
4029 {
ISD::SMAX, MVT::i32, { 1, 2, 2, 3 } },
4030 {
ISD::SMAX, MVT::i16, { 1, 4, 2, 4 } },
4032 {
ISD::SMIN, MVT::i32, { 1, 2, 2, 3 } },
4033 {
ISD::SMIN, MVT::i16, { 1, 4, 2, 4 } },
4035 {
ISD::UMAX, MVT::i32, { 1, 2, 2, 3 } },
4036 {
ISD::UMAX, MVT::i16, { 1, 4, 2, 4 } },
4038 {
ISD::UMIN, MVT::i32, { 1, 2, 2, 3 } },
4039 {
ISD::UMIN, MVT::i16, { 1, 4, 2, 4 } },
4059 case Intrinsic::abs:
4062 case Intrinsic::bitreverse:
4065 case Intrinsic::bswap:
4068 case Intrinsic::ctlz:
4071 case Intrinsic::ctpop:
4074 case Intrinsic::cttz:
4077 case Intrinsic::fshl:
4081 if (Args[0] == Args[1]) {
4085 if (isa_and_nonnull<ConstantInt>(Args[2]))
4090 case Intrinsic::fshr:
4095 if (Args[0] == Args[1]) {
4099 if (isa_and_nonnull<ConstantInt>(Args[2]))
4104 case Intrinsic::maxnum:
4105 case Intrinsic::minnum:
4109 case Intrinsic::sadd_sat:
4112 case Intrinsic::smax:
4115 case Intrinsic::smin:
4118 case Intrinsic::ssub_sat:
4121 case Intrinsic::uadd_sat:
4124 case Intrinsic::umax:
4127 case Intrinsic::umin:
4130 case Intrinsic::usub_sat:
4133 case Intrinsic::sqrt:
4136 case Intrinsic::sadd_with_overflow:
4137 case Intrinsic::ssub_with_overflow:
4140 OpTy =
RetTy->getContainedType(0);
4142 case Intrinsic::uadd_with_overflow:
4143 case Intrinsic::usub_with_overflow:
4146 OpTy =
RetTy->getContainedType(0);
4148 case Intrinsic::umul_with_overflow:
4149 case Intrinsic::smul_with_overflow:
4152 OpTy =
RetTy->getContainedType(0);
4159 MVT MTy = LT.second;
4175 return LT.first *
Cost;
4179 if (((ISD ==
ISD::CTTZ && !ST->hasBMI()) ||
4180 (ISD ==
ISD::CTLZ && !ST->hasLZCNT())) &&
4183 if (
auto *Cst = dyn_cast<ConstantInt>(Args[1]))
4184 if (Cst->isAllOnesValue())
4192 auto adjustTableCost = [](
int ISD,
unsigned Cost,
4200 return LegalizationCost * 1;
4202 return LegalizationCost * (int)
Cost;
4205 if (ST->useGLMDivSqrtCosts())
4207 if (
auto KindCost = Entry->Cost[
CostKind])
4208 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4211 if (ST->useSLMArithCosts())
4213 if (
auto KindCost = Entry->Cost[
CostKind])
4214 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4218 if (
const auto *Entry =
CostTableLookup(AVX512VBMI2CostTbl, ISD, MTy))
4219 if (
auto KindCost = Entry->Cost[
CostKind])
4220 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4223 if (ST->hasBITALG())
4224 if (
const auto *Entry =
CostTableLookup(AVX512BITALGCostTbl, ISD, MTy))
4225 if (
auto KindCost = Entry->Cost[
CostKind])
4226 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4229 if (ST->hasVPOPCNTDQ())
4230 if (
const auto *Entry =
CostTableLookup(AVX512VPOPCNTDQCostTbl, ISD, MTy))
4231 if (
auto KindCost = Entry->Cost[
CostKind])
4232 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4237 if (
auto KindCost = Entry->Cost[
CostKind])
4238 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4243 if (
auto KindCost = Entry->Cost[
CostKind])
4244 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4249 if (
auto KindCost = Entry->Cost[
CostKind])
4250 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4255 if (
auto KindCost = Entry->Cost[
CostKind])
4256 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4261 if (
auto KindCost = Entry->Cost[
CostKind])
4262 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4267 if (
auto KindCost = Entry->Cost[
CostKind])
4268 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4273 if (
auto KindCost = Entry->Cost[
CostKind])
4274 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4279 if (
auto KindCost = Entry->Cost[
CostKind])
4280 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4285 if (
auto KindCost = Entry->Cost[
CostKind])
4286 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4291 if (
auto KindCost = Entry->Cost[
CostKind])
4292 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4297 if (
auto KindCost = Entry->Cost[
CostKind])
4298 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4304 if (
auto KindCost = Entry->Cost[
CostKind])
4305 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4309 if (
auto KindCost = Entry->Cost[
CostKind])
4310 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4314 if (ST->hasLZCNT()) {
4317 if (
auto KindCost = Entry->Cost[
CostKind])
4318 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4322 if (
auto KindCost = Entry->Cost[
CostKind])
4323 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4327 if (ST->hasPOPCNT()) {
4330 if (
auto KindCost = Entry->Cost[
CostKind])
4331 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4335 if (
auto KindCost = Entry->Cost[
CostKind])
4336 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4340 if (ISD ==
ISD::BSWAP && ST->hasMOVBE() && ST->hasFastMOVBE()) {
4342 if (II->hasOneUse() && isa<StoreInst>(II->user_back()))
4344 if (
auto *LI = dyn_cast<LoadInst>(II->getOperand(0))) {
4345 if (LI->hasOneUse())
4353 if (
auto KindCost = Entry->Cost[
CostKind])
4354 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4358 if (
auto KindCost = Entry->Cost[
CostKind])
4359 return adjustTableCost(Entry->ISD, *KindCost, LT.first, ICA.
getFlags());
4382 if (
Index == -1U && (Opcode == Instruction::ExtractElement ||
4383 Opcode == Instruction::InsertElement)) {
4388 assert(isa<FixedVectorType>(Val) &&
"Fixed vector type expected");
4393 if (Opcode == Instruction::ExtractElement) {
4399 if (Opcode == Instruction::InsertElement) {
4407 if (
Index != -1U && (Opcode == Instruction::ExtractElement ||
4408 Opcode == Instruction::InsertElement)) {
4410 if (Opcode == Instruction::ExtractElement &&
4412 cast<FixedVectorType>(Val)->getNumElements() > 1)
4419 if (!LT.second.isVector())
4423 unsigned SizeInBits = LT.second.getSizeInBits();
4424 unsigned NumElts = LT.second.getVectorNumElements();
4425 unsigned SubNumElts = NumElts;
4430 if (SizeInBits > 128) {
4431 assert((SizeInBits % 128) == 0 &&
"Illegal vector");
4432 unsigned NumSubVecs = SizeInBits / 128;
4433 SubNumElts = NumElts / NumSubVecs;
4434 if (SubNumElts <=
Index) {
4435 RegisterFileMoveCost += (Opcode == Instruction::InsertElement ? 2 : 1);
4436 Index %= SubNumElts;
4440 MVT MScalarTy = LT.second.getScalarType();
4441 auto IsCheapPInsrPExtrInsertPS = [&]() {
4444 return (MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4446 (MScalarTy == MVT::f32 && ST->
hasSSE41() &&
4447 Opcode == Instruction::InsertElement);
4455 (Opcode != Instruction::InsertElement || !Op0 ||
4456 isa<UndefValue>(Op0)))
4457 return RegisterFileMoveCost;
4459 if (Opcode == Instruction::InsertElement &&
4460 isa_and_nonnull<UndefValue>(Op0)) {
4462 if (isa_and_nonnull<LoadInst>(Op1))
4463 return RegisterFileMoveCost;
4464 if (!IsCheapPInsrPExtrInsertPS()) {
4467 return 2 + RegisterFileMoveCost;
4469 return 1 + RegisterFileMoveCost;
4474 if (ScalarType->
isIntegerTy() && Opcode == Instruction::ExtractElement)
4475 return 1 + RegisterFileMoveCost;
4479 assert(ISD &&
"Unexpected vector opcode");
4480 if (ST->useSLMArithCosts())
4482 return Entry->Cost + RegisterFileMoveCost;
4485 if (IsCheapPInsrPExtrInsertPS())
4486 return 1 + RegisterFileMoveCost;
4495 if (Opcode == Instruction::InsertElement) {
4496 auto *SubTy = cast<VectorType>(Val);
4504 return ShuffleCost + IntOrFpCost + RegisterFileMoveCost;
4508 RegisterFileMoveCost;
4513 bool Insert,
bool Extract,
4516 cast<FixedVectorType>(Ty)->getNumElements() &&
4517 "Vector size mismatch");
4520 MVT MScalarTy = LT.second.getScalarType();
4521 unsigned LegalVectorBitWidth = LT.second.getSizeInBits();
4524 constexpr unsigned LaneBitWidth = 128;
4525 assert((LegalVectorBitWidth < LaneBitWidth ||
4526 (LegalVectorBitWidth % LaneBitWidth) == 0) &&
4529 const int NumLegalVectors = *LT.first.getValue();
4530 assert(NumLegalVectors >= 0 &&
"Negative cost!");
4535 if ((MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4537 (MScalarTy == MVT::f32 && ST->
hasSSE41())) {
4540 if (LegalVectorBitWidth <= LaneBitWidth) {
4556 assert((LegalVectorBitWidth % LaneBitWidth) == 0 &&
"Illegal vector");
4557 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
4558 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
4559 unsigned NumLegalElts =
4560 LT.second.getVectorNumElements() * NumLegalVectors;
4562 "Vector has been legalized to smaller element count");
4563 assert((NumLegalElts % NumLanesTotal) == 0 &&
4564 "Unexpected elts per lane");
4565 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
4567 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
4571 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
4573 NumEltsPerLane, NumEltsPerLane *
I);
4574 if (LaneEltMask.
isZero())
4585 APInt AffectedLanes =
4588 AffectedLanes, NumLegalVectors,
true);
4589 for (
int LegalVec = 0; LegalVec != NumLegalVectors; ++LegalVec) {
4590 for (
unsigned Lane = 0; Lane != NumLegalLanes; ++Lane) {
4591 unsigned I = NumLegalLanes * LegalVec + Lane;
4594 if (!AffectedLanes[
I] ||
4595 (Lane == 0 && FullyAffectedLegalVectors[LegalVec]))
4602 }
else if (LT.second.isVector()) {
4613 unsigned NumElts = LT.second.getVectorNumElements();
4615 PowerOf2Ceil(cast<FixedVectorType>(Ty)->getNumElements());
4616 Cost += (std::min<unsigned>(NumElts, Pow2Elts) - 1) * LT.first;
4625 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
4626 unsigned MaxElts = ST->
hasAVX2() ? 32 : 16;
4627 unsigned MOVMSKCost = (NumElts + MaxElts - 1) / MaxElts;
4631 if (LT.second.isVector()) {
4632 unsigned NumLegalElts =
4633 LT.second.getVectorNumElements() * NumLegalVectors;
4635 "Vector has been legalized to smaller element count");
4639 if (LegalVectorBitWidth > LaneBitWidth) {
4640 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
4641 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
4642 assert((NumLegalElts % NumLanesTotal) == 0 &&
4643 "Unexpected elts per lane");
4644 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
4648 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
4652 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
4654 NumEltsPerLane,
I * NumEltsPerLane);
4655 if (LaneEltMask.
isZero())
4660 LaneTy, LaneEltMask,
false, Extract,
CostKind);
4677 int VF,
const APInt &DemandedDstElts,
4683 auto bailout = [&]() {
4693 unsigned PromEltTyBits = EltTyBits;
4694 switch (EltTyBits) {
4725 int NumDstElements = VF * ReplicationFactor;
4739 if (PromEltTyBits != EltTyBits) {
4745 Instruction::SExt, PromSrcVecTy, SrcVecTy,
4752 ReplicationFactor, VF,
4758 "We expect that the legalization doesn't affect the element width, "
4759 "doesn't coalesce/split elements.");
4762 unsigned NumDstVectors =
4763 divideCeil(DstVecTy->getNumElements(), NumEltsPerDstVec);
4772 DemandedDstElts.
zext(NumDstVectors * NumEltsPerDstVec), NumDstVectors);
4773 unsigned NumDstVectorsDemanded = DemandedDstVectors.
popcount();
4778 return NumDstVectorsDemanded * SingleShuffleCost;
4789 if (
auto *SI = dyn_cast_or_null<StoreInst>(
I)) {
4792 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(SI->getPointerOperand())) {
4793 if (!
all_of(
GEP->indices(), [](
Value *V) { return isa<Constant>(V); }))
4800 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
4810 auto *VTy = dyn_cast<FixedVectorType>(Src);
4815 if (Opcode == Instruction::Store && OpInfo.
isConstant())
4821 if (!VTy || !LT.second.isVector()) {
4823 return (LT.second.isFloatingPoint() ?
Cost : 0) + LT.first * 1;
4826 bool IsLoad = Opcode == Instruction::Load;
4828 Type *EltTy = VTy->getElementType();
4833 const unsigned SrcNumElt = VTy->getNumElements();
4836 int NumEltRemaining = SrcNumElt;
4838 auto NumEltDone = [&]() {
return SrcNumElt - NumEltRemaining; };
4840 const int MaxLegalOpSizeBytes =
divideCeil(LT.second.getSizeInBits(), 8);
4843 const unsigned XMMBits = 128;
4844 if (XMMBits % EltTyBits != 0)
4848 const int NumEltPerXMM = XMMBits / EltTyBits;
4852 for (
int CurrOpSizeBytes = MaxLegalOpSizeBytes, SubVecEltsLeft = 0;
4853 NumEltRemaining > 0; CurrOpSizeBytes /= 2) {
4855 if ((8 * CurrOpSizeBytes) % EltTyBits != 0)
4859 int CurrNumEltPerOp = (8 * CurrOpSizeBytes) / EltTyBits;
4861 assert(CurrOpSizeBytes > 0 && CurrNumEltPerOp > 0 &&
"How'd we get here?");
4862 assert((((NumEltRemaining * EltTyBits) < (2 * 8 * CurrOpSizeBytes)) ||
4863 (CurrOpSizeBytes == MaxLegalOpSizeBytes)) &&
4864 "Unless we haven't halved the op size yet, "
4865 "we have less than two op's sized units of work left.");
4867 auto *CurrVecTy = CurrNumEltPerOp > NumEltPerXMM
4871 assert(CurrVecTy->getNumElements() % CurrNumEltPerOp == 0 &&
4872 "After halving sizes, the vector elt count is no longer a multiple "
4873 "of number of elements per operation?");
4874 auto *CoalescedVecTy =
4875 CurrNumEltPerOp == 1
4879 EltTyBits * CurrNumEltPerOp),
4880 CurrVecTy->getNumElements() / CurrNumEltPerOp);
4883 "coalesciing elements doesn't change vector width.");
4885 while (NumEltRemaining > 0) {
4886 assert(SubVecEltsLeft >= 0 &&
"Subreg element count overconsumtion?");
4890 if (NumEltRemaining < CurrNumEltPerOp &&
4891 (!IsLoad || Alignment.
valueOrOne() < CurrOpSizeBytes) &&
4892 CurrOpSizeBytes != 1)
4895 bool Is0thSubVec = (NumEltDone() % LT.second.getVectorNumElements()) == 0;
4898 if (SubVecEltsLeft == 0) {
4899 SubVecEltsLeft += CurrVecTy->getNumElements();
4904 VTy, std::nullopt,
CostKind, NumEltDone(),
4912 if (CurrOpSizeBytes <= 32 / 8 && !Is0thSubVec) {
4913 int NumEltDoneInCurrXMM = NumEltDone() % NumEltPerXMM;
4914 assert(NumEltDoneInCurrXMM % CurrNumEltPerOp == 0 &&
"");
4915 int CoalescedVecEltIdx = NumEltDoneInCurrXMM / CurrNumEltPerOp;
4916 APInt DemandedElts =
4918 CoalescedVecEltIdx, CoalescedVecEltIdx + 1);
4919 assert(DemandedElts.
popcount() == 1 &&
"Inserting single value");
4929 if (CurrOpSizeBytes == 32 && ST->isUnalignedMem32Slow())
4931 else if (CurrOpSizeBytes < 4)
4936 SubVecEltsLeft -= CurrNumEltPerOp;
4937 NumEltRemaining -= CurrNumEltPerOp;
4942 assert(NumEltRemaining <= 0 &&
"Should have processed all the elements.");
4951 bool IsLoad = (Instruction::Load == Opcode);
4952 bool IsStore = (Instruction::Store == Opcode);
4954 auto *SrcVTy = dyn_cast<FixedVectorType>(SrcTy);
4959 unsigned NumElem = SrcVTy->getNumElements();
4967 MaskTy, DemandedElts,
false,
true,
CostKind);
4972 InstructionCost MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
4974 SrcVTy, DemandedElts, IsLoad, IsStore,
CostKind);
4978 return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
4985 if (VT.isSimple() && LT.second != VT.getSimpleVT() &&
4986 LT.second.getVectorNumElements() == NumElem)
4993 else if (LT.first * LT.second.getVectorNumElements() > NumElem) {
4995 LT.second.getVectorNumElements());
5003 return Cost + LT.first * (IsLoad ? 2 : 8);
5006 return Cost + LT.first;
5014 if (
Info.isSameBase() &&
Info.isKnownStride()) {
5018 if (
const auto *BaseGEP = dyn_cast<GetElementPtrInst>(
Base)) {
5020 return getGEPCost(BaseGEP->getSourceElementType(),
5021 BaseGEP->getPointerOperand(), Indices,
nullptr,
5036 const unsigned NumVectorInstToHideOverhead = 10;
5049 return NumVectorInstToHideOverhead;
5059 std::optional<FastMathFlags> FMF,
5100 assert(ISD &&
"Invalid opcode");
5108 if (ST->useSLMArithCosts())
5123 MVT MTy = LT.second;
5125 auto *ValVTy = cast<FixedVectorType>(ValTy);
5138 if (LT.first != 1 && MTy.
isVector() &&
5144 ArithmeticCost *= LT.first - 1;
5147 if (ST->useSLMArithCosts())
5149 return ArithmeticCost + Entry->Cost;
5153 return ArithmeticCost + Entry->Cost;
5157 return ArithmeticCost + Entry->Cost;
5206 if (ValVTy->getElementType()->isIntegerTy(1)) {
5208 if (LT.first != 1 && MTy.
isVector() &&
5214 ArithmeticCost *= LT.first - 1;
5218 if (
const auto *Entry =
CostTableLookup(AVX512BoolReduction, ISD, MTy))
5219 return ArithmeticCost + Entry->Cost;
5222 return ArithmeticCost + Entry->Cost;
5225 return ArithmeticCost + Entry->Cost;
5228 return ArithmeticCost + Entry->Cost;
5233 unsigned NumVecElts = ValVTy->getNumElements();
5234 unsigned ScalarSize = ValVTy->getScalarSizeInBits();
5244 if (LT.first != 1 && MTy.
isVector() &&
5250 ReductionCost *= LT.first - 1;
5256 while (NumVecElts > 1) {
5258 unsigned Size = NumVecElts * ScalarSize;
5267 }
else if (
Size == 128) {
5270 if (ValVTy->isFloatingPointTy())
5277 std::nullopt,
CostKind, 0,
nullptr);
5278 }
else if (
Size == 64) {
5281 if (ValVTy->isFloatingPointTy())
5288 std::nullopt,
CostKind, 0,
nullptr);
5294 Instruction::LShr, ShiftTy,
CostKind,
5321 MVT MTy = LT.second;
5325 ISD = (IID == Intrinsic::umin || IID == Intrinsic::umax) ?
ISD::UMIN
5329 "Expected float point or integer vector type.");
5330 ISD = (IID == Intrinsic::minnum || IID == Intrinsic::maxnum)
5398 auto *ValVTy = cast<FixedVectorType>(ValTy);
5399 unsigned NumVecElts = ValVTy->getNumElements();
5403 if (LT.first != 1 && MTy.
isVector() &&
5409 MinMaxCost *= LT.first - 1;
5415 return MinMaxCost + Entry->Cost;
5419 return MinMaxCost + Entry->Cost;
5423 return MinMaxCost + Entry->Cost;
5427 return MinMaxCost + Entry->Cost;
5439 while (NumVecElts > 1) {
5441 unsigned Size = NumVecElts * ScalarSize;
5449 }
else if (
Size == 128) {
5458 std::nullopt,
CostKind, 0,
nullptr);
5459 }
else if (
Size == 64) {
5467 std::nullopt,
CostKind, 0,
nullptr);
5520 if (BitSize % 64 != 0)
5521 ImmVal = Imm.sext(
alignTo(BitSize, 64));
5526 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
5532 return std::max<InstructionCost>(1,
Cost);
5547 unsigned ImmIdx = ~0U;
5551 case Instruction::GetElementPtr:
5558 case Instruction::Store:
5561 case Instruction::ICmp:
5567 if (
Idx == 1 && Imm.getBitWidth() == 64) {
5568 uint64_t ImmVal = Imm.getZExtValue();
5569 if (ImmVal == 0x100000000ULL || ImmVal == 0xffffffff)
5574 case Instruction::And:
5578 if (
Idx == 1 && Imm.getBitWidth() == 64 && Imm.isIntN(32))
5582 case Instruction::Add:
5583 case Instruction::Sub:
5585 if (
Idx == 1 && Imm.getBitWidth() == 64 && Imm.getZExtValue() == 0x80000000)
5589 case Instruction::UDiv:
5590 case Instruction::SDiv:
5591 case Instruction::URem:
5592 case Instruction::SRem:
5597 case Instruction::Mul:
5598 case Instruction::Or:
5599 case Instruction::Xor:
5603 case Instruction::Shl:
5604 case Instruction::LShr:
5605 case Instruction::AShr:
5609 case Instruction::Trunc:
5610 case Instruction::ZExt:
5611 case Instruction::SExt:
5612 case Instruction::IntToPtr:
5613 case Instruction::PtrToInt:
5614 case Instruction::BitCast:
5615 case Instruction::PHI:
5616 case Instruction::Call:
5617 case Instruction::Select:
5618 case Instruction::Ret:
5619 case Instruction::Load:
5623 if (
Idx == ImmIdx) {
5648 case Intrinsic::sadd_with_overflow:
5649 case Intrinsic::uadd_with_overflow:
5650 case Intrinsic::ssub_with_overflow:
5651 case Intrinsic::usub_with_overflow:
5652 case Intrinsic::smul_with_overflow:
5653 case Intrinsic::umul_with_overflow:
5654 if ((
Idx == 1) && Imm.getBitWidth() <= 64 && Imm.isSignedIntN(32))
5657 case Intrinsic::experimental_stackmap:
5658 if ((
Idx < 2) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
5661 case Intrinsic::experimental_patchpoint_void:
5662 case Intrinsic::experimental_patchpoint:
5663 if ((
Idx < 4) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
5674 return Opcode == Instruction::PHI ? 0 : 1;
5679int X86TTIImpl::getGatherOverhead()
const {
5692int X86TTIImpl::getScatterOverhead()
const {
5705 assert(isa<VectorType>(SrcVTy) &&
"Unexpected type in getGSVectorCost");
5706 unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
5716 if (IndexSize < 64 || !
GEP)
5719 unsigned NumOfVarIndices = 0;
5720 const Value *Ptrs =
GEP->getPointerOperand();
5723 for (
unsigned I = 1, E =
GEP->getNumOperands();
I != E; ++
I) {
5724 if (isa<Constant>(
GEP->getOperand(
I)))
5726 Type *IndxTy =
GEP->getOperand(
I)->getType();
5727 if (
auto *IndexVTy = dyn_cast<VectorType>(IndxTy))
5728 IndxTy = IndexVTy->getElementType();
5730 !isa<SExtInst>(
GEP->getOperand(
I))) ||
5731 ++NumOfVarIndices > 1)
5734 return (
unsigned)32;
5739 unsigned IndexSize = (ST->
hasAVX512() && VF >= 16)
5740 ? getIndexSizeInBits(
Ptr,
DL)
5748 *std::max(IdxsLT.first, SrcLT.first).getValue();
5749 if (SplitFactor > 1) {
5753 return SplitFactor * getGSVectorCost(Opcode, SplitSrcTy,
Ptr, Alignment,
5759 const int GSOverhead = (Opcode == Instruction::Load)
5760 ? getGatherOverhead()
5761 : getScatterOverhead();
5777 bool VariableMask,
Align Alignment,
5780 unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
5789 MaskTy, DemandedElts,
false,
true,
CostKind);
5794 MaskUnpackCost += VF * (BranchCost + ScalarCompareCost);
5799 DemandedElts,
false,
true,
CostKind);
5809 cast<FixedVectorType>(SrcVTy), DemandedElts,
5810 Opcode == Instruction::Load,
5811 Opcode == Instruction::Store,
CostKind);
5813 return AddressUnpackCost + MemoryOpCost + MaskUnpackCost + InsertExtractCost;
5818 unsigned Opcode,
Type *SrcVTy,
const Value *
Ptr,
bool VariableMask,
5822 if ((Opcode == Instruction::Load &&
5825 Align(Alignment))) ||
5826 (Opcode == Instruction::Store &&
5837 if (!PtrTy &&
Ptr->getType()->isVectorTy())
5838 PtrTy = dyn_cast<PointerType>(
5839 cast<VectorType>(
Ptr->getType())->getElementType());
5840 assert(PtrTy &&
"Unexpected type for Ptr argument");
5843 if ((Opcode == Instruction::Load &&
5846 Align(Alignment)))) ||
5847 (Opcode == Instruction::Store &&
5850 Align(Alignment)))))
5851 return getGSScalarCost(Opcode, SrcVTy, VariableMask, Alignment,
5869 return ST->hasMacroFusion() || ST->hasBranchFusion();
5877 if (isa<VectorType>(DataTy) &&
5878 cast<FixedVectorType>(DataTy)->getNumElements() == 1)
5888 if (ScalarTy->
isHalfTy() && ST->hasBWI())
5898 return IntWidth == 32 || IntWidth == 64 ||
5899 ((IntWidth == 8 || IntWidth == 16) && ST->hasBWI());
5911 if (Alignment >= DataSize && (DataSize == 16 || DataSize == 32))
5928 if (Alignment < DataSize || DataSize < 4 || DataSize > 32 ||
5950 if (!isa<VectorType>(DataTy))
5957 if (cast<FixedVectorType>(DataTy)->getNumElements() == 1)
5960 Type *ScalarTy = cast<VectorType>(DataTy)->getElementType();
5969 return IntWidth == 32 || IntWidth == 64 ||
5970 ((IntWidth == 8 || IntWidth == 16) && ST->hasVBMI2());
5977bool X86TTIImpl::supportsGather()
const {
5991 unsigned NumElts = cast<FixedVectorType>(VTy)->getNumElements();
5992 return NumElts == 1 ||
5993 (ST->
hasAVX512() && (NumElts == 2 || (NumElts == 4 && !ST->hasVLX())));
6008 return IntWidth == 32 || IntWidth == 64;
6012 if (!supportsGather() || !ST->preferGather())
6027 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
6028 assert(OpcodeMask.
size() == NumElements &&
"Mask and VecTy are incompatible");
6033 for (
int Lane : seq<int>(0, NumElements)) {
6034 unsigned Opc = OpcodeMask.
test(Lane) ? Opcode1 : Opcode0;
6036 if (Lane % 2 == 0 && Opc != Instruction::FSub)
6038 if (Lane % 2 == 1 && Opc != Instruction::FAdd)
6042 Type *ElemTy = cast<VectorType>(VecTy)->getElementType();
6044 return ST->
hasSSE3() && NumElements % 4 == 0;
6046 return ST->
hasSSE3() && NumElements % 2 == 0;
6052 if (!ST->
hasAVX512() || !ST->preferScatter())
6065 if (
I->getOpcode() == Instruction::FDiv)
6081 TM.getSubtargetImpl(*Caller)->getFeatureBits();
6083 TM.getSubtargetImpl(*Callee)->getFeatureBits();
6086 FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
6087 FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
6088 if (RealCallerBits == RealCalleeBits)
6093 if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
6097 if (
const auto *CB = dyn_cast<CallBase>(&
I)) {
6099 if (CB->isInlineAsm())
6103 for (
Value *Arg : CB->args())
6104 Types.push_back(Arg->getType());
6105 if (!CB->getType()->isVoidTy())
6106 Types.push_back(CB->getType());
6109 auto IsSimpleTy = [](
Type *Ty) {
6110 return !Ty->isVectorTy() && !Ty->isAggregateType();
6112 if (
all_of(Types, IsSimpleTy))
6115 if (
Function *NestedCallee = CB->getCalledFunction()) {
6117 if (NestedCallee->isIntrinsic())
6152 [](
Type *
T) {
return T->isVectorTy() ||
T->isAggregateType(); });
6161 Options.AllowOverlappingLoads =
true;
6166 if (PreferredWidth >= 512 && ST->
hasAVX512() && ST->hasEVEX512())
6167 Options.LoadSizes.push_back(64);
6168 if (PreferredWidth >= 256 && ST->
hasAVX())
Options.LoadSizes.push_back(32);
6169 if (PreferredWidth >= 128 && ST->
hasSSE2())
Options.LoadSizes.push_back(16);
6171 if (ST->is64Bit()) {
6172 Options.LoadSizes.push_back(8);
6174 Options.LoadSizes.push_back(4);
6175 Options.LoadSizes.push_back(2);
6176 Options.LoadSizes.push_back(1);
6181 return supportsGather();
6192 return !(ST->isAtom());
6212 unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
6218 bool UseMaskedMemOp = UseMaskForCond || UseMaskForGaps;
6230 if (UseMaskedMemOp) {
6232 for (
unsigned Index : Indices) {
6233 assert(
Index < Factor &&
"Invalid index for interleaved memory op");
6234 for (
unsigned Elm = 0; Elm < VF; Elm++)
6235 DemandedLoadStoreElts.
setBit(
Index + Elm * Factor);
6242 UseMaskForGaps ? DemandedLoadStoreElts
6251 if (UseMaskForGaps) {
6257 if (Opcode == Instruction::Load) {
6264 static const CostTblEntry AVX512InterleavedLoadTbl[] = {
6265 {3, MVT::v16i8, 12},
6266 {3, MVT::v32i8, 14},
6267 {3, MVT::v64i8, 22},
6270 if (
const auto *Entry =
6272 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6282 ShuffleKind, SingleMemOpTy, std::nullopt,
CostKind, 0,
nullptr);
6284 unsigned NumOfLoadsInInterleaveGrp =
6285 Indices.
size() ? Indices.
size() : Factor;
6294 unsigned NumOfUnfoldedLoads =
6295 UseMaskedMemOp || NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2;
6298 unsigned NumOfShufflesPerResult =
6299 std::max((
unsigned)1, (
unsigned)(NumOfMemOps - 1));
6306 NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;
6309 MaskCost + NumOfUnfoldedLoads * MemOpCost +
6316 assert(Opcode == Instruction::Store &&
6317 "Expected Store Instruction at this point");
6319 static const CostTblEntry AVX512InterleavedStoreTbl[] = {
6320 {3, MVT::v16i8, 12},
6321 {3, MVT::v32i8, 14},
6322 {3, MVT::v64i8, 26},
6325 {4, MVT::v16i8, 11},
6326 {4, MVT::v32i8, 14},
6330 if (
const auto *Entry =
6332 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6337 unsigned NumOfSources = Factor;
6340 unsigned NumOfShufflesPerStore = NumOfSources - 1;
6344 unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2;
6347 NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) +
6355 bool UseMaskForCond,
bool UseMaskForGaps) {
6356 auto *VecTy = cast<FixedVectorType>(
BaseTy);
6358 auto isSupportedOnAVX512 = [&](
Type *VecTy) {
6359 Type *EltTy = cast<VectorType>(VecTy)->getElementType();
6364 return ST->hasBWI();
6366 return ST->hasBF16();
6369 if (ST->
hasAVX512() && isSupportedOnAVX512(VecTy))
6371 Opcode, VecTy, Factor, Indices, Alignment,
6374 if (UseMaskForCond || UseMaskForGaps)
6377 UseMaskForCond, UseMaskForGaps);
6397 unsigned VF = VecTy->getNumElements() / Factor;
6398 Type *ScalarTy = VecTy->getElementType();
6430 {2, MVT::v16i16, 9},
6431 {2, MVT::v32i16, 18},
6434 {2, MVT::v16i32, 8},
6435 {2, MVT::v32i32, 16},
6439 {2, MVT::v16i64, 16},
6440 {2, MVT::v32i64, 32},
6445 {3, MVT::v16i8, 11},
6446 {3, MVT::v32i8, 14},
6451 {3, MVT::v16i16, 28},
6452 {3, MVT::v32i16, 56},
6457 {3, MVT::v16i32, 14},
6458 {3, MVT::v32i32, 32},
6462 {3, MVT::v8i64, 10},
6463 {3, MVT::v16i64, 20},
6468 {4, MVT::v16i8, 24},
6469 {4, MVT::v32i8, 56},
6472 {4, MVT::v4i16, 17},
6473 {4, MVT::v8i16, 33},
6474 {4, MVT::v16i16, 75},
6475 {4, MVT::v32i16, 150},
6479 {4, MVT::v8i32, 16},
6480 {4, MVT::v16i32, 32},
6481 {4, MVT::v32i32, 68},
6485 {4, MVT::v8i64, 20},
6486 {4, MVT::v16i64, 40},
6491 {6, MVT::v16i8, 43},
6492 {6, MVT::v32i8, 82},
6494 {6, MVT::v2i16, 13},
6496 {6, MVT::v8i16, 39},
6497 {6, MVT::v16i16, 106},
6498 {6, MVT::v32i16, 212},
6501 {6, MVT::v4i32, 15},
6502 {6, MVT::v8i32, 31},
6503 {6, MVT::v16i32, 64},
6506 {6, MVT::v4i64, 18},
6507 {6, MVT::v8i64, 36},
6512 static const CostTblEntry SSSE3InterleavedLoadTbl[] = {
6526 static const CostTblEntry AVX2InterleavedStoreTbl[] = {
6531 {2, MVT::v16i16, 4},
6532 {2, MVT::v32i16, 8},
6536 {2, MVT::v16i32, 8},
6537 {2, MVT::v32i32, 16},
6542 {2, MVT::v16i64, 16},
6543 {2, MVT::v32i64, 32},
6548 {3, MVT::v16i8, 11},
6549 {3, MVT::v32i8, 13},
6553 {3, MVT::v8i16, 12},
6554 {3, MVT::v16i16, 27},
6555 {3, MVT::v32i16, 54},
6559 {3, MVT::v8i32, 11},
6560 {3, MVT::v16i32, 22},
6561 {3, MVT::v32i32, 48},
6565 {3, MVT::v8i64, 12},
6566 {3, MVT::v16i64, 24},
6572 {4, MVT::v32i8, 12},
6576 {4, MVT::v8i16, 10},
6577 {4, MVT::v16i16, 32},
6578 {4, MVT::v32i16, 64},
6582 {4, MVT::v8i32, 16},
6583 {4, MVT::v16i32, 32},
6584 {4, MVT::v32i32, 64},
6588 {4, MVT::v8i64, 20},
6589 {4, MVT::v16i64, 40},
6594 {6, MVT::v16i8, 27},
6595 {6, MVT::v32i8, 90},
6597 {6, MVT::v2i16, 10},
6598 {6, MVT::v4i16, 15},
6599 {6, MVT::v8i16, 21},
6600 {6, MVT::v16i16, 58},
6601 {6, MVT::v32i16, 90},
6604 {6, MVT::v4i32, 12},
6605 {6, MVT::v8i32, 33},
6606 {6, MVT::v16i32, 66},
6609 {6, MVT::v4i64, 15},
6610 {6, MVT::v8i64, 30},
6613 static const CostTblEntry SSE2InterleavedStoreTbl[] = {
6624 if (Opcode == Instruction::Load) {
6625 auto GetDiscountedCost = [Factor, NumMembers = Indices.
size(),
6629 return MemOpCosts +
divideCeil(NumMembers * Entry->Cost, Factor);
6633 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedLoadTbl, Factor,
6635 return GetDiscountedCost(Entry);
6638 if (
const auto *Entry =
CostTableLookup(SSSE3InterleavedLoadTbl, Factor,
6640 return GetDiscountedCost(Entry);
6643 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedLoadTbl, Factor,
6645 return GetDiscountedCost(Entry);
6647 assert(Opcode == Instruction::Store &&
6648 "Expected Store Instruction at this point");
6650 "Interleaved store only supports fully-interleaved groups.");
6652 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedStoreTbl, Factor,
6654 return MemOpCosts + Entry->Cost;
6657 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedStoreTbl, Factor,
6659 return MemOpCosts + Entry->Cost;
6664 UseMaskForCond, UseMaskForGaps);
6669 bool HasBaseReg, int64_t Scale,
6670 unsigned AddrSpace)
const {
6697 return AM.
Scale != 0;
Expand Atomic instructions
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
const char LLVMTargetMachineRef TM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
APInt zext(unsigned width) const
Zero extend to a new width.
unsigned popcount() const
Count the number of bits set.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
Estimate the overhead of scalarizing an instruction.
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
A parsed version of the target data layout string in and methods for querying it.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
constexpr bool isScalar() const
Exactly one element.
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static InstructionCost getInvalid(CostType Val=0)
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
FastMathFlags getFlags() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
bool isTypeBasedOnly() const
bool is128BitVector() const
Return true if this is a 128-bit vector type.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
bool is512BitVector() const
Return true if this is a 512-bit vector type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool is256BitVector() const
Return true if this is a 256-bit vector type.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
unsigned getAddressSpace() const
Return the address space of the Pointer type.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
size_type size() const
Returns the number of bits in this bitvector.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const TargetMachine & getTargetMachine() const
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Primary interface to the complete machine description for the target machine.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
static Type * getDoubleTy(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Base class of all SIMD vector types.
static VectorType * getExtendedElementVectorType(VectorType *VTy)
This static method is like getInteger except that the element types are twice as wide as the elements in the input type.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector.
static VectorType * getDoubleElementsVectorType(VectorType *VTy)
This static method returns a VectorType with twice as many elements as the input type and the same element type.
Type * getElementType() const
bool useAVX512Regs() const
unsigned getPreferVectorWidth() const
InstructionCost getInterleavedMemoryOpCostAVX512(unsigned Opcode, FixedVectorType *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
bool isLegalMaskedGather(Type *DataType, Align Alignment)
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const
std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool isLegalNTStore(Type *DataType, Align Alignment)
bool enableInterleavedAccessVectorization()
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
bool isLegalNTLoad(Type *DataType, Align Alignment)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment)
bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment)
bool isLegalMaskedLoad(Type *DataType, Align Alignment)
bool supportsEfficientVectorElementLoadStore() const
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
bool prefersVectorizedAddressing() const
unsigned getLoadStoreVecRegBitWidth(unsigned AS) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment)
std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const override
bool isLegalMaskedStore(Type *DataType, Align Alignment)
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
Calculate the cost of Gather / Scatter operation.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
unsigned getMaxInterleaveFactor(ElementCount VF)
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
unsigned getNumberOfRegisters(unsigned ClassID) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt)
unsigned getAtomicMemIntrinsicMaxElementSize() const
bool isLegalMaskedScatter(Type *DataType, Align Alignment)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
InstructionCost getIntImmCost(int64_t)
Calculate the cost of materializing a 64-bit value.
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target, for a load/store of the specified type.
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isExpensiveToSpeculativelyExecute(const Instruction *I)
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr)
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
InstructionCost getMinMaxCost(Intrinsic::ID IID, Type *Ty, TTI::TargetCostKind CostKind, FastMathFlags FMF)
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Type) const
bool hasDivRemOp(Type *DataType, bool IsSigned)
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
@ BSWAP
Byte Swap and Counting operators.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.
@ FADD
Simple binary floating point operators.
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ SIGN_EXTEND
Conversion operators.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width (W).
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
@ SHL
Shift and rotation operations.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially variable) element number IDX.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0.0.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W).
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
constexpr int PoisonMaskElem
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
OutputIt copy(R &&Range, OutputIt Out)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
unsigned RecipThroughputCost
std::optional< unsigned > operator[](TargetTransformInfo::TargetCostKind Kind) const
unsigned SizeAndLatencyCost
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Type Conversion Cost Table.