63#define DEBUG_TYPE "x86tti"
79 std::optional<unsigned>
164 bool Vector = (ClassID == 1);
183 if (ST->
hasAVX512() && ST->hasEVEX512() && PreferVectorWidth >= 512)
185 if (ST->
hasAVX() && PreferVectorWidth >= 256)
187 if (ST->
hasSSE1() && PreferVectorWidth >= 128)
228 if (Opcode == Instruction::Mul && Ty->
isVectorTy() &&
245 assert(ISD &&
"Invalid opcode");
247 if (ISD ==
ISD::MUL && Args.size() == 2 && LT.second.isVector() &&
248 (LT.second.getScalarType() == MVT::i32 ||
249 LT.second.getScalarType() == MVT::i64)) {
251 bool Op1Signed =
false, Op2Signed =
false;
254 unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
255 bool SignedMode = Op1Signed || Op2Signed;
260 if (OpMinSize <= 15 && !ST->isPMADDWDSlow() &&
261 LT.second.getScalarType() == MVT::i32) {
263 isa<ConstantDataVector>(Args[0]) || isa<ConstantVector>(Args[0]);
265 isa<ConstantDataVector>(Args[1]) || isa<ConstantVector>(Args[1]);
266 bool Op1Sext = isa<SExtInst>(Args[0]) &&
267 (Op1MinSize == 15 || (Op1MinSize < 15 && !ST->
hasSSE41()));
268 bool Op2Sext = isa<SExtInst>(Args[1]) &&
269 (Op2MinSize == 15 || (Op2MinSize < 15 && !ST->
hasSSE41()));
271 bool IsZeroExtended = !Op1Signed || !Op2Signed;
272 bool IsConstant = Op1Constant || Op2Constant;
273 bool IsSext = Op1Sext || Op2Sext;
274 if (IsConstant || IsZeroExtended || IsSext)
282 if (ST->useSLMArithCosts() && LT.second == MVT::v4i32) {
285 if (!SignedMode && OpMinSize <= 8)
289 if (!SignedMode && OpMinSize <= 16)
296 if (!SignedMode && OpMinSize <= 32 && LT.second.getScalarType() == MVT::i64)
349 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
350 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
351 {
ISD::SRA, MVT::v16i8, { 1, 8, 4, 5 } },
352 {
ISD::SHL, MVT::v32i8, { 1, 8, 2, 3 } },
353 {
ISD::SRL, MVT::v32i8, { 1, 8, 2, 3 } },
354 {
ISD::SRA, MVT::v32i8, { 1, 9, 4, 5 } },
355 {
ISD::SHL, MVT::v64i8, { 1, 8, 2, 3 } },
356 {
ISD::SRL, MVT::v64i8, { 1, 8, 2, 3 } },
357 {
ISD::SRA, MVT::v64i8, { 1, 9, 4, 6 } },
359 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
360 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
361 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
362 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
363 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
364 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
368 if (
const auto *Entry =
370 if (
auto KindCost = Entry->Cost[
CostKind])
371 return LT.first * *KindCost;
374 {
ISD::SHL, MVT::v64i8, { 2, 12, 5, 6 } },
375 {
ISD::SRL, MVT::v64i8, { 2, 12, 5, 6 } },
376 {
ISD::SRA, MVT::v64i8, { 3, 10, 12, 12 } },
378 {
ISD::SHL, MVT::v16i16, { 2, 7, 4, 4 } },
379 {
ISD::SRL, MVT::v16i16, { 2, 7, 4, 4 } },
380 {
ISD::SRA, MVT::v16i16, { 2, 7, 4, 4 } },
382 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
383 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
384 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
385 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
386 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
387 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
389 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
390 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
391 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
392 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
393 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
394 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
395 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
404 if (
const auto *Entry =
406 if (
auto KindCost = Entry->Cost[
CostKind])
407 return LT.first * *KindCost;
410 {
ISD::SHL, MVT::v16i8, { 1, 8, 2, 3 } },
411 {
ISD::SRL, MVT::v16i8, { 1, 8, 2, 3 } },
412 {
ISD::SRA, MVT::v16i8, { 2, 10, 5, 6 } },
413 {
ISD::SHL, MVT::v32i8, { 2, 8, 2, 4 } },
414 {
ISD::SRL, MVT::v32i8, { 2, 8, 2, 4 } },
415 {
ISD::SRA, MVT::v32i8, { 3, 10, 5, 9 } },
417 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
418 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
419 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
420 {
ISD::SHL, MVT::v16i16,{ 2, 2, 1, 2 } },
421 {
ISD::SRL, MVT::v16i16,{ 2, 2, 1, 2 } },
422 {
ISD::SRA, MVT::v16i16,{ 2, 2, 1, 2 } },
424 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
425 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
426 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
427 {
ISD::SHL, MVT::v8i32, { 2, 2, 1, 2 } },
428 {
ISD::SRL, MVT::v8i32, { 2, 2, 1, 2 } },
429 {
ISD::SRA, MVT::v8i32, { 2, 2, 1, 2 } },
431 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
432 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
433 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
434 {
ISD::SHL, MVT::v4i64, { 2, 2, 1, 2 } },
435 {
ISD::SRL, MVT::v4i64, { 2, 2, 1, 2 } },
436 {
ISD::SRA, MVT::v4i64, { 4, 4, 3, 6 } },
445 if (
const auto *Entry =
447 if (
auto KindCost = Entry->Cost[
CostKind])
448 return LT.first * *KindCost;
451 {
ISD::SHL, MVT::v16i8, { 2, 7, 2, 3 } },
452 {
ISD::SRL, MVT::v16i8, { 2, 7, 2, 3 } },
453 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
454 {
ISD::SHL, MVT::v32i8, { 4, 7, 7, 8 } },
455 {
ISD::SRL, MVT::v32i8, { 4, 7, 7, 8 } },
456 {
ISD::SRA, MVT::v32i8, { 7, 7, 12, 13 } },
458 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 1 } },
459 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 1 } },
460 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 1 } },
461 {
ISD::SHL, MVT::v16i16,{ 3, 6, 4, 5 } },
462 {
ISD::SRL, MVT::v16i16,{ 3, 6, 4, 5 } },
463 {
ISD::SRA, MVT::v16i16,{ 3, 6, 4, 5 } },
465 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 1 } },
466 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 1 } },
467 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 1 } },
468 {
ISD::SHL, MVT::v8i32, { 3, 6, 4, 5 } },
469 {
ISD::SRL, MVT::v8i32, { 3, 6, 4, 5 } },
470 {
ISD::SRA, MVT::v8i32, { 3, 6, 4, 5 } },
472 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 1 } },
473 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 1 } },
474 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
475 {
ISD::SHL, MVT::v4i64, { 3, 6, 4, 5 } },
476 {
ISD::SRL, MVT::v4i64, { 3, 6, 4, 5 } },
477 {
ISD::SRA, MVT::v4i64, { 5, 7, 8, 9 } },
487 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
488 if (
const auto *Entry =
490 if (
auto KindCost = Entry->Cost[
CostKind])
491 return LT.first * *KindCost;
494 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
495 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
496 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
498 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
499 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
500 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
502 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
503 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
504 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
506 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
507 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
508 {
ISD::SRA, MVT::v2i64, { 3, 5, 6, 6 } },
518 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
519 if (
const auto *Entry =
521 if (
auto KindCost = Entry->Cost[
CostKind])
522 return LT.first * *KindCost;
537 if (
const auto *Entry =
539 if (
auto KindCost = Entry->Cost[
CostKind])
540 return LT.first * *KindCost;
560 if (
const auto *Entry =
562 if (
auto KindCost = Entry->Cost[
CostKind])
563 return LT.first * *KindCost;
583 if (
const auto *Entry =
CostTableLookup(AVX2ConstCostTable, ISD, LT.second))
584 if (
auto KindCost = Entry->Cost[
CostKind])
585 return LT.first * *KindCost;
605 if (
const auto *Entry =
CostTableLookup(AVXConstCostTable, ISD, LT.second))
606 if (
auto KindCost = Entry->Cost[
CostKind])
607 return LT.first * *KindCost;
615 if (
const auto *Entry =
617 if (
auto KindCost = Entry->Cost[
CostKind])
618 return LT.first * *KindCost;
638 if (
const auto *Entry =
CostTableLookup(SSE2ConstCostTable, ISD, LT.second))
639 if (
auto KindCost = Entry->Cost[
CostKind])
640 return LT.first * *KindCost;
643 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
644 {
ISD::SRL, MVT::v16i8, { 3,10, 5, 8 } },
645 {
ISD::SRA, MVT::v16i8, { 4,12, 8,12 } },
646 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
647 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
648 {
ISD::SRA, MVT::v32i8, { 5,10,10,13 } },
649 {
ISD::SHL, MVT::v64i8, { 4, 7, 6, 8 } },
650 {
ISD::SRL, MVT::v64i8, { 4, 8, 7,10 } },
651 {
ISD::SRA, MVT::v64i8, { 5,10,10,15 } },
653 {
ISD::SHL, MVT::v32i16, { 2, 4, 2, 3 } },
654 {
ISD::SRL, MVT::v32i16, { 2, 4, 2, 3 } },
655 {
ISD::SRA, MVT::v32i16, { 2, 4, 2, 3 } },
659 if (
const auto *Entry =
661 if (
auto KindCost = Entry->Cost[
CostKind])
662 return LT.first * *KindCost;
665 {
ISD::SHL, MVT::v32i16, { 5,10, 5, 7 } },
666 {
ISD::SRL, MVT::v32i16, { 5,10, 5, 7 } },
667 {
ISD::SRA, MVT::v32i16, { 5,10, 5, 7 } },
669 {
ISD::SHL, MVT::v16i32, { 2, 4, 2, 3 } },
670 {
ISD::SRL, MVT::v16i32, { 2, 4, 2, 3 } },
671 {
ISD::SRA, MVT::v16i32, { 2, 4, 2, 3 } },
673 {
ISD::SRA, MVT::v2i64, { 1, 2, 1, 2 } },
674 {
ISD::SHL, MVT::v4i64, { 1, 4, 1, 2 } },
675 {
ISD::SRL, MVT::v4i64, { 1, 4, 1, 2 } },
676 {
ISD::SRA, MVT::v4i64, { 1, 4, 1, 2 } },
677 {
ISD::SHL, MVT::v8i64, { 1, 4, 1, 2 } },
678 {
ISD::SRL, MVT::v8i64, { 1, 4, 1, 2 } },
679 {
ISD::SRA, MVT::v8i64, { 1, 4, 1, 2 } },
683 if (
const auto *Entry =
685 if (
auto KindCost = Entry->Cost[
CostKind])
686 return LT.first * *KindCost;
690 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
691 {
ISD::SRL, MVT::v16i8, { 3, 9, 5, 8 } },
692 {
ISD::SRA, MVT::v16i8, { 4, 5, 9,13 } },
693 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
694 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
695 {
ISD::SRA, MVT::v32i8, { 6, 9,11,16 } },
697 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 2 } },
698 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 2 } },
699 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 2 } },
700 {
ISD::SHL, MVT::v16i16, { 2, 4, 2, 3 } },
701 {
ISD::SRL, MVT::v16i16, { 2, 4, 2, 3 } },
702 {
ISD::SRA, MVT::v16i16, { 2, 4, 2, 3 } },
704 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 2 } },
705 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 2 } },
706 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 2 } },
707 {
ISD::SHL, MVT::v8i32, { 2, 4, 2, 3 } },
708 {
ISD::SRL, MVT::v8i32, { 2, 4, 2, 3 } },
709 {
ISD::SRA, MVT::v8i32, { 2, 4, 2, 3 } },
711 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 2 } },
712 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 2 } },
713 {
ISD::SRA, MVT::v2i64, { 2, 4, 5, 7 } },
714 {
ISD::SHL, MVT::v4i64, { 2, 4, 1, 2 } },
715 {
ISD::SRL, MVT::v4i64, { 2, 4, 1, 2 } },
716 {
ISD::SRA, MVT::v4i64, { 4, 6, 5, 9 } },
720 if (
const auto *Entry =
722 if (
auto KindCost = Entry->Cost[
CostKind])
723 return LT.first * *KindCost;
726 {
ISD::SHL, MVT::v16i8, { 4, 4, 6, 8 } },
727 {
ISD::SRL, MVT::v16i8, { 4, 8, 5, 8 } },
728 {
ISD::SRA, MVT::v16i8, { 6, 6, 9,13 } },
729 {
ISD::SHL, MVT::v32i8, { 7, 8,11,14 } },
730 {
ISD::SRL, MVT::v32i8, { 7, 9,10,14 } },
731 {
ISD::SRA, MVT::v32i8, { 10,11,16,21 } },
733 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 2 } },
734 {
ISD::SRL, MVT::v8i16, { 1, 3, 1, 2 } },
735 {
ISD::SRA, MVT::v8i16, { 1, 3, 1, 2 } },
736 {
ISD::SHL, MVT::v16i16, { 3, 7, 5, 7 } },
737 {
ISD::SRL, MVT::v16i16, { 3, 7, 5, 7 } },
738 {
ISD::SRA, MVT::v16i16, { 3, 7, 5, 7 } },
740 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 2 } },
741 {
ISD::SRL, MVT::v4i32, { 1, 3, 1, 2 } },
742 {
ISD::SRA, MVT::v4i32, { 1, 3, 1, 2 } },
743 {
ISD::SHL, MVT::v8i32, { 3, 7, 5, 7 } },
744 {
ISD::SRL, MVT::v8i32, { 3, 7, 5, 7 } },
745 {
ISD::SRA, MVT::v8i32, { 3, 7, 5, 7 } },
747 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 2 } },
748 {
ISD::SRL, MVT::v2i64, { 1, 3, 1, 2 } },
749 {
ISD::SRA, MVT::v2i64, { 3, 4, 5, 7 } },
750 {
ISD::SHL, MVT::v4i64, { 3, 7, 4, 6 } },
751 {
ISD::SRL, MVT::v4i64, { 3, 7, 4, 6 } },
752 {
ISD::SRA, MVT::v4i64, { 6, 7,10,13 } },
757 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
758 if (
const auto *Entry =
760 if (
auto KindCost = Entry->Cost[
CostKind])
761 return LT.first * *KindCost;
765 {
ISD::SHL, MVT::v16i8, { 9, 10, 6, 9 } },
766 {
ISD::SRL, MVT::v16i8, { 9, 13, 5, 9 } },
767 {
ISD::SRA, MVT::v16i8, { 11, 15, 9,13 } },
769 {
ISD::SHL, MVT::v8i16, { 2, 2, 1, 2 } },
770 {
ISD::SRL, MVT::v8i16, { 2, 2, 1, 2 } },
771 {
ISD::SRA, MVT::v8i16, { 2, 2, 1, 2 } },
773 {
ISD::SHL, MVT::v4i32, { 2, 2, 1, 2 } },
774 {
ISD::SRL, MVT::v4i32, { 2, 2, 1, 2 } },
775 {
ISD::SRA, MVT::v4i32, { 2, 2, 1, 2 } },
777 {
ISD::SHL, MVT::v2i64, { 2, 2, 1, 2 } },
778 {
ISD::SRL, MVT::v2i64, { 2, 2, 1, 2 } },
779 {
ISD::SRA, MVT::v2i64, { 5, 9, 5, 7 } },
783 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
784 if (
const auto *Entry =
786 if (
auto KindCost = Entry->Cost[
CostKind])
787 return LT.first * *KindCost;
790 {
ISD::MUL, MVT::v2i64, { 2, 15, 1, 3 } },
791 {
ISD::MUL, MVT::v4i64, { 2, 15, 1, 3 } },
792 {
ISD::MUL, MVT::v8i64, { 3, 15, 1, 3 } }
797 if (
const auto *Entry =
CostTableLookup(AVX512DQCostTable, ISD, LT.second))
798 if (
auto KindCost = Entry->Cost[
CostKind])
799 return LT.first * *KindCost;
802 {
ISD::SHL, MVT::v16i8, { 4, 8, 4, 5 } },
803 {
ISD::SRL, MVT::v16i8, { 4, 8, 4, 5 } },
804 {
ISD::SRA, MVT::v16i8, { 4, 8, 4, 5 } },
805 {
ISD::SHL, MVT::v32i8, { 4, 23,11,16 } },
806 {
ISD::SRL, MVT::v32i8, { 4, 30,12,18 } },
807 {
ISD::SRA, MVT::v32i8, { 6, 13,24,30 } },
808 {
ISD::SHL, MVT::v64i8, { 6, 19,13,15 } },
809 {
ISD::SRL, MVT::v64i8, { 7, 27,15,18 } },
810 {
ISD::SRA, MVT::v64i8, { 15, 15,30,30 } },
812 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
813 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
814 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
815 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
816 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
817 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
818 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
819 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
820 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
822 {
ISD::ADD, MVT::v64i8, { 1, 1, 1, 1 } },
823 {
ISD::ADD, MVT::v32i16, { 1, 1, 1, 1 } },
825 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 1 } },
826 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 1 } },
827 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 1 } },
828 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 1 } },
830 {
ISD::SUB, MVT::v64i8, { 1, 1, 1, 1 } },
831 {
ISD::SUB, MVT::v32i16, { 1, 1, 1, 1 } },
833 {
ISD::MUL, MVT::v64i8, { 5, 10,10,11 } },
834 {
ISD::MUL, MVT::v32i16, { 1, 5, 1, 1 } },
836 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 1 } },
837 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 1 } },
838 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 1 } },
839 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 1 } },
844 if (
const auto *Entry =
CostTableLookup(AVX512BWCostTable, ISD, LT.second))
845 if (
auto KindCost = Entry->Cost[
CostKind])
846 return LT.first * *KindCost;
849 {
ISD::SHL, MVT::v64i8, { 15, 19,27,33 } },
850 {
ISD::SRL, MVT::v64i8, { 15, 19,30,36 } },
851 {
ISD::SRA, MVT::v64i8, { 37, 37,51,63 } },
853 {
ISD::SHL, MVT::v32i16, { 11, 16,11,15 } },
854 {
ISD::SRL, MVT::v32i16, { 11, 16,11,15 } },
855 {
ISD::SRA, MVT::v32i16, { 11, 16,11,15 } },
857 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
858 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
859 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
860 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
861 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
862 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
863 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
864 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
865 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
867 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
868 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
869 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
870 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
871 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
872 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
873 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
874 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
875 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
877 {
ISD::ADD, MVT::v64i8, { 3, 7, 5, 5 } },
878 {
ISD::ADD, MVT::v32i16, { 3, 7, 5, 5 } },
880 {
ISD::SUB, MVT::v64i8, { 3, 7, 5, 5 } },
881 {
ISD::SUB, MVT::v32i16, { 3, 7, 5, 5 } },
883 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 1 } },
884 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 1 } },
885 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 1 } },
886 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 1 } },
888 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 1 } },
889 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 1 } },
890 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 1 } },
891 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 1 } },
893 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 1 } },
894 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 1 } },
895 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 1 } },
896 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 1 } },
898 {
ISD::MUL, MVT::v16i32, { 1, 10, 1, 2 } },
899 {
ISD::MUL, MVT::v8i32, { 1, 10, 1, 2 } },
900 {
ISD::MUL, MVT::v4i32, { 1, 10, 1, 2 } },
901 {
ISD::MUL, MVT::v8i64, { 6, 9, 8, 8 } },
906 {
ISD::FNEG, MVT::v8f64, { 1, 1, 1, 2 } },
907 {
ISD::FADD, MVT::v8f64, { 1, 4, 1, 1 } },
908 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 1 } },
909 {
ISD::FSUB, MVT::v8f64, { 1, 4, 1, 1 } },
910 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 1 } },
911 {
ISD::FMUL, MVT::v8f64, { 1, 4, 1, 1 } },
912 {
ISD::FMUL, MVT::v4f64, { 1, 4, 1, 1 } },
913 {
ISD::FMUL, MVT::v2f64, { 1, 4, 1, 1 } },
916 {
ISD::FDIV, MVT::f64, { 4, 14, 1, 1 } },
917 {
ISD::FDIV, MVT::v2f64, { 4, 14, 1, 1 } },
918 {
ISD::FDIV, MVT::v4f64, { 8, 14, 1, 1 } },
919 {
ISD::FDIV, MVT::v8f64, { 16, 23, 1, 3 } },
921 {
ISD::FNEG, MVT::v16f32, { 1, 1, 1, 2 } },
922 {
ISD::FADD, MVT::v16f32, { 1, 4, 1, 1 } },
923 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 1 } },
924 {
ISD::FSUB, MVT::v16f32, { 1, 4, 1, 1 } },
925 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 1 } },
926 {
ISD::FMUL, MVT::v16f32, { 1, 4, 1, 1 } },
927 {
ISD::FMUL, MVT::v8f32, { 1, 4, 1, 1 } },
928 {
ISD::FMUL, MVT::v4f32, { 1, 4, 1, 1 } },
931 {
ISD::FDIV, MVT::f32, { 3, 11, 1, 1 } },
932 {
ISD::FDIV, MVT::v4f32, { 3, 11, 1, 1 } },
933 {
ISD::FDIV, MVT::v8f32, { 5, 11, 1, 1 } },
934 {
ISD::FDIV, MVT::v16f32, { 10, 18, 1, 3 } },
938 if (
const auto *Entry =
CostTableLookup(AVX512CostTable, ISD, LT.second))
939 if (
auto KindCost = Entry->Cost[
CostKind])
940 return LT.first * *KindCost;
945 {
ISD::SHL, MVT::v4i32, { 2, 3, 1, 3 } },
946 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 3 } },
947 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 3 } },
948 {
ISD::SHL, MVT::v8i32, { 4, 4, 1, 3 } },
949 {
ISD::SRL, MVT::v8i32, { 4, 4, 1, 3 } },
950 {
ISD::SRA, MVT::v8i32, { 4, 4, 1, 3 } },
951 {
ISD::SHL, MVT::v2i64, { 2, 3, 1, 1 } },
952 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
953 {
ISD::SHL, MVT::v4i64, { 4, 4, 1, 2 } },
954 {
ISD::SRL, MVT::v4i64, { 4, 4, 1, 2 } },
966 if (ST->
hasAVX2() && !(ST->hasXOP() && LT.second == MVT::v4i32)) {
967 if (ISD ==
ISD::SHL && LT.second == MVT::v16i16 &&
974 if (
const auto *Entry =
CostTableLookup(AVX2ShiftCostTable, ISD, LT.second))
975 if (
auto KindCost = Entry->Cost[
CostKind])
976 return LT.first * *KindCost;
981 {
ISD::SHL, MVT::v16i8, { 1, 3, 1, 1 } },
982 {
ISD::SRL, MVT::v16i8, { 2, 3, 1, 1 } },
983 {
ISD::SRA, MVT::v16i8, { 2, 3, 1, 1 } },
984 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 1 } },
985 {
ISD::SRL, MVT::v8i16, { 2, 3, 1, 1 } },
986 {
ISD::SRA, MVT::v8i16, { 2, 3, 1, 1 } },
987 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 1 } },
988 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 1 } },
989 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 1 } },
990 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 1 } },
991 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
992 {
ISD::SRA, MVT::v2i64, { 2, 3, 1, 1 } },
994 {
ISD::SHL, MVT::v32i8, { 4, 7, 5, 6 } },
995 {
ISD::SRL, MVT::v32i8, { 6, 7, 5, 6 } },
996 {
ISD::SRA, MVT::v32i8, { 6, 7, 5, 6 } },
997 {
ISD::SHL, MVT::v16i16, { 4, 7, 5, 6 } },
998 {
ISD::SRL, MVT::v16i16, { 6, 7, 5, 6 } },
999 {
ISD::SRA, MVT::v16i16, { 6, 7, 5, 6 } },
1000 {
ISD::SHL, MVT::v8i32, { 4, 7, 5, 6 } },
1001 {
ISD::SRL, MVT::v8i32, { 6, 7, 5, 6 } },
1002 {
ISD::SRA, MVT::v8i32, { 6, 7, 5, 6 } },
1003 {
ISD::SHL, MVT::v4i64, { 4, 7, 5, 6 } },
1004 {
ISD::SRL, MVT::v4i64, { 6, 7, 5, 6 } },
1005 {
ISD::SRA, MVT::v4i64, { 6, 7, 5, 6 } },
1015 if (
const auto *Entry =
1017 if (
auto KindCost = Entry->Cost[
CostKind])
1018 return LT.first * *KindCost;
1025 if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->
hasSSE2()) ||
1026 ((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->
hasAVX()))
1031 {
ISD::FDIV, MVT::f32, { 18, 19, 1, 1 } },
1032 {
ISD::FDIV, MVT::v4f32, { 35, 36, 1, 1 } },
1033 {
ISD::FDIV, MVT::f64, { 33, 34, 1, 1 } },
1034 {
ISD::FDIV, MVT::v2f64, { 65, 66, 1, 1 } },
1037 if (ST->useGLMDivSqrtCosts())
1038 if (
const auto *Entry =
CostTableLookup(GLMCostTable, ISD, LT.second))
1039 if (
auto KindCost = Entry->Cost[
CostKind])
1040 return LT.first * *KindCost;
1043 {
ISD::MUL, MVT::v4i32, { 11, 11, 1, 7 } },
1044 {
ISD::MUL, MVT::v8i16, { 2, 5, 1, 1 } },
1045 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1046 {
ISD::FMUL, MVT::f32, { 1, 4, 1, 1 } },
1047 {
ISD::FMUL, MVT::v2f64, { 4, 7, 1, 1 } },
1048 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1049 {
ISD::FDIV, MVT::f32, { 17, 19, 1, 1 } },
1050 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 6 } },
1051 {
ISD::FDIV, MVT::f64, { 32, 34, 1, 1 } },
1052 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 6 } },
1053 {
ISD::FADD, MVT::v2f64, { 2, 4, 1, 1 } },
1054 {
ISD::FSUB, MVT::v2f64, { 2, 4, 1, 1 } },
1060 {
ISD::MUL, MVT::v2i64, { 17, 22, 9, 9 } },
1062 {
ISD::ADD, MVT::v2i64, { 4, 2, 1, 2 } },
1063 {
ISD::SUB, MVT::v2i64, { 4, 2, 1, 2 } },
1066 if (ST->useSLMArithCosts())
1067 if (
const auto *Entry =
CostTableLookup(SLMCostTable, ISD, LT.second))
1068 if (
auto KindCost = Entry->Cost[
CostKind])
1069 return LT.first * *KindCost;
1072 {
ISD::SHL, MVT::v16i8, { 6, 21,11,16 } },
1073 {
ISD::SHL, MVT::v32i8, { 6, 23,11,22 } },
1074 {
ISD::SHL, MVT::v8i16, { 5, 18, 5,10 } },
1075 {
ISD::SHL, MVT::v16i16, { 8, 10,10,14 } },
1077 {
ISD::SRL, MVT::v16i8, { 6, 27,12,18 } },
1078 {
ISD::SRL, MVT::v32i8, { 8, 30,12,24 } },
1079 {
ISD::SRL, MVT::v8i16, { 5, 11, 5,10 } },
1080 {
ISD::SRL, MVT::v16i16, { 8, 10,10,14 } },
1082 {
ISD::SRA, MVT::v16i8, { 17, 17,24,30 } },
1083 {
ISD::SRA, MVT::v32i8, { 18, 20,24,43 } },
1084 {
ISD::SRA, MVT::v8i16, { 5, 11, 5,10 } },
1085 {
ISD::SRA, MVT::v16i16, { 8, 10,10,14 } },
1086 {
ISD::SRA, MVT::v2i64, { 4, 5, 5, 5 } },
1087 {
ISD::SRA, MVT::v4i64, { 8, 8, 5, 9 } },
1089 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 2 } },
1090 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 2 } },
1091 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 2 } },
1092 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 2 } },
1093 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 2 } },
1094 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 2 } },
1095 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 2 } },
1096 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 2 } },
1098 {
ISD::MUL, MVT::v16i8, { 5, 18, 6,12 } },
1099 {
ISD::MUL, MVT::v32i8, { 6, 11,10,19 } },
1100 {
ISD::MUL, MVT::v16i16, { 2, 5, 1, 2 } },
1101 {
ISD::MUL, MVT::v8i32, { 4, 10, 1, 2 } },
1102 {
ISD::MUL, MVT::v4i32, { 2, 10, 1, 2 } },
1103 {
ISD::MUL, MVT::v4i64, { 6, 10, 8,13 } },
1104 {
ISD::MUL, MVT::v2i64, { 6, 10, 8, 8 } },
1108 {
ISD::FNEG, MVT::v4f64, { 1, 1, 1, 2 } },
1109 {
ISD::FNEG, MVT::v8f32, { 1, 1, 1, 2 } },
1111 {
ISD::FADD, MVT::f64, { 1, 4, 1, 1 } },
1112 {
ISD::FADD, MVT::f32, { 1, 4, 1, 1 } },
1113 {
ISD::FADD, MVT::v2f64, { 1, 4, 1, 1 } },
1114 {
ISD::FADD, MVT::v4f32, { 1, 4, 1, 1 } },
1115 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 2 } },
1116 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 2 } },
1118 {
ISD::FSUB, MVT::f64, { 1, 4, 1, 1 } },
1119 {
ISD::FSUB, MVT::f32, { 1, 4, 1, 1 } },
1120 {
ISD::FSUB, MVT::v2f64, { 1, 4, 1, 1 } },
1121 {
ISD::FSUB, MVT::v4f32, { 1, 4, 1, 1 } },
1122 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 2 } },
1123 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 2 } },
1125 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1126 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1127 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1128 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1129 {
ISD::FMUL, MVT::v4f64, { 1, 5, 1, 2 } },
1130 {
ISD::FMUL, MVT::v8f32, { 1, 5, 1, 2 } },
1132 {
ISD::FDIV, MVT::f32, { 7, 13, 1, 1 } },
1133 {
ISD::FDIV, MVT::v4f32, { 7, 13, 1, 1 } },
1134 {
ISD::FDIV, MVT::v8f32, { 14, 21, 1, 3 } },
1135 {
ISD::FDIV, MVT::f64, { 14, 20, 1, 1 } },
1136 {
ISD::FDIV, MVT::v2f64, { 14, 20, 1, 1 } },
1137 {
ISD::FDIV, MVT::v4f64, { 28, 35, 1, 3 } },
1142 if (
const auto *Entry =
CostTableLookup(AVX2CostTable, ISD, LT.second))
1143 if (
auto KindCost = Entry->Cost[
CostKind])
1144 return LT.first * *KindCost;
1150 {
ISD::MUL, MVT::v32i8, { 12, 13, 22, 23 } },
1151 {
ISD::MUL, MVT::v16i16, { 4, 8, 5, 6 } },
1152 {
ISD::MUL, MVT::v8i32, { 5, 8, 5, 10 } },
1153 {
ISD::MUL, MVT::v4i32, { 2, 5, 1, 3 } },
1154 {
ISD::MUL, MVT::v4i64, { 12, 15, 19, 20 } },
1156 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 2 } },
1157 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 2 } },
1158 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 2 } },
1159 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 2 } },
1161 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 2 } },
1162 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 2 } },
1163 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 2 } },
1164 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 2 } },
1166 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 2 } },
1167 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 2 } },
1168 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 2 } },
1169 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 2 } },
1171 {
ISD::SUB, MVT::v32i8, { 4, 2, 5, 6 } },
1172 {
ISD::ADD, MVT::v32i8, { 4, 2, 5, 6 } },
1173 {
ISD::SUB, MVT::v16i16, { 4, 2, 5, 6 } },
1174 {
ISD::ADD, MVT::v16i16, { 4, 2, 5, 6 } },
1175 {
ISD::SUB, MVT::v8i32, { 4, 2, 5, 6 } },
1176 {
ISD::ADD, MVT::v8i32, { 4, 2, 5, 6 } },
1177 {
ISD::SUB, MVT::v4i64, { 4, 2, 5, 6 } },
1178 {
ISD::ADD, MVT::v4i64, { 4, 2, 5, 6 } },
1179 {
ISD::SUB, MVT::v2i64, { 1, 1, 1, 1 } },
1180 {
ISD::ADD, MVT::v2i64, { 1, 1, 1, 1 } },
1182 {
ISD::SHL, MVT::v16i8, { 10, 21,11,17 } },
1183 {
ISD::SHL, MVT::v32i8, { 22, 22,27,40 } },
1184 {
ISD::SHL, MVT::v8i16, { 6, 9,11,11 } },
1185 {
ISD::SHL, MVT::v16i16, { 13, 16,24,25 } },
1186 {
ISD::SHL, MVT::v4i32, { 3, 11, 4, 6 } },
1187 {
ISD::SHL, MVT::v8i32, { 9, 11,12,17 } },
1188 {
ISD::SHL, MVT::v2i64, { 2, 4, 4, 6 } },
1189 {
ISD::SHL, MVT::v4i64, { 6, 7,11,15 } },
1191 {
ISD::SRL, MVT::v16i8, { 11, 27,12,18 } },
1192 {
ISD::SRL, MVT::v32i8, { 23, 23,30,43 } },
1193 {
ISD::SRL, MVT::v8i16, { 13, 16,14,22 } },
1194 {
ISD::SRL, MVT::v16i16, { 28, 30,31,48 } },
1195 {
ISD::SRL, MVT::v4i32, { 6, 7,12,16 } },
1196 {
ISD::SRL, MVT::v8i32, { 14, 14,26,34 } },
1197 {
ISD::SRL, MVT::v2i64, { 2, 4, 4, 6 } },
1198 {
ISD::SRL, MVT::v4i64, { 6, 7,11,15 } },
1200 {
ISD::SRA, MVT::v16i8, { 21, 22,24,36 } },
1201 {
ISD::SRA, MVT::v32i8, { 44, 45,51,76 } },
1202 {
ISD::SRA, MVT::v8i16, { 13, 16,14,22 } },
1203 {
ISD::SRA, MVT::v16i16, { 28, 30,31,48 } },
1204 {
ISD::SRA, MVT::v4i32, { 6, 7,12,16 } },
1205 {
ISD::SRA, MVT::v8i32, { 14, 14,26,34 } },
1206 {
ISD::SRA, MVT::v2i64, { 5, 6,10,14 } },
1207 {
ISD::SRA, MVT::v4i64, { 12, 12,22,30 } },
1209 {
ISD::FNEG, MVT::v4f64, { 2, 2, 1, 2 } },
1210 {
ISD::FNEG, MVT::v8f32, { 2, 2, 1, 2 } },
1212 {
ISD::FADD, MVT::f64, { 1, 5, 1, 1 } },
1213 {
ISD::FADD, MVT::f32, { 1, 5, 1, 1 } },
1214 {
ISD::FADD, MVT::v2f64, { 1, 5, 1, 1 } },
1215 {
ISD::FADD, MVT::v4f32, { 1, 5, 1, 1 } },
1216 {
ISD::FADD, MVT::v4f64, { 2, 5, 1, 2 } },
1217 {
ISD::FADD, MVT::v8f32, { 2, 5, 1, 2 } },
1219 {
ISD::FSUB, MVT::f64, { 1, 5, 1, 1 } },
1220 {
ISD::FSUB, MVT::f32, { 1, 5, 1, 1 } },
1221 {
ISD::FSUB, MVT::v2f64, { 1, 5, 1, 1 } },
1222 {
ISD::FSUB, MVT::v4f32, { 1, 5, 1, 1 } },
1223 {
ISD::FSUB, MVT::v4f64, { 2, 5, 1, 2 } },
1224 {
ISD::FSUB, MVT::v8f32, { 2, 5, 1, 2 } },
1226 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1227 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1228 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1229 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1230 {
ISD::FMUL, MVT::v4f64, { 4, 5, 1, 2 } },
1231 {
ISD::FMUL, MVT::v8f32, { 2, 5, 1, 2 } },
1233 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1234 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1235 {
ISD::FDIV, MVT::v8f32, { 28, 29, 1, 3 } },
1236 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1237 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1238 {
ISD::FDIV, MVT::v4f64, { 44, 45, 1, 3 } },
1242 if (
const auto *Entry =
CostTableLookup(AVX1CostTable, ISD, LT.second))
1243 if (
auto KindCost = Entry->Cost[
CostKind])
1244 return LT.first * *KindCost;
1247 {
ISD::FADD, MVT::f64, { 1, 3, 1, 1 } },
1248 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1249 {
ISD::FADD, MVT::v2f64, { 1, 3, 1, 1 } },
1250 {
ISD::FADD, MVT::v4f32, { 1, 3, 1, 1 } },
1252 {
ISD::FSUB, MVT::f64, { 1, 3, 1, 1 } },
1253 {
ISD::FSUB, MVT::f32 , { 1, 3, 1, 1 } },
1254 {
ISD::FSUB, MVT::v2f64, { 1, 3, 1, 1 } },
1255 {
ISD::FSUB, MVT::v4f32, { 1, 3, 1, 1 } },
1257 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1258 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1259 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1260 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1262 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1263 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1264 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1265 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1267 {
ISD::MUL, MVT::v2i64, { 6, 10,10,10 } }
1271 if (
const auto *Entry =
CostTableLookup(SSE42CostTable, ISD, LT.second))
1272 if (
auto KindCost = Entry->Cost[
CostKind])
1273 return LT.first * *KindCost;
1276 {
ISD::SHL, MVT::v16i8, { 15, 24,17,22 } },
1277 {
ISD::SHL, MVT::v8i16, { 11, 14,11,11 } },
1278 {
ISD::SHL, MVT::v4i32, { 14, 20, 4,10 } },
1280 {
ISD::SRL, MVT::v16i8, { 16, 27,18,24 } },
1281 {
ISD::SRL, MVT::v8i16, { 22, 26,23,27 } },
1282 {
ISD::SRL, MVT::v4i32, { 16, 17,15,19 } },
1283 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1285 {
ISD::SRA, MVT::v16i8, { 38, 41,30,36 } },
1286 {
ISD::SRA, MVT::v8i16, { 22, 26,23,27 } },
1287 {
ISD::SRA, MVT::v4i32, { 16, 17,15,19 } },
1288 {
ISD::SRA, MVT::v2i64, { 8, 17, 5, 7 } },
1290 {
ISD::MUL, MVT::v16i8, { 5, 18,10,12 } },
1291 {
ISD::MUL, MVT::v4i32, { 2, 11, 1, 1 } }
1295 if (
const auto *Entry =
CostTableLookup(SSE41CostTable, ISD, LT.second))
1296 if (
auto KindCost = Entry->Cost[
CostKind])
1297 return LT.first * *KindCost;
1302 {
ISD::SHL, MVT::v16i8, { 13, 21,26,28 } },
1303 {
ISD::SHL, MVT::v8i16, { 24, 27,16,20 } },
1304 {
ISD::SHL, MVT::v4i32, { 17, 19,10,12 } },
1305 {
ISD::SHL, MVT::v2i64, { 4, 6, 5, 7 } },
1307 {
ISD::SRL, MVT::v16i8, { 14, 28,27,30 } },
1308 {
ISD::SRL, MVT::v8i16, { 16, 19,31,31 } },
1309 {
ISD::SRL, MVT::v4i32, { 12, 12,15,19 } },
1310 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1312 {
ISD::SRA, MVT::v16i8, { 27, 30,54,54 } },
1313 {
ISD::SRA, MVT::v8i16, { 16, 19,31,31 } },
1314 {
ISD::SRA, MVT::v4i32, { 12, 12,15,19 } },
1315 {
ISD::SRA, MVT::v2i64, { 8, 11,12,16 } },
1317 {
ISD::AND, MVT::v16i8, { 1, 1, 1, 1 } },
1318 {
ISD::AND, MVT::v8i16, { 1, 1, 1, 1 } },
1319 {
ISD::AND, MVT::v4i32, { 1, 1, 1, 1 } },
1320 {
ISD::AND, MVT::v2i64, { 1, 1, 1, 1 } },
1322 {
ISD::OR, MVT::v16i8, { 1, 1, 1, 1 } },
1323 {
ISD::OR, MVT::v8i16, { 1, 1, 1, 1 } },
1324 {
ISD::OR, MVT::v4i32, { 1, 1, 1, 1 } },
1325 {
ISD::OR, MVT::v2i64, { 1, 1, 1, 1 } },
1327 {
ISD::XOR, MVT::v16i8, { 1, 1, 1, 1 } },
1328 {
ISD::XOR, MVT::v8i16, { 1, 1, 1, 1 } },
1329 {
ISD::XOR, MVT::v4i32, { 1, 1, 1, 1 } },
1330 {
ISD::XOR, MVT::v2i64, { 1, 1, 1, 1 } },
1332 {
ISD::ADD, MVT::v2i64, { 1, 2, 1, 2 } },
1333 {
ISD::SUB, MVT::v2i64, { 1, 2, 1, 2 } },
1335 {
ISD::MUL, MVT::v16i8, { 5, 18,12,12 } },
1336 {
ISD::MUL, MVT::v8i16, { 1, 5, 1, 1 } },
1337 {
ISD::MUL, MVT::v4i32, { 6, 8, 7, 7 } },
1338 {
ISD::MUL, MVT::v2i64, { 7, 10,10,10 } },
1342 {
ISD::FDIV, MVT::f32, { 23, 23, 1, 1 } },
1343 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 1 } },
1344 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1345 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 1 } },
1347 {
ISD::FNEG, MVT::f32, { 1, 1, 1, 1 } },
1348 {
ISD::FNEG, MVT::f64, { 1, 1, 1, 1 } },
1349 {
ISD::FNEG, MVT::v4f32, { 1, 1, 1, 1 } },
1350 {
ISD::FNEG, MVT::v2f64, { 1, 1, 1, 1 } },
1352 {
ISD::FADD, MVT::f32, { 2, 3, 1, 1 } },
1353 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1354 {
ISD::FADD, MVT::v2f64, { 2, 3, 1, 1 } },
1356 {
ISD::FSUB, MVT::f32, { 2, 3, 1, 1 } },
1357 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1358 {
ISD::FSUB, MVT::v2f64, { 2, 3, 1, 1 } },
1360 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1361 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1365 if (
const auto *Entry =
CostTableLookup(SSE2CostTable, ISD, LT.second))
1366 if (
auto KindCost = Entry->Cost[
CostKind])
1367 return LT.first * *KindCost;
1370 {
ISD::FDIV, MVT::f32, { 17, 18, 1, 1 } },
1371 {
ISD::FDIV, MVT::v4f32, { 34, 48, 1, 1 } },
1373 {
ISD::FNEG, MVT::f32, { 2, 2, 1, 2 } },
1374 {
ISD::FNEG, MVT::v4f32, { 2, 2, 1, 2 } },
1376 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1377 {
ISD::FADD, MVT::v4f32, { 2, 3, 1, 1 } },
1379 {
ISD::FSUB, MVT::f32, { 1, 3, 1, 1 } },
1380 {
ISD::FSUB, MVT::v4f32, { 2, 3, 1, 1 } },
1382 {
ISD::FMUL, MVT::f32, { 2, 5, 1, 1 } },
1383 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1387 if (
const auto *Entry =
CostTableLookup(SSE1CostTable, ISD, LT.second))
1388 if (
auto KindCost = Entry->Cost[
CostKind])
1389 return LT.first * *KindCost;
1394 {
ISD::MUL, MVT::i64, { 2, 6, 1, 2 } },
1399 if (
auto KindCost = Entry->Cost[
CostKind])
1400 return LT.first * *KindCost;
1411 {
ISD::MUL, MVT::i8, { 3, 4, 1, 1 } },
1412 {
ISD::MUL, MVT::i16, { 2, 4, 1, 1 } },
1413 {
ISD::MUL, MVT::i32, { 1, 4, 1, 1 } },
1415 {
ISD::FNEG, MVT::f64, { 2, 2, 1, 3 } },
1416 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1417 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1418 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1419 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1423 if (
auto KindCost = Entry->Cost[
CostKind])
1424 return LT.first * *KindCost;
1438 return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost;
1487 CostKind, Mask.size() / 2, BaseTp);
1500 if (LT.second.isVector() && LT.second.getScalarType() == MVT::bf16)
1501 LT.second = LT.second.changeVectorElementType(MVT::f16);
1506 int NumElts = LT.second.getVectorNumElements();
1507 if ((
Index % NumElts) == 0)
1510 if (SubLT.second.isVector()) {
1511 int NumSubElts = SubLT.second.getVectorNumElements();
1512 if ((
Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1520 int OrigSubElts = cast<FixedVectorType>(SubTp)->getNumElements();
1521 if (NumSubElts > OrigSubElts && (
Index % OrigSubElts) == 0 &&
1522 (NumSubElts % OrigSubElts) == 0 &&
1523 LT.second.getVectorElementType() ==
1524 SubLT.second.getVectorElementType() &&
1525 LT.second.getVectorElementType().getSizeInBits() ==
1527 assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
1528 "Unexpected number of elements!");
1530 LT.second.getVectorNumElements());
1532 SubLT.second.getVectorNumElements());
1541 return ExtractCost + 1;
1544 "Unexpected vector size");
1546 return ExtractCost + 2;
1557 int NumElts = LT.second.getVectorNumElements();
1559 if (SubLT.second.isVector()) {
1560 int NumSubElts = SubLT.second.getVectorNumElements();
1561 if ((
Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1574 static const CostTblEntry SSE2SubVectorShuffleTbl[] = {
1605 if (
const auto *Entry =
1614 MVT LegalVT = LT.second;
1619 cast<FixedVectorType>(BaseTp)->getNumElements()) {
1623 unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
1630 if (!Mask.empty() && NumOfDests.
isValid()) {
1648 unsigned E = *NumOfDests.
getValue();
1649 unsigned NormalizedVF =
1655 unsigned PrevSrcReg = 0;
1659 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
1660 [
this, SingleOpTy,
CostKind, &PrevSrcReg, &PrevRegMask,
1665 if (PrevRegMask.
empty() || PrevSrcReg != SrcReg ||
1666 PrevRegMask != RegMask)
1674 if (SrcReg != DestReg &&
1679 PrevSrcReg = SrcReg;
1680 PrevRegMask = RegMask;
1693 std::nullopt,
CostKind, 0,
nullptr);
1704 LT.first = NumOfDests * NumOfShufflesPerDest;
1720 if (
const auto *Entry =
1722 return LT.first * Entry->Cost;
1755 if (
const auto *Entry =
1757 return LT.first * Entry->Cost;
1834 if (
const auto *Entry =
CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
1835 if (
auto KindCost = Entry->Cost[
CostKind])
1836 return LT.first * *KindCost;
1889 if (
const auto *Entry =
CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
1890 return LT.first * Entry->Cost;
1911 if (
const auto *Entry =
CostTableLookup(XOPShuffleTbl, Kind, LT.second))
1912 return LT.first * Entry->Cost;
1974 if (
const auto *Entry =
CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
1975 return LT.first * Entry->Cost;
1988 if (
const auto *Entry =
CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
1989 return LT.first * Entry->Cost;
2020 if (
const auto *Entry =
CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
2021 return LT.first * Entry->Cost;
2077 llvm::any_of(Args, [](
const auto &V) {
return isa<LoadInst>(V); });
2079 if (
const auto *Entry =
2082 LT.second.getVectorElementCount()) &&
2083 "Table entry missing from isLegalBroadcastLoad()");
2084 return LT.first * Entry->Cost;
2087 if (
const auto *Entry =
CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
2088 return LT.first * Entry->Cost;
2101 if (
const auto *Entry =
CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
2102 return LT.first * Entry->Cost;
2113 assert(ISD &&
"Invalid opcode");
2118 return Cost == 0 ? 0 : 1;
2933 AVX512BWConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2934 return AdjustCost(Entry->Cost);
2938 AVX512DQConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2939 return AdjustCost(Entry->Cost);
2943 AVX512FConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2944 return AdjustCost(Entry->Cost);
2949 AVX512BWVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2950 return AdjustCost(Entry->Cost);
2954 AVX512DQVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2955 return AdjustCost(Entry->Cost);
2959 SimpleDstTy, SimpleSrcTy))
2960 return AdjustCost(Entry->Cost);
2964 SimpleDstTy, SimpleSrcTy))
2965 return AdjustCost(Entry->Cost);
2970 SimpleDstTy, SimpleSrcTy))
2971 return AdjustCost(Entry->Cost);
2976 SimpleDstTy, SimpleSrcTy))
2977 return AdjustCost(Entry->Cost);
2982 SimpleDstTy, SimpleSrcTy))
2983 return AdjustCost(Entry->Cost);
2998 AVX512BWConversionTbl, ISD, LTDest.second, LTSrc.second))
2999 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
3003 AVX512DQConversionTbl, ISD, LTDest.second, LTSrc.second))
3004 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
3008 AVX512FConversionTbl, ISD, LTDest.second, LTSrc.second))
3009 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
3014 LTDest.second, LTSrc.second))
3015 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
3019 LTDest.second, LTSrc.second))
3020 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
3024 LTDest.second, LTSrc.second))
3025 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
3029 LTDest.second, LTSrc.second))
3030 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
3034 LTDest.second, LTSrc.second))
3035 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
3039 LTDest.second, LTSrc.second))
3040 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
3044 LTDest.second, LTSrc.second))
3045 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
3050 1 < Src->getScalarSizeInBits() && Src->getScalarSizeInBits() < 32) {
3051 Type *ExtSrc = Src->getWithNewBitWidth(32);
3057 if (!(Src->isIntegerTy() &&
I && isa<LoadInst>(
I->getOperand(0))))
3067 1 < Dst->getScalarSizeInBits() && Dst->getScalarSizeInBits() < 32) {
3068 Type *TruncDst = Dst->getWithNewBitWidth(32);
3091 MVT MTy = LT.second;
3094 assert(ISD &&
"Invalid opcode");
3097 if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
3110 Pred = cast<CmpInst>(
I)->getPredicate();
3112 bool CmpWithConstant =
false;
3113 if (
auto *CmpInstr = dyn_cast_or_null<CmpInst>(
I))
3114 CmpWithConstant = isa<Constant>(CmpInstr->getOperand(1));
3119 ExtraCost = CmpWithConstant ? 0 : 1;
3124 ExtraCost = CmpWithConstant ? 0 : 1;
3130 ExtraCost = CmpWithConstant ? 1 : 2;
3141 ExtraCost = CmpWithConstant ? 2 : 3;
3148 if (CondTy && !ST->
hasAVX())
3317 if (ST->useSLMArithCosts())
3319 if (
auto KindCost = Entry->Cost[
CostKind])
3320 return LT.first * (ExtraCost + *KindCost);
3324 if (
auto KindCost = Entry->Cost[
CostKind])
3325 return LT.first * (ExtraCost + *KindCost);
3329 if (
auto KindCost = Entry->Cost[
CostKind])
3330 return LT.first * (ExtraCost + *KindCost);
3334 if (
auto KindCost = Entry->Cost[
CostKind])
3335 return LT.first * (ExtraCost + *KindCost);
3339 if (
auto KindCost = Entry->Cost[
CostKind])
3340 return LT.first * (ExtraCost + *KindCost);
3344 if (
auto KindCost = Entry->Cost[
CostKind])
3345 return LT.first * (ExtraCost + *KindCost);
3349 if (
auto KindCost = Entry->Cost[
CostKind])
3350 return LT.first * (ExtraCost + *KindCost);
3354 if (
auto KindCost = Entry->Cost[
CostKind])
3355 return LT.first * (ExtraCost + *KindCost);
3359 if (
auto KindCost = Entry->Cost[
CostKind])
3360 return LT.first * (ExtraCost + *KindCost);
3364 if (
auto KindCost = Entry->Cost[
CostKind])
3365 return LT.first * (ExtraCost + *KindCost);
3390 {
ISD::FSHL, MVT::v8i64, { 1, 1, 1, 1 } },
3391 {
ISD::FSHL, MVT::v4i64, { 1, 1, 1, 1 } },
3392 {
ISD::FSHL, MVT::v2i64, { 1, 1, 1, 1 } },
3393 {
ISD::FSHL, MVT::v16i32, { 1, 1, 1, 1 } },
3394 {
ISD::FSHL, MVT::v8i32, { 1, 1, 1, 1 } },
3395 {
ISD::FSHL, MVT::v4i32, { 1, 1, 1, 1 } },
3396 {
ISD::FSHL, MVT::v32i16, { 1, 1, 1, 1 } },
3397 {
ISD::FSHL, MVT::v16i16, { 1, 1, 1, 1 } },
3398 {
ISD::FSHL, MVT::v8i16, { 1, 1, 1, 1 } },
3399 {
ISD::ROTL, MVT::v32i16, { 1, 1, 1, 1 } },
3400 {
ISD::ROTL, MVT::v16i16, { 1, 1, 1, 1 } },
3401 {
ISD::ROTL, MVT::v8i16, { 1, 1, 1, 1 } },
3402 {
ISD::ROTR, MVT::v32i16, { 1, 1, 1, 1 } },
3403 {
ISD::ROTR, MVT::v16i16, { 1, 1, 1, 1 } },
3404 {
ISD::ROTR, MVT::v8i16, { 1, 1, 1, 1 } },
3423 {
ISD::CTLZ, MVT::v8i64, { 1, 5, 1, 1 } },
3424 {
ISD::CTLZ, MVT::v16i32, { 1, 5, 1, 1 } },
3425 {
ISD::CTLZ, MVT::v32i16, { 18, 27, 23, 27 } },
3426 {
ISD::CTLZ, MVT::v64i8, { 3, 16, 9, 11 } },
3427 {
ISD::CTLZ, MVT::v4i64, { 1, 5, 1, 1 } },
3428 {
ISD::CTLZ, MVT::v8i32, { 1, 5, 1, 1 } },
3429 {
ISD::CTLZ, MVT::v16i16, { 8, 19, 11, 13 } },
3430 {
ISD::CTLZ, MVT::v32i8, { 2, 11, 9, 10 } },
3431 {
ISD::CTLZ, MVT::v2i64, { 1, 5, 1, 1 } },
3432 {
ISD::CTLZ, MVT::v4i32, { 1, 5, 1, 1 } },
3433 {
ISD::CTLZ, MVT::v8i16, { 3, 15, 4, 6 } },
3434 {
ISD::CTLZ, MVT::v16i8, { 2, 10, 9, 10 } },
3436 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3437 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3438 {
ISD::CTTZ, MVT::v4i64, { 1, 8, 6, 6 } },
3439 {
ISD::CTTZ, MVT::v8i32, { 1, 8, 6, 6 } },
3440 {
ISD::CTTZ, MVT::v2i64, { 1, 8, 6, 6 } },
3441 {
ISD::CTTZ, MVT::v4i32, { 1, 8, 6, 6 } },
3444 {
ISD::ABS, MVT::v32i16, { 1, 1, 1, 1 } },
3445 {
ISD::ABS, MVT::v64i8, { 1, 1, 1, 1 } },
3467 {
ISD::CTLZ, MVT::v8i64, { 8, 22, 23, 23 } },
3468 {
ISD::CTLZ, MVT::v16i32, { 8, 23, 25, 25 } },
3469 {
ISD::CTLZ, MVT::v32i16, { 4, 15, 15, 16 } },
3470 {
ISD::CTLZ, MVT::v64i8, { 3, 12, 10, 9 } },
3471 {
ISD::CTPOP, MVT::v2i64, { 3, 7, 10, 10 } },
3472 {
ISD::CTPOP, MVT::v4i64, { 3, 7, 10, 10 } },
3473 {
ISD::CTPOP, MVT::v8i64, { 3, 8, 10, 12 } },
3474 {
ISD::CTPOP, MVT::v4i32, { 7, 11, 14, 14 } },
3475 {
ISD::CTPOP, MVT::v8i32, { 7, 11, 14, 14 } },
3476 {
ISD::CTPOP, MVT::v16i32, { 7, 12, 14, 16 } },
3477 {
ISD::CTPOP, MVT::v8i16, { 2, 7, 11, 11 } },
3478 {
ISD::CTPOP, MVT::v16i16, { 2, 7, 11, 11 } },
3479 {
ISD::CTPOP, MVT::v32i16, { 3, 7, 11, 13 } },
3483 {
ISD::CTTZ, MVT::v8i16, { 3, 9, 14, 14 } },
3484 {
ISD::CTTZ, MVT::v16i16, { 3, 9, 14, 14 } },
3485 {
ISD::CTTZ, MVT::v32i16, { 3, 10, 14, 16 } },
3486 {
ISD::CTTZ, MVT::v16i8, { 2, 6, 11, 11 } },
3487 {
ISD::CTTZ, MVT::v32i8, { 2, 6, 11, 11 } },
3488 {
ISD::CTTZ, MVT::v64i8, { 3, 7, 11, 13 } },
3489 {
ISD::ROTL, MVT::v32i16, { 2, 8, 6, 8 } },
3490 {
ISD::ROTL, MVT::v16i16, { 2, 8, 6, 7 } },
3491 {
ISD::ROTL, MVT::v8i16, { 2, 7, 6, 7 } },
3492 {
ISD::ROTL, MVT::v64i8, { 5, 6, 11, 12 } },
3493 {
ISD::ROTL, MVT::v32i8, { 5, 15, 7, 10 } },
3494 {
ISD::ROTL, MVT::v16i8, { 5, 15, 7, 10 } },
3495 {
ISD::ROTR, MVT::v32i16, { 2, 8, 6, 8 } },
3496 {
ISD::ROTR, MVT::v16i16, { 2, 8, 6, 7 } },
3497 {
ISD::ROTR, MVT::v8i16, { 2, 7, 6, 7 } },
3498 {
ISD::ROTR, MVT::v64i8, { 5, 6, 12, 14 } },
3499 {
ISD::ROTR, MVT::v32i8, { 5, 14, 6, 9 } },
3500 {
ISD::ROTR, MVT::v16i8, { 5, 14, 6, 9 } },
3503 {
ISD::SMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3504 {
ISD::SMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3505 {
ISD::SMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3506 {
ISD::SMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3511 {
ISD::UMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3512 {
ISD::UMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3513 {
ISD::UMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3514 {
ISD::UMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3519 {
ISD::ABS, MVT::v8i64, { 1, 1, 1, 1 } },
3520 {
ISD::ABS, MVT::v4i64, { 1, 1, 1, 1 } },
3521 {
ISD::ABS, MVT::v2i64, { 1, 1, 1, 1 } },
3522 {
ISD::ABS, MVT::v16i32, { 1, 1, 1, 1 } },
3523 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 1 } },
3524 {
ISD::ABS, MVT::v32i16, { 2, 7, 4, 4 } },
3525 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 1 } },
3526 {
ISD::ABS, MVT::v64i8, { 2, 7, 4, 4 } },
3527 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 1 } },
3535 {
ISD::CTLZ, MVT::v8i64, { 10, 28, 32, 32 } },
3536 {
ISD::CTLZ, MVT::v16i32, { 12, 30, 38, 38 } },
3537 {
ISD::CTLZ, MVT::v32i16, { 8, 15, 29, 29 } },
3538 {
ISD::CTLZ, MVT::v64i8, { 6, 11, 19, 19 } },
3539 {
ISD::CTPOP, MVT::v8i64, { 16, 16, 19, 19 } },
3540 {
ISD::CTPOP, MVT::v16i32, { 24, 19, 27, 27 } },
3541 {
ISD::CTPOP, MVT::v32i16, { 18, 15, 22, 22 } },
3542 {
ISD::CTPOP, MVT::v64i8, { 12, 11, 16, 16 } },
3543 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3544 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3545 {
ISD::CTTZ, MVT::v32i16, { 7, 17, 27, 27 } },
3546 {
ISD::CTTZ, MVT::v64i8, { 6, 13, 21, 21 } },
3547 {
ISD::ROTL, MVT::v8i64, { 1, 1, 1, 1 } },
3548 {
ISD::ROTL, MVT::v4i64, { 1, 1, 1, 1 } },
3549 {
ISD::ROTL, MVT::v2i64, { 1, 1, 1, 1 } },
3550 {
ISD::ROTL, MVT::v16i32, { 1, 1, 1, 1 } },
3551 {
ISD::ROTL, MVT::v8i32, { 1, 1, 1, 1 } },
3552 {
ISD::ROTL, MVT::v4i32, { 1, 1, 1, 1 } },
3553 {
ISD::ROTR, MVT::v8i64, { 1, 1, 1, 1 } },
3554 {
ISD::ROTR, MVT::v4i64, { 1, 1, 1, 1 } },
3555 {
ISD::ROTR, MVT::v2i64, { 1, 1, 1, 1 } },
3556 {
ISD::ROTR, MVT::v16i32, { 1, 1, 1, 1 } },
3557 {
ISD::ROTR, MVT::v8i32, { 1, 1, 1, 1 } },
3558 {
ISD::ROTR, MVT::v4i32, { 1, 1, 1, 1 } },
3559 {
ISD::SMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3560 {
ISD::SMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3561 {
ISD::SMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3562 {
ISD::SMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3563 {
ISD::SMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3564 {
ISD::SMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3565 {
ISD::SMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3566 {
ISD::SMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3567 {
ISD::SMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3568 {
ISD::SMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3569 {
ISD::SMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3570 {
ISD::SMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3571 {
ISD::UMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3572 {
ISD::UMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3573 {
ISD::UMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3574 {
ISD::UMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3575 {
ISD::UMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3576 {
ISD::UMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3577 {
ISD::UMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3578 {
ISD::UMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3579 {
ISD::UMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3580 {
ISD::UMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3581 {
ISD::UMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3582 {
ISD::UMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3610 {
ISD::FSQRT, MVT::v16f32, { 12, 20, 1, 3 } },
3613 {
ISD::FSQRT, MVT::v4f64, { 12, 18, 1, 1 } },
3614 {
ISD::FSQRT, MVT::v8f64, { 24, 32, 1, 3 } },
3630 {
ISD::ROTL, MVT::v4i64, { 4, 7, 5, 6 } },
3631 {
ISD::ROTL, MVT::v8i32, { 4, 7, 5, 6 } },
3632 {
ISD::ROTL, MVT::v16i16, { 4, 7, 5, 6 } },
3633 {
ISD::ROTL, MVT::v32i8, { 4, 7, 5, 6 } },
3634 {
ISD::ROTL, MVT::v2i64, { 1, 3, 1, 1 } },
3635 {
ISD::ROTL, MVT::v4i32, { 1, 3, 1, 1 } },
3636 {
ISD::ROTL, MVT::v8i16, { 1, 3, 1, 1 } },
3637 {
ISD::ROTL, MVT::v16i8, { 1, 3, 1, 1 } },
3638 {
ISD::ROTR, MVT::v4i64, { 4, 7, 8, 9 } },
3639 {
ISD::ROTR, MVT::v8i32, { 4, 7, 8, 9 } },
3640 {
ISD::ROTR, MVT::v16i16, { 4, 7, 8, 9 } },
3641 {
ISD::ROTR, MVT::v32i8, { 4, 7, 8, 9 } },
3642 {
ISD::ROTR, MVT::v2i64, { 1, 3, 3, 3 } },
3643 {
ISD::ROTR, MVT::v4i32, { 1, 3, 3, 3 } },
3644 {
ISD::ROTR, MVT::v8i16, { 1, 3, 3, 3 } },
3645 {
ISD::ROTR, MVT::v16i8, { 1, 3, 3, 3 } }
3648 {
ISD::ABS, MVT::v2i64, { 2, 4, 3, 5 } },
3649 {
ISD::ABS, MVT::v4i64, { 2, 4, 3, 5 } },
3650 {
ISD::ABS, MVT::v4i32, { 1, 1, 1, 1 } },
3651 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 2 } },
3652 {
ISD::ABS, MVT::v8i16, { 1, 1, 1, 1 } },
3653 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 2 } },
3654 {
ISD::ABS, MVT::v16i8, { 1, 1, 1, 1 } },
3655 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 2 } },
3670 {
ISD::CTLZ, MVT::v2i64, { 7, 18, 24, 25 } },
3671 {
ISD::CTLZ, MVT::v4i64, { 14, 18, 24, 44 } },
3672 {
ISD::CTLZ, MVT::v4i32, { 5, 16, 19, 20 } },
3673 {
ISD::CTLZ, MVT::v8i32, { 10, 16, 19, 34 } },
3674 {
ISD::CTLZ, MVT::v8i16, { 4, 13, 14, 15 } },
3675 {
ISD::CTLZ, MVT::v16i16, { 6, 14, 14, 24 } },
3676 {
ISD::CTLZ, MVT::v16i8, { 3, 12, 9, 10 } },
3677 {
ISD::CTLZ, MVT::v32i8, { 4, 12, 9, 14 } },
3678 {
ISD::CTPOP, MVT::v2i64, { 3, 9, 10, 10 } },
3679 {
ISD::CTPOP, MVT::v4i64, { 4, 9, 10, 14 } },
3680 {
ISD::CTPOP, MVT::v4i32, { 7, 12, 14, 14 } },
3681 {
ISD::CTPOP, MVT::v8i32, { 7, 12, 14, 18 } },
3682 {
ISD::CTPOP, MVT::v8i16, { 3, 7, 11, 11 } },
3683 {
ISD::CTPOP, MVT::v16i16, { 6, 8, 11, 18 } },
3686 {
ISD::CTTZ, MVT::v2i64, { 4, 11, 13, 13 } },
3687 {
ISD::CTTZ, MVT::v4i64, { 5, 11, 13, 20 } },
3688 {
ISD::CTTZ, MVT::v4i32, { 7, 14, 17, 17 } },
3689 {
ISD::CTTZ, MVT::v8i32, { 7, 15, 17, 24 } },
3690 {
ISD::CTTZ, MVT::v8i16, { 4, 9, 14, 14 } },
3691 {
ISD::CTTZ, MVT::v16i16, { 6, 9, 14, 24 } },
3692 {
ISD::CTTZ, MVT::v16i8, { 3, 7, 11, 11 } },
3693 {
ISD::CTTZ, MVT::v32i8, { 5, 7, 11, 18 } },
3696 {
ISD::SMAX, MVT::v2i64, { 2, 7, 2, 3 } },
3697 {
ISD::SMAX, MVT::v4i64, { 2, 7, 2, 3 } },
3698 {
ISD::SMAX, MVT::v8i32, { 1, 1, 1, 2 } },
3699 {
ISD::SMAX, MVT::v16i16, { 1, 1, 1, 2 } },
3700 {
ISD::SMAX, MVT::v32i8, { 1, 1, 1, 2 } },
3701 {
ISD::SMIN, MVT::v2i64, { 2, 7, 2, 3 } },
3702 {
ISD::SMIN, MVT::v4i64, { 2, 7, 2, 3 } },
3703 {
ISD::SMIN, MVT::v8i32, { 1, 1, 1, 2 } },
3704 {
ISD::SMIN, MVT::v16i16, { 1, 1, 1, 2 } },
3705 {
ISD::SMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3711 {
ISD::UMAX, MVT::v2i64, { 2, 8, 5, 6 } },
3712 {
ISD::UMAX, MVT::v4i64, { 2, 8, 5, 8 } },
3713 {
ISD::UMAX, MVT::v8i32, { 1, 1, 1, 2 } },
3714 {
ISD::UMAX, MVT::v16i16, { 1, 1, 1, 2 } },
3715 {
ISD::UMAX, MVT::v32i8, { 1, 1, 1, 2 } },
3716 {
ISD::UMIN, MVT::v2i64, { 2, 8, 5, 6 } },
3717 {
ISD::UMIN, MVT::v4i64, { 2, 8, 5, 8 } },
3718 {
ISD::UMIN, MVT::v8i32, { 1, 1, 1, 2 } },
3719 {
ISD::UMIN, MVT::v16i16, { 1, 1, 1, 2 } },
3720 {
ISD::UMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3732 {
ISD::FSQRT, MVT::v8f32, { 14, 21, 1, 3 } },
3734 {
ISD::FSQRT, MVT::v2f64, { 14, 21, 1, 1 } },
3735 {
ISD::FSQRT, MVT::v4f64, { 28, 35, 1, 3 } },
3738 {
ISD::ABS, MVT::v4i64, { 6, 8, 6, 12 } },
3739 {
ISD::ABS, MVT::v8i32, { 3, 6, 4, 5 } },
3740 {
ISD::ABS, MVT::v16i16, { 3, 6, 4, 5 } },
3741 {
ISD::ABS, MVT::v32i8, { 3, 6, 4, 5 } },
3754 {
ISD::BSWAP, MVT::v16i16, { 5, 6, 5, 10 } },
3756 {
ISD::CTLZ, MVT::v4i64, { 29, 33, 49, 58 } },
3757 {
ISD::CTLZ, MVT::v2i64, { 14, 24, 24, 28 } },
3758 {
ISD::CTLZ, MVT::v8i32, { 24, 28, 39, 48 } },
3759 {
ISD::CTLZ, MVT::v4i32, { 12, 20, 19, 23 } },
3760 {
ISD::CTLZ, MVT::v16i16, { 19, 22, 29, 38 } },
3761 {
ISD::CTLZ, MVT::v8i16, { 9, 16, 14, 18 } },
3762 {
ISD::CTLZ, MVT::v32i8, { 14, 15, 19, 28 } },
3763 {
ISD::CTLZ, MVT::v16i8, { 7, 12, 9, 13 } },
3764 {
ISD::CTPOP, MVT::v4i64, { 14, 18, 19, 28 } },
3765 {
ISD::CTPOP, MVT::v2i64, { 7, 14, 10, 14 } },
3766 {
ISD::CTPOP, MVT::v8i32, { 18, 24, 27, 36 } },
3767 {
ISD::CTPOP, MVT::v4i32, { 9, 20, 14, 18 } },
3768 {
ISD::CTPOP, MVT::v16i16, { 16, 21, 22, 31 } },
3769 {
ISD::CTPOP, MVT::v8i16, { 8, 18, 11, 15 } },
3770 {
ISD::CTPOP, MVT::v32i8, { 13, 15, 16, 25 } },
3771 {
ISD::CTPOP, MVT::v16i8, { 6, 12, 8, 12 } },
3772 {
ISD::CTTZ, MVT::v4i64, { 17, 22, 24, 33 } },
3773 {
ISD::CTTZ, MVT::v2i64, { 9, 19, 13, 17 } },
3774 {
ISD::CTTZ, MVT::v8i32, { 21, 27, 32, 41 } },
3775 {
ISD::CTTZ, MVT::v4i32, { 11, 24, 17, 21 } },
3776 {
ISD::CTTZ, MVT::v16i16, { 18, 24, 27, 36 } },
3777 {
ISD::CTTZ, MVT::v8i16, { 9, 21, 14, 18 } },
3778 {
ISD::CTTZ, MVT::v32i8, { 15, 18, 21, 30 } },
3779 {
ISD::CTTZ, MVT::v16i8, { 8, 16, 11, 15 } },
3782 {
ISD::SMAX, MVT::v4i64, { 6, 9, 6, 12 } },
3783 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 4 } },
3784 {
ISD::SMAX, MVT::v8i32, { 4, 6, 5, 6 } },
3785 {
ISD::SMAX, MVT::v16i16, { 4, 6, 5, 6 } },
3786 {
ISD::SMAX, MVT::v32i8, { 4, 6, 5, 6 } },
3787 {
ISD::SMIN, MVT::v4i64, { 6, 9, 6, 12 } },
3788 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
3789 {
ISD::SMIN, MVT::v8i32, { 4, 6, 5, 6 } },
3790 {
ISD::SMIN, MVT::v16i16, { 4, 6, 5, 6 } },
3791 {
ISD::SMIN, MVT::v32i8, { 4, 6, 5, 6 } },
3797 {
ISD::UMAX, MVT::v4i64, { 9, 10, 11, 17 } },
3798 {
ISD::UMAX, MVT::v2i64, { 4, 8, 5, 7 } },
3799 {
ISD::UMAX, MVT::v8i32, { 4, 6, 5, 6 } },
3800 {
ISD::UMAX, MVT::v16i16, { 4, 6, 5, 6 } },
3801 {
ISD::UMAX, MVT::v32i8, { 4, 6, 5, 6 } },
3802 {
ISD::UMIN, MVT::v4i64, { 9, 10, 11, 17 } },
3803 {
ISD::UMIN, MVT::v2i64, { 4, 8, 5, 7 } },
3804 {
ISD::UMIN, MVT::v8i32, { 4, 6, 5, 6 } },
3805 {
ISD::UMIN, MVT::v16i16, { 4, 6, 5, 6 } },
3806 {
ISD::UMIN, MVT::v32i8, { 4, 6, 5, 6 } },
3817 {
ISD::FSQRT, MVT::v4f32, { 21, 21, 1, 1 } },
3818 {
ISD::FSQRT, MVT::v8f32, { 42, 42, 1, 3 } },
3820 {
ISD::FSQRT, MVT::v2f64, { 27, 27, 1, 1 } },
3821 {
ISD::FSQRT, MVT::v4f64, { 54, 54, 1, 3 } },
3825 {
ISD::FSQRT, MVT::v4f32, { 37, 41, 1, 5 } },
3827 {
ISD::FSQRT, MVT::v2f64, { 67, 71, 1, 5 } },
3834 {
ISD::FSQRT, MVT::v4f32, { 40, 41, 1, 5 } },
3836 {
ISD::FSQRT, MVT::v2f64, { 70, 71, 1, 5 } },
3846 {
ISD::FSQRT, MVT::v4f32, { 18, 18, 1, 1 } },
3849 {
ISD::ABS, MVT::v2i64, { 3, 4, 3, 5 } },
3850 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 3 } },
3851 {
ISD::SMAX, MVT::v4i32, { 1, 1, 1, 1 } },
3852 {
ISD::SMAX, MVT::v16i8, { 1, 1, 1, 1 } },
3853 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
3854 {
ISD::SMIN, MVT::v4i32, { 1, 1, 1, 1 } },
3855 {
ISD::SMIN, MVT::v16i8, { 1, 1, 1, 1 } },
3856 {
ISD::UMAX, MVT::v2i64, { 2, 11, 6, 7 } },
3857 {
ISD::UMAX, MVT::v4i32, { 1, 1, 1, 1 } },
3858 {
ISD::UMAX, MVT::v8i16, { 1, 1, 1, 1 } },
3859 {
ISD::UMIN, MVT::v2i64, { 2, 11, 6, 7 } },
3860 {
ISD::UMIN, MVT::v4i32, { 1, 1, 1, 1 } },
3861 {
ISD::UMIN, MVT::v8i16, { 1, 1, 1, 1 } },
3864 {
ISD::ABS, MVT::v4i32, { 1, 2, 1, 1 } },
3865 {
ISD::ABS, MVT::v8i16, { 1, 2, 1, 1 } },
3866 {
ISD::ABS, MVT::v16i8, { 1, 2, 1, 1 } },
3874 {
ISD::CTLZ, MVT::v2i64, { 18, 28, 28, 35 } },
3875 {
ISD::CTLZ, MVT::v4i32, { 15, 20, 22, 28 } },
3876 {
ISD::CTLZ, MVT::v8i16, { 13, 17, 16, 22 } },
3877 {
ISD::CTLZ, MVT::v16i8, { 11, 15, 10, 16 } },
3878 {
ISD::CTPOP, MVT::v2i64, { 13, 19, 12, 18 } },
3879 {
ISD::CTPOP, MVT::v4i32, { 18, 24, 16, 22 } },
3880 {
ISD::CTPOP, MVT::v8i16, { 13, 18, 14, 20 } },
3881 {
ISD::CTPOP, MVT::v16i8, { 11, 12, 10, 16 } },
3882 {
ISD::CTTZ, MVT::v2i64, { 13, 25, 15, 22 } },
3883 {
ISD::CTTZ, MVT::v4i32, { 18, 26, 19, 25 } },
3884 {
ISD::CTTZ, MVT::v8i16, { 13, 20, 17, 23 } },
3885 {
ISD::CTTZ, MVT::v16i8, { 11, 16, 13, 19 } }
3888 {
ISD::ABS, MVT::v2i64, { 3, 6, 5, 5 } },
3889 {
ISD::ABS, MVT::v4i32, { 1, 4, 4, 4 } },
3890 {
ISD::ABS, MVT::v8i16, { 1, 2, 3, 3 } },
3891 {
ISD::ABS, MVT::v16i8, { 1, 2, 3, 3 } },
3896 {
ISD::BSWAP, MVT::v2i64, { 5, 6, 11, 11 } },
3899 {
ISD::CTLZ, MVT::v2i64, { 10, 45, 36, 38 } },
3900 {
ISD::CTLZ, MVT::v4i32, { 10, 45, 38, 40 } },
3901 {
ISD::CTLZ, MVT::v8i16, { 9, 38, 32, 34 } },
3902 {
ISD::CTLZ, MVT::v16i8, { 8, 39, 29, 32 } },
3903 {
ISD::CTPOP, MVT::v2i64, { 12, 26, 16, 18 } },
3904 {
ISD::CTPOP, MVT::v4i32, { 15, 29, 21, 23 } },
3905 {
ISD::CTPOP, MVT::v8i16, { 13, 25, 18, 20 } },
3906 {
ISD::CTPOP, MVT::v16i8, { 10, 21, 14, 16 } },
3907 {
ISD::CTTZ, MVT::v2i64, { 14, 28, 19, 21 } },
3908 {
ISD::CTTZ, MVT::v4i32, { 18, 31, 24, 26 } },
3909 {
ISD::CTTZ, MVT::v8i16, { 16, 27, 21, 23 } },
3910 {
ISD::CTTZ, MVT::v16i8, { 13, 23, 17, 19 } },
3913 {
ISD::SMAX, MVT::v2i64, { 4, 8, 15, 15 } },
3914 {
ISD::SMAX, MVT::v4i32, { 2, 4, 5, 5 } },
3915 {
ISD::SMAX, MVT::v8i16, { 1, 1, 1, 1 } },
3916 {
ISD::SMAX, MVT::v16i8, { 2, 4, 5, 5 } },
3917 {
ISD::SMIN, MVT::v2i64, { 4, 8, 15, 15 } },
3918 {
ISD::SMIN, MVT::v4i32, { 2, 4, 5, 5 } },
3919 {
ISD::SMIN, MVT::v8i16, { 1, 1, 1, 1 } },
3920 {
ISD::SMIN, MVT::v16i8, { 2, 4, 5, 5 } },
3925 {
ISD::UMAX, MVT::v2i64, { 4, 8, 15, 15 } },
3926 {
ISD::UMAX, MVT::v4i32, { 2, 5, 8, 8 } },
3927 {
ISD::UMAX, MVT::v8i16, { 1, 3, 3, 3 } },
3928 {
ISD::UMAX, MVT::v16i8, { 1, 1, 1, 1 } },
3929 {
ISD::UMIN, MVT::v2i64, { 4, 8, 15, 15 } },
3930 {
ISD::UMIN, MVT::v4i32, { 2, 5, 8, 8 } },
3931 {
ISD::UMIN, MVT::v8i16, { 1, 3, 3, 3 } },
3932 {
ISD::UMIN, MVT::v16i8, { 1, 1, 1, 1 } },
3938 {
ISD::FSQRT, MVT::v2f64, { 32, 32, 1, 1 } },
3944 {
ISD::FSQRT, MVT::v4f32, { 56, 56, 1, 2 } },
3971 {
ISD::ABS, MVT::i64, { 1, 2, 3, 4 } },
3979 {
ISD::ROTL, MVT::i64, { 2, 3, 1, 3 } },
3980 {
ISD::ROTR, MVT::i64, { 2, 3, 1, 3 } },
3982 {
ISD::FSHL, MVT::i64, { 4, 4, 1, 4 } },
3983 {
ISD::SMAX, MVT::i64, { 1, 3, 2, 3 } },
3984 {
ISD::SMIN, MVT::i64, { 1, 3, 2, 3 } },
3985 {
ISD::UMAX, MVT::i64, { 1, 3, 2, 3 } },
3986 {
ISD::UMIN, MVT::i64, { 1, 3, 2, 3 } },
3992 {
ISD::ABS, MVT::i32, { 1, 2, 3, 4 } },
3993 {
ISD::ABS, MVT::i16, { 2, 2, 3, 4 } },
3994 {
ISD::ABS, MVT::i8, { 2, 4, 4, 4 } },
4015 {
ISD::ROTL, MVT::i32, { 2, 3, 1, 3 } },
4016 {
ISD::ROTL, MVT::i16, { 2, 3, 1, 3 } },
4018 {
ISD::ROTR, MVT::i32, { 2, 3, 1, 3 } },
4019 {
ISD::ROTR, MVT::i16, { 2, 3, 1, 3 } },
4024 {
ISD::FSHL, MVT::i32, { 4, 4, 1, 4 } },
4025 {
ISD::FSHL, MVT::i16, { 4, 4, 2, 5 } },
4027 {
ISD::SMAX, MVT::i32, { 1, 2, 2, 3 } },
4028 {
ISD::SMAX, MVT::i16, { 1, 4, 2, 4 } },
4030 {
ISD::SMIN, MVT::i32, { 1, 2, 2, 3 } },
4031 {
ISD::SMIN, MVT::i16, { 1, 4, 2, 4 } },
4033 {
ISD::UMAX, MVT::i32, { 1, 2, 2, 3 } },
4034 {
ISD::UMAX, MVT::i16, { 1, 4, 2, 4 } },
4036 {
ISD::UMIN, MVT::i32, { 1, 2, 2, 3 } },
4037 {
ISD::UMIN, MVT::i16, { 1, 4, 2, 4 } },
4057 case Intrinsic::abs:
4060 case Intrinsic::bitreverse:
4063 case Intrinsic::bswap:
4066 case Intrinsic::ctlz:
4069 case Intrinsic::ctpop:
4072 case Intrinsic::cttz:
4075 case Intrinsic::fshl:
4079 if (Args[0] == Args[1]) {
4083 if (isa_and_nonnull<ConstantInt>(Args[2]))
4088 case Intrinsic::fshr:
4093 if (Args[0] == Args[1]) {
4097 if (isa_and_nonnull<ConstantInt>(Args[2]))
4102 case Intrinsic::maxnum:
4103 case Intrinsic::minnum:
4107 case Intrinsic::sadd_sat:
4110 case Intrinsic::smax:
4113 case Intrinsic::smin:
4116 case Intrinsic::ssub_sat:
4119 case Intrinsic::uadd_sat:
4122 case Intrinsic::umax:
4125 case Intrinsic::umin:
4128 case Intrinsic::usub_sat:
4131 case Intrinsic::sqrt:
4134 case Intrinsic::sadd_with_overflow:
4135 case Intrinsic::ssub_with_overflow:
4138 OpTy =
RetTy->getContainedType(0);
4140 case Intrinsic::uadd_with_overflow:
4141 case Intrinsic::usub_with_overflow:
4144 OpTy =
RetTy->getContainedType(0);
4146 case Intrinsic::umul_with_overflow:
4147 case Intrinsic::smul_with_overflow:
4150 OpTy =
RetTy->getContainedType(0);
4157 MVT MTy = LT.second;
4173 return LT.first *
Cost;
4177 if (((ISD ==
ISD::CTTZ && !ST->hasBMI()) ||
4178 (ISD ==
ISD::CTLZ && !ST->hasLZCNT())) &&
4181 if (
auto *Cst = dyn_cast<ConstantInt>(Args[1]))
4182 if (Cst->isAllOnesValue())
4190 auto adjustTableCost = [](
int ISD,
unsigned Cost,
4198 return LegalizationCost * 1;
4200 return LegalizationCost * (int)
Cost;
4203 if (ST->useGLMDivSqrtCosts())
4205 if (
auto KindCost = Entry->Cost[
CostKind])
4206 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4209 if (ST->useSLMArithCosts())
4211 if (
auto KindCost = Entry->Cost[
CostKind])
4212 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4216 if (
const auto *Entry =
CostTableLookup(AVX512VBMI2CostTbl, ISD, MTy))
4217 if (
auto KindCost = Entry->Cost[
CostKind])
4218 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4221 if (ST->hasBITALG())
4222 if (
const auto *Entry =
CostTableLookup(AVX512BITALGCostTbl, ISD, MTy))
4223 if (
auto KindCost = Entry->Cost[
CostKind])
4224 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4227 if (ST->hasVPOPCNTDQ())
4228 if (
const auto *Entry =
CostTableLookup(AVX512VPOPCNTDQCostTbl, ISD, MTy))
4229 if (
auto KindCost = Entry->Cost[
CostKind])
4230 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4235 if (
auto KindCost = Entry->Cost[
CostKind])
4236 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4241 if (
auto KindCost = Entry->Cost[
CostKind])
4242 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4247 if (
auto KindCost = Entry->Cost[
CostKind])
4248 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4253 if (
auto KindCost = Entry->Cost[
CostKind])
4254 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4259 if (
auto KindCost = Entry->Cost[
CostKind])
4260 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4265 if (
auto KindCost = Entry->Cost[
CostKind])
4266 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4271 if (
auto KindCost = Entry->Cost[
CostKind])
4272 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4277 if (
auto KindCost = Entry->Cost[
CostKind])
4278 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4283 if (
auto KindCost = Entry->Cost[
CostKind])
4284 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4289 if (
auto KindCost = Entry->Cost[
CostKind])
4290 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4295 if (
auto KindCost = Entry->Cost[
CostKind])
4296 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4302 if (
auto KindCost = Entry->Cost[
CostKind])
4303 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4307 if (
auto KindCost = Entry->Cost[
CostKind])
4308 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4312 if (ST->hasLZCNT()) {
4315 if (
auto KindCost = Entry->Cost[
CostKind])
4316 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4320 if (
auto KindCost = Entry->Cost[
CostKind])
4321 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4325 if (ST->hasPOPCNT()) {
4328 if (
auto KindCost = Entry->Cost[
CostKind])
4329 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4333 if (
auto KindCost = Entry->Cost[
CostKind])
4334 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4338 if (ISD ==
ISD::BSWAP && ST->hasMOVBE() && ST->hasFastMOVBE()) {
4340 if (II->hasOneUse() && isa<StoreInst>(II->user_back()))
4342 if (
auto *LI = dyn_cast<LoadInst>(II->getOperand(0))) {
4343 if (LI->hasOneUse())
4351 if (
auto KindCost = Entry->Cost[
CostKind])
4352 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4356 if (
auto KindCost = Entry->Cost[
CostKind])
4357 return adjustTableCost(Entry->ISD, *KindCost, LT.first, ICA.
getFlags());
4380 if (
Index == -1U && (Opcode == Instruction::ExtractElement ||
4381 Opcode == Instruction::InsertElement)) {
4386 assert(isa<FixedVectorType>(Val) &&
"Fixed vector type expected");
4391 if (Opcode == Instruction::ExtractElement) {
4397 if (Opcode == Instruction::InsertElement) {
4405 if (
Index != -1U && (Opcode == Instruction::ExtractElement ||
4406 Opcode == Instruction::InsertElement)) {
4408 if (Opcode == Instruction::ExtractElement &&
4410 cast<FixedVectorType>(Val)->getNumElements() > 1)
4417 if (!LT.second.isVector())
4421 unsigned SizeInBits = LT.second.getSizeInBits();
4422 unsigned NumElts = LT.second.getVectorNumElements();
4423 unsigned SubNumElts = NumElts;
4428 if (SizeInBits > 128) {
4429 assert((SizeInBits % 128) == 0 &&
"Illegal vector");
4430 unsigned NumSubVecs = SizeInBits / 128;
4431 SubNumElts = NumElts / NumSubVecs;
4432 if (SubNumElts <=
Index) {
4433 RegisterFileMoveCost += (Opcode == Instruction::InsertElement ? 2 : 1);
4434 Index %= SubNumElts;
4438 MVT MScalarTy = LT.second.getScalarType();
4439 auto IsCheapPInsrPExtrInsertPS = [&]() {
4442 return (MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4444 (MScalarTy == MVT::f32 && ST->
hasSSE41() &&
4445 Opcode == Instruction::InsertElement);
4453 (Opcode != Instruction::InsertElement || !Op0 ||
4454 isa<UndefValue>(Op0)))
4455 return RegisterFileMoveCost;
4457 if (Opcode == Instruction::InsertElement &&
4458 isa_and_nonnull<UndefValue>(Op0)) {
4460 if (isa_and_nonnull<LoadInst>(Op1))
4461 return RegisterFileMoveCost;
4462 if (!IsCheapPInsrPExtrInsertPS()) {
4465 return 2 + RegisterFileMoveCost;
4467 return 1 + RegisterFileMoveCost;
4472 if (ScalarType->
isIntegerTy() && Opcode == Instruction::ExtractElement)
4473 return 1 + RegisterFileMoveCost;
4477 assert(ISD &&
"Unexpected vector opcode");
4478 if (ST->useSLMArithCosts())
4480 return Entry->Cost + RegisterFileMoveCost;
4483 if (IsCheapPInsrPExtrInsertPS())
4484 return 1 + RegisterFileMoveCost;
4493 if (Opcode == Instruction::InsertElement) {
4494 auto *SubTy = cast<VectorType>(Val);
4502 return ShuffleCost + IntOrFpCost + RegisterFileMoveCost;
4506 RegisterFileMoveCost;
4511 bool Insert,
bool Extract,
4514 cast<FixedVectorType>(Ty)->getNumElements() &&
4515 "Vector size mismatch");
4518 MVT MScalarTy = LT.second.getScalarType();
4519 unsigned LegalVectorBitWidth = LT.second.getSizeInBits();
4522 constexpr unsigned LaneBitWidth = 128;
4523 assert((LegalVectorBitWidth < LaneBitWidth ||
4524 (LegalVectorBitWidth % LaneBitWidth) == 0) &&
4527 const int NumLegalVectors = *LT.first.getValue();
4528 assert(NumLegalVectors >= 0 &&
"Negative cost!");
4533 if ((MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4535 (MScalarTy == MVT::f32 && ST->
hasSSE41())) {
4538 if (LegalVectorBitWidth <= LaneBitWidth) {
4554 assert((LegalVectorBitWidth % LaneBitWidth) == 0 &&
"Illegal vector");
4555 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
4556 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
4557 unsigned NumLegalElts =
4558 LT.second.getVectorNumElements() * NumLegalVectors;
4560 "Vector has been legalized to smaller element count");
4561 assert((NumLegalElts % NumLanesTotal) == 0 &&
4562 "Unexpected elts per lane");
4563 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
4565 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
4569 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
4571 NumEltsPerLane, NumEltsPerLane *
I);
4572 if (LaneEltMask.
isZero())
4583 APInt AffectedLanes =
4586 AffectedLanes, NumLegalVectors,
true);
4587 for (
int LegalVec = 0; LegalVec != NumLegalVectors; ++LegalVec) {
4588 for (
unsigned Lane = 0; Lane != NumLegalLanes; ++Lane) {
4589 unsigned I = NumLegalLanes * LegalVec + Lane;
4592 if (!AffectedLanes[
I] ||
4593 (Lane == 0 && FullyAffectedLegalVectors[LegalVec]))
4600 }
else if (LT.second.isVector()) {
4611 unsigned NumElts = LT.second.getVectorNumElements();
4613 PowerOf2Ceil(cast<FixedVectorType>(Ty)->getNumElements());
4614 Cost += (std::min<unsigned>(NumElts, Pow2Elts) - 1) * LT.first;
4623 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
4624 unsigned MaxElts = ST->
hasAVX2() ? 32 : 16;
4625 unsigned MOVMSKCost = (NumElts + MaxElts - 1) / MaxElts;
4629 if (LT.second.isVector()) {
4630 unsigned NumLegalElts =
4631 LT.second.getVectorNumElements() * NumLegalVectors;
4633 "Vector has been legalized to smaller element count");
4637 if (LegalVectorBitWidth > LaneBitWidth) {
4638 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
4639 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
4640 assert((NumLegalElts % NumLanesTotal) == 0 &&
4641 "Unexpected elts per lane");
4642 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
4646 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
4650 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
4652 NumEltsPerLane,
I * NumEltsPerLane);
4653 if (LaneEltMask.
isZero())
4658 LaneTy, LaneEltMask,
false, Extract,
CostKind);
4675 int VF,
const APInt &DemandedDstElts,
4681 auto bailout = [&]() {
4691 unsigned PromEltTyBits = EltTyBits;
4692 switch (EltTyBits) {
4723 int NumDstElements = VF * ReplicationFactor;
4737 if (PromEltTyBits != EltTyBits) {
4743 Instruction::SExt, PromSrcVecTy, SrcVecTy,
4750 ReplicationFactor, VF,
4756 "We expect that the legalization doesn't affect the element width, "
4757 "doesn't coalesce/split elements.");
4760 unsigned NumDstVectors =
4761 divideCeil(DstVecTy->getNumElements(), NumEltsPerDstVec);
4770 DemandedDstElts.
zext(NumDstVectors * NumEltsPerDstVec), NumDstVectors);
4771 unsigned NumDstVectorsDemanded = DemandedDstVectors.
popcount();
4776 return NumDstVectorsDemanded * SingleShuffleCost;
4787 if (
auto *SI = dyn_cast_or_null<StoreInst>(
I)) {
4790 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(SI->getPointerOperand())) {
4791 if (!
all_of(
GEP->indices(), [](
Value *V) { return isa<Constant>(V); }))
4798 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
4808 auto *VTy = dyn_cast<FixedVectorType>(Src);
4813 if (Opcode == Instruction::Store && OpInfo.
isConstant())
4819 if (!VTy || !LT.second.isVector()) {
4821 return (LT.second.isFloatingPoint() ?
Cost : 0) + LT.first * 1;
4824 bool IsLoad = Opcode == Instruction::Load;
4826 Type *EltTy = VTy->getElementType();
4831 const unsigned SrcNumElt = VTy->getNumElements();
4834 int NumEltRemaining = SrcNumElt;
4836 auto NumEltDone = [&]() {
return SrcNumElt - NumEltRemaining; };
4838 const int MaxLegalOpSizeBytes =
divideCeil(LT.second.getSizeInBits(), 8);
4841 const unsigned XMMBits = 128;
4842 if (XMMBits % EltTyBits != 0)
4846 const int NumEltPerXMM = XMMBits / EltTyBits;
4850 for (
int CurrOpSizeBytes = MaxLegalOpSizeBytes, SubVecEltsLeft = 0;
4851 NumEltRemaining > 0; CurrOpSizeBytes /= 2) {
4853 if ((8 * CurrOpSizeBytes) % EltTyBits != 0)
4857 int CurrNumEltPerOp = (8 * CurrOpSizeBytes) / EltTyBits;
4859 assert(CurrOpSizeBytes > 0 && CurrNumEltPerOp > 0 &&
"How'd we get here?");
4860 assert((((NumEltRemaining * EltTyBits) < (2 * 8 * CurrOpSizeBytes)) ||
4861 (CurrOpSizeBytes == MaxLegalOpSizeBytes)) &&
4862 "Unless we haven't halved the op size yet, "
4863 "we have less than two op's sized units of work left.");
4865 auto *CurrVecTy = CurrNumEltPerOp > NumEltPerXMM
4869 assert(CurrVecTy->getNumElements() % CurrNumEltPerOp == 0 &&
4870 "After halving sizes, the vector elt count is no longer a multiple "
4871 "of number of elements per operation?");
4872 auto *CoalescedVecTy =
4873 CurrNumEltPerOp == 1
4877 EltTyBits * CurrNumEltPerOp),
4878 CurrVecTy->getNumElements() / CurrNumEltPerOp);
4881 "coalesciing elements doesn't change vector width.");
4883 while (NumEltRemaining > 0) {
4884 assert(SubVecEltsLeft >= 0 &&
"Subreg element count overconsumtion?");
4888 if (NumEltRemaining < CurrNumEltPerOp &&
4889 (!IsLoad || Alignment.
valueOrOne() < CurrOpSizeBytes) &&
4890 CurrOpSizeBytes != 1)
4893 bool Is0thSubVec = (NumEltDone() % LT.second.getVectorNumElements()) == 0;
4896 if (SubVecEltsLeft == 0) {
4897 SubVecEltsLeft += CurrVecTy->getNumElements();
4902 VTy, std::nullopt,
CostKind, NumEltDone(),
4910 if (CurrOpSizeBytes <= 32 / 8 && !Is0thSubVec) {
4911 int NumEltDoneInCurrXMM = NumEltDone() % NumEltPerXMM;
4912 assert(NumEltDoneInCurrXMM % CurrNumEltPerOp == 0 &&
"");
4913 int CoalescedVecEltIdx = NumEltDoneInCurrXMM / CurrNumEltPerOp;
4914 APInt DemandedElts =
4916 CoalescedVecEltIdx, CoalescedVecEltIdx + 1);
4917 assert(DemandedElts.
popcount() == 1 &&
"Inserting single value");
4927 if (CurrOpSizeBytes == 32 && ST->isUnalignedMem32Slow())
4929 else if (CurrOpSizeBytes < 4)
4934 SubVecEltsLeft -= CurrNumEltPerOp;
4935 NumEltRemaining -= CurrNumEltPerOp;
4940 assert(NumEltRemaining <= 0 &&
"Should have processed all the elements.");
4949 bool IsLoad = (Instruction::Load == Opcode);
4950 bool IsStore = (Instruction::Store == Opcode);
4952 auto *SrcVTy = dyn_cast<FixedVectorType>(SrcTy);
4957 unsigned NumElem = SrcVTy->getNumElements();
4965 MaskTy, DemandedElts,
false,
true,
CostKind);
4970 InstructionCost MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
4972 SrcVTy, DemandedElts, IsLoad, IsStore,
CostKind);
4976 return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
4983 if (VT.isSimple() && LT.second != VT.getSimpleVT() &&
4984 LT.second.getVectorNumElements() == NumElem)
4991 else if (LT.first * LT.second.getVectorNumElements() > NumElem) {
4993 LT.second.getVectorNumElements());
5001 return Cost + LT.first * (IsLoad ? 2 : 8);
5004 return Cost + LT.first;
5012 if (
Info.isSameBase() &&
Info.isKnownStride()) {
5016 if (
const auto *BaseGEP = dyn_cast<GetElementPtrInst>(
Base)) {
5018 return getGEPCost(BaseGEP->getSourceElementType(),
5019 BaseGEP->getPointerOperand(), Indices,
nullptr,
5034 const unsigned NumVectorInstToHideOverhead = 10;
5047 return NumVectorInstToHideOverhead;
5057 std::optional<FastMathFlags> FMF,
5098 assert(ISD &&
"Invalid opcode");
5106 if (ST->useSLMArithCosts())
5121 MVT MTy = LT.second;
5123 auto *ValVTy = cast<FixedVectorType>(ValTy);
5136 if (LT.first != 1 && MTy.
isVector() &&
5142 ArithmeticCost *= LT.first - 1;
5145 if (ST->useSLMArithCosts())
5147 return ArithmeticCost + Entry->Cost;
5151 return ArithmeticCost + Entry->Cost;
5155 return ArithmeticCost + Entry->Cost;
5204 if (ValVTy->getElementType()->isIntegerTy(1)) {
5206 if (LT.first != 1 && MTy.
isVector() &&
5212 ArithmeticCost *= LT.first - 1;
5216 if (
const auto *Entry =
CostTableLookup(AVX512BoolReduction, ISD, MTy))
5217 return ArithmeticCost + Entry->Cost;
5220 return ArithmeticCost + Entry->Cost;
5223 return ArithmeticCost + Entry->Cost;
5226 return ArithmeticCost + Entry->Cost;
5231 unsigned NumVecElts = ValVTy->getNumElements();
5232 unsigned ScalarSize = ValVTy->getScalarSizeInBits();
5242 if (LT.first != 1 && MTy.
isVector() &&
5248 ReductionCost *= LT.first - 1;
5254 while (NumVecElts > 1) {
5256 unsigned Size = NumVecElts * ScalarSize;
5265 }
else if (
Size == 128) {
5268 if (ValVTy->isFloatingPointTy())
5275 std::nullopt,
CostKind, 0,
nullptr);
5276 }
else if (
Size == 64) {
5279 if (ValVTy->isFloatingPointTy())
5286 std::nullopt,
CostKind, 0,
nullptr);
5292 Instruction::LShr, ShiftTy,
CostKind,
5319 MVT MTy = LT.second;
5323 ISD = (IID == Intrinsic::umin || IID == Intrinsic::umax) ?
ISD::UMIN
5327 "Expected float point or integer vector type.");
5328 ISD = (IID == Intrinsic::minnum || IID == Intrinsic::maxnum)
5396 auto *ValVTy = cast<FixedVectorType>(ValTy);
5397 unsigned NumVecElts = ValVTy->getNumElements();
5401 if (LT.first != 1 && MTy.
isVector() &&
5407 MinMaxCost *= LT.first - 1;
5413 return MinMaxCost + Entry->Cost;
5417 return MinMaxCost + Entry->Cost;
5421 return MinMaxCost + Entry->Cost;
5425 return MinMaxCost + Entry->Cost;
5437 while (NumVecElts > 1) {
5439 unsigned Size = NumVecElts * ScalarSize;
5447 }
else if (
Size == 128) {
5456 std::nullopt,
CostKind, 0,
nullptr);
5457 }
else if (
Size == 64) {
5465 std::nullopt,
CostKind, 0,
nullptr);
5518 if (BitSize % 64 != 0)
5519 ImmVal = Imm.sext(
alignTo(BitSize, 64));
5524 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
5530 return std::max<InstructionCost>(1,
Cost);
5545 unsigned ImmIdx = ~0U;
5549 case Instruction::GetElementPtr:
5556 case Instruction::Store:
5559 case Instruction::ICmp:
5565 if (
Idx == 1 && Imm.getBitWidth() == 64) {
5566 uint64_t ImmVal = Imm.getZExtValue();
5567 if (ImmVal == 0x100000000ULL || ImmVal == 0xffffffff)
5572 case Instruction::And:
5576 if (
Idx == 1 && Imm.getBitWidth() == 64 && Imm.isIntN(32))
5580 case Instruction::Add:
5581 case Instruction::Sub:
5583 if (
Idx == 1 && Imm.getBitWidth() == 64 && Imm.getZExtValue() == 0x80000000)
5587 case Instruction::UDiv:
5588 case Instruction::SDiv:
5589 case Instruction::URem:
5590 case Instruction::SRem:
5595 case Instruction::Mul:
5596 case Instruction::Or:
5597 case Instruction::Xor:
5601 case Instruction::Shl:
5602 case Instruction::LShr:
5603 case Instruction::AShr:
5607 case Instruction::Trunc:
5608 case Instruction::ZExt:
5609 case Instruction::SExt:
5610 case Instruction::IntToPtr:
5611 case Instruction::PtrToInt:
5612 case Instruction::BitCast:
5613 case Instruction::PHI:
5614 case Instruction::Call:
5615 case Instruction::Select:
5616 case Instruction::Ret:
5617 case Instruction::Load:
5621 if (
Idx == ImmIdx) {
5646 case Intrinsic::sadd_with_overflow:
5647 case Intrinsic::uadd_with_overflow:
5648 case Intrinsic::ssub_with_overflow:
5649 case Intrinsic::usub_with_overflow:
5650 case Intrinsic::smul_with_overflow:
5651 case Intrinsic::umul_with_overflow:
5652 if ((
Idx == 1) && Imm.getBitWidth() <= 64 && Imm.isSignedIntN(32))
5655 case Intrinsic::experimental_stackmap:
5656 if ((
Idx < 2) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
5659 case Intrinsic::experimental_patchpoint_void:
5660 case Intrinsic::experimental_patchpoint:
5661 if ((
Idx < 4) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
5672 return Opcode == Instruction::PHI ? 0 : 1;
5677int X86TTIImpl::getGatherOverhead()
const {
5690int X86TTIImpl::getScatterOverhead()
const {
5705 assert(isa<VectorType>(SrcVTy) &&
"Unexpected type in getGSVectorCost");
5706 unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
5716 if (IndexSize < 64 || !
GEP)
5719 unsigned NumOfVarIndices = 0;
5720 const Value *Ptrs =
GEP->getPointerOperand();
5723 for (
unsigned I = 1, E =
GEP->getNumOperands();
I != E; ++
I) {
5724 if (isa<Constant>(
GEP->getOperand(
I)))
5726 Type *IndxTy =
GEP->getOperand(
I)->getType();
5727 if (
auto *IndexVTy = dyn_cast<VectorType>(IndxTy))
5728 IndxTy = IndexVTy->getElementType();
5730 !isa<SExtInst>(
GEP->getOperand(
I))) ||
5731 ++NumOfVarIndices > 1)
5734 return (
unsigned)32;
5739 unsigned IndexSize = (ST->
hasAVX512() && VF >= 16)
5740 ? getIndexSizeInBits(
Ptr,
DL)
5748 *std::max(IdxsLT.first, SrcLT.first).getValue();
5749 if (SplitFactor > 1) {
5753 return SplitFactor * getGSVectorCost(Opcode,
CostKind, SplitSrcTy,
Ptr,
5759 const int GSOverhead = (Opcode == Instruction::Load)
5760 ? getGatherOverhead()
5761 : getScatterOverhead();
5777 Type *SrcVTy,
bool VariableMask,
5781 unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
5789 MaskTy, DemandedElts,
false,
true,
CostKind);
5794 MaskUnpackCost += VF * (BranchCost + ScalarCompareCost);
5799 DemandedElts,
false,
true,
CostKind);
5809 cast<FixedVectorType>(SrcVTy), DemandedElts,
5810 Opcode == Instruction::Load,
5811 Opcode == Instruction::Store,
CostKind);
5813 return AddressUnpackCost + MemoryOpCost + MaskUnpackCost + InsertExtractCost;
5818 unsigned Opcode,
Type *SrcVTy,
const Value *
Ptr,
bool VariableMask,
5822 if ((Opcode == Instruction::Load &&
5825 Align(Alignment))) ||
5826 (Opcode == Instruction::Store &&
5837 if (!PtrTy &&
Ptr->getType()->isVectorTy())
5838 PtrTy = dyn_cast<PointerType>(
5839 cast<VectorType>(
Ptr->getType())->getElementType());
5840 assert(PtrTy &&
"Unexpected type for Ptr argument");
5843 if ((Opcode == Instruction::Load &&
5846 Align(Alignment)))) ||
5847 (Opcode == Instruction::Store &&
5850 Align(Alignment)))))
5851 return getGSScalarCost(Opcode,
CostKind, SrcVTy, VariableMask, Alignment,
5854 return getGSVectorCost(Opcode,
CostKind, SrcVTy,
Ptr, Alignment,
5870 return ST->hasMacroFusion() || ST->hasBranchFusion();
5878 if (isa<VectorType>(DataTy) &&
5879 cast<FixedVectorType>(DataTy)->getNumElements() == 1)
5889 if (ScalarTy->
isHalfTy() && ST->hasBWI())
5899 return IntWidth == 32 || IntWidth == 64 ||
5900 ((IntWidth == 8 || IntWidth == 16) && ST->hasBWI());
5912 if (Alignment >= DataSize && (DataSize == 16 || DataSize == 32))
5929 if (Alignment < DataSize || DataSize < 4 || DataSize > 32 ||
5951 if (!isa<VectorType>(DataTy))
5958 if (cast<FixedVectorType>(DataTy)->getNumElements() == 1)
5961 Type *ScalarTy = cast<VectorType>(DataTy)->getElementType();
5970 return IntWidth == 32 || IntWidth == 64 ||
5971 ((IntWidth == 8 || IntWidth == 16) && ST->hasVBMI2());
5978bool X86TTIImpl::supportsGather()
const {
5992 unsigned NumElts = cast<FixedVectorType>(VTy)->getNumElements();
5993 return NumElts == 1 ||
5994 (ST->
hasAVX512() && (NumElts == 2 || (NumElts == 4 && !ST->hasVLX())));
6009 return IntWidth == 32 || IntWidth == 64;
6013 if (!supportsGather() || !ST->preferGather())
6028 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
6029 assert(OpcodeMask.
size() == NumElements &&
"Mask and VecTy are incompatible");
6034 for (
int Lane : seq<int>(0, NumElements)) {
6035 unsigned Opc = OpcodeMask.
test(Lane) ? Opcode1 : Opcode0;
6037 if (Lane % 2 == 0 && Opc != Instruction::FSub)
6039 if (Lane % 2 == 1 && Opc != Instruction::FAdd)
6043 Type *ElemTy = cast<VectorType>(VecTy)->getElementType();
6045 return ST->
hasSSE3() && NumElements % 4 == 0;
6047 return ST->
hasSSE3() && NumElements % 2 == 0;
6053 if (!ST->
hasAVX512() || !ST->preferScatter())
6066 if (
I->getOpcode() == Instruction::FDiv)
6082 TM.getSubtargetImpl(*Caller)->getFeatureBits();
6084 TM.getSubtargetImpl(*Callee)->getFeatureBits();
6087 FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
6088 FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
6089 if (RealCallerBits == RealCalleeBits)
6094 if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
6098 if (
const auto *CB = dyn_cast<CallBase>(&
I)) {
6100 if (CB->isInlineAsm())
6104 for (
Value *Arg : CB->args())
6105 Types.push_back(Arg->getType());
6106 if (!CB->getType()->isVoidTy())
6107 Types.push_back(CB->getType());
6110 auto IsSimpleTy = [](
Type *Ty) {
6111 return !Ty->isVectorTy() && !Ty->isAggregateType();
6113 if (
all_of(Types, IsSimpleTy))
6116 if (
Function *NestedCallee = CB->getCalledFunction()) {
6118 if (NestedCallee->isIntrinsic())
6153 [](
Type *
T) {
return T->isVectorTy() ||
T->isAggregateType(); });
6162 Options.AllowOverlappingLoads =
true;
6167 if (PreferredWidth >= 512 && ST->
hasAVX512() && ST->hasEVEX512())
6168 Options.LoadSizes.push_back(64);
6169 if (PreferredWidth >= 256 && ST->
hasAVX())
Options.LoadSizes.push_back(32);
6170 if (PreferredWidth >= 128 && ST->
hasSSE2())
Options.LoadSizes.push_back(16);
6172 if (ST->is64Bit()) {
6173 Options.LoadSizes.push_back(8);
6175 Options.LoadSizes.push_back(4);
6176 Options.LoadSizes.push_back(2);
6177 Options.LoadSizes.push_back(1);
6182 return supportsGather();
6193 return !(ST->isAtom());
6213 unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
6219 bool UseMaskedMemOp = UseMaskForCond || UseMaskForGaps;
6231 if (UseMaskedMemOp) {
6233 for (
unsigned Index : Indices) {
6234 assert(
Index < Factor &&
"Invalid index for interleaved memory op");
6235 for (
unsigned Elm = 0; Elm < VF; Elm++)
6236 DemandedLoadStoreElts.
setBit(
Index + Elm * Factor);
6243 UseMaskForGaps ? DemandedLoadStoreElts
6252 if (UseMaskForGaps) {
6258 if (Opcode == Instruction::Load) {
6265 static const CostTblEntry AVX512InterleavedLoadTbl[] = {
6266 {3, MVT::v16i8, 12},
6267 {3, MVT::v32i8, 14},
6268 {3, MVT::v64i8, 22},
6271 if (
const auto *Entry =
6273 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6283 ShuffleKind, SingleMemOpTy, std::nullopt,
CostKind, 0,
nullptr);
6285 unsigned NumOfLoadsInInterleaveGrp =
6286 Indices.
size() ? Indices.
size() : Factor;
6295 unsigned NumOfUnfoldedLoads =
6296 UseMaskedMemOp || NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2;
6299 unsigned NumOfShufflesPerResult =
6300 std::max((
unsigned)1, (
unsigned)(NumOfMemOps - 1));
6307 NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;
6310 MaskCost + NumOfUnfoldedLoads * MemOpCost +
6317 assert(Opcode == Instruction::Store &&
6318 "Expected Store Instruction at this point");
6320 static const CostTblEntry AVX512InterleavedStoreTbl[] = {
6321 {3, MVT::v16i8, 12},
6322 {3, MVT::v32i8, 14},
6323 {3, MVT::v64i8, 26},
6326 {4, MVT::v16i8, 11},
6327 {4, MVT::v32i8, 14},
6331 if (
const auto *Entry =
6333 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6338 unsigned NumOfSources = Factor;
6341 unsigned NumOfShufflesPerStore = NumOfSources - 1;
6345 unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2;
6348 NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) +
6356 bool UseMaskForCond,
bool UseMaskForGaps) {
6357 auto *VecTy = cast<FixedVectorType>(
BaseTy);
6359 auto isSupportedOnAVX512 = [&](
Type *VecTy) {
6360 Type *EltTy = cast<VectorType>(VecTy)->getElementType();
6365 return ST->hasBWI();
6367 return ST->hasBF16();
6370 if (ST->
hasAVX512() && isSupportedOnAVX512(VecTy))
6372 Opcode, VecTy, Factor, Indices, Alignment,
6375 if (UseMaskForCond || UseMaskForGaps)
6378 UseMaskForCond, UseMaskForGaps);
6398 unsigned VF = VecTy->getNumElements() / Factor;
6399 Type *ScalarTy = VecTy->getElementType();
6431 {2, MVT::v16i16, 9},
6432 {2, MVT::v32i16, 18},
6435 {2, MVT::v16i32, 8},
6436 {2, MVT::v32i32, 16},
6440 {2, MVT::v16i64, 16},
6441 {2, MVT::v32i64, 32},
6446 {3, MVT::v16i8, 11},
6447 {3, MVT::v32i8, 14},
6452 {3, MVT::v16i16, 28},
6453 {3, MVT::v32i16, 56},
6458 {3, MVT::v16i32, 14},
6459 {3, MVT::v32i32, 32},
6463 {3, MVT::v8i64, 10},
6464 {3, MVT::v16i64, 20},
6469 {4, MVT::v16i8, 24},
6470 {4, MVT::v32i8, 56},
6473 {4, MVT::v4i16, 17},
6474 {4, MVT::v8i16, 33},
6475 {4, MVT::v16i16, 75},
6476 {4, MVT::v32i16, 150},
6480 {4, MVT::v8i32, 16},
6481 {4, MVT::v16i32, 32},
6482 {4, MVT::v32i32, 68},
6486 {4, MVT::v8i64, 20},
6487 {4, MVT::v16i64, 40},
6492 {6, MVT::v16i8, 43},
6493 {6, MVT::v32i8, 82},
6495 {6, MVT::v2i16, 13},
6497 {6, MVT::v8i16, 39},
6498 {6, MVT::v16i16, 106},
6499 {6, MVT::v32i16, 212},
6502 {6, MVT::v4i32, 15},
6503 {6, MVT::v8i32, 31},
6504 {6, MVT::v16i32, 64},
6507 {6, MVT::v4i64, 18},
6508 {6, MVT::v8i64, 36},
6513 static const CostTblEntry SSSE3InterleavedLoadTbl[] = {
6527 static const CostTblEntry AVX2InterleavedStoreTbl[] = {
6532 {2, MVT::v16i16, 4},
6533 {2, MVT::v32i16, 8},
6537 {2, MVT::v16i32, 8},
6538 {2, MVT::v32i32, 16},
6543 {2, MVT::v16i64, 16},
6544 {2, MVT::v32i64, 32},
6549 {3, MVT::v16i8, 11},
6550 {3, MVT::v32i8, 13},
6554 {3, MVT::v8i16, 12},
6555 {3, MVT::v16i16, 27},
6556 {3, MVT::v32i16, 54},
6560 {3, MVT::v8i32, 11},
6561 {3, MVT::v16i32, 22},
6562 {3, MVT::v32i32, 48},
6566 {3, MVT::v8i64, 12},
6567 {3, MVT::v16i64, 24},
6573 {4, MVT::v32i8, 12},
6577 {4, MVT::v8i16, 10},
6578 {4, MVT::v16i16, 32},
6579 {4, MVT::v32i16, 64},
6583 {4, MVT::v8i32, 16},
6584 {4, MVT::v16i32, 32},
6585 {4, MVT::v32i32, 64},
6589 {4, MVT::v8i64, 20},
6590 {4, MVT::v16i64, 40},
6595 {6, MVT::v16i8, 27},
6596 {6, MVT::v32i8, 90},
6598 {6, MVT::v2i16, 10},
6599 {6, MVT::v4i16, 15},
6600 {6, MVT::v8i16, 21},
6601 {6, MVT::v16i16, 58},
6602 {6, MVT::v32i16, 90},
6605 {6, MVT::v4i32, 12},
6606 {6, MVT::v8i32, 33},
6607 {6, MVT::v16i32, 66},
6610 {6, MVT::v4i64, 15},
6611 {6, MVT::v8i64, 30},
6614 static const CostTblEntry SSE2InterleavedStoreTbl[] = {
6625 if (Opcode == Instruction::Load) {
6626 auto GetDiscountedCost = [Factor, NumMembers = Indices.
size(),
6630 return MemOpCosts +
divideCeil(NumMembers * Entry->Cost, Factor);
6634 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedLoadTbl, Factor,
6636 return GetDiscountedCost(Entry);
6639 if (
const auto *Entry =
CostTableLookup(SSSE3InterleavedLoadTbl, Factor,
6641 return GetDiscountedCost(Entry);
6644 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedLoadTbl, Factor,
6646 return GetDiscountedCost(Entry);
6648 assert(Opcode == Instruction::Store &&
6649 "Expected Store Instruction at this point");
6651 "Interleaved store only supports fully-interleaved groups.");
6653 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedStoreTbl, Factor,
6655 return MemOpCosts + Entry->Cost;
6658 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedStoreTbl, Factor,
6660 return MemOpCosts + Entry->Cost;
6665 UseMaskForCond, UseMaskForGaps);
6670 bool HasBaseReg, int64_t Scale,
6671 unsigned AddrSpace)
const {
6698 return AM.
Scale != 0;
Expand Atomic instructions
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
const char LLVMTargetMachineRef TM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
APInt zext(unsigned width) const
Zero extend to a new width.
unsigned popcount() const
Count the number of bits set.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
Estimate the overhead of scalarizing an instruction.
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
A parsed version of the target data layout string in and methods for querying it.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
constexpr bool isScalar() const
Exactly one element.
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static InstructionCost getInvalid(CostType Val=0)
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
FastMathFlags getFlags() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
bool isTypeBasedOnly() const
bool is128BitVector() const
Return true if this is a 128-bit vector type.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
bool is512BitVector() const
Return true if this is a 512-bit vector type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool is256BitVector() const
Return true if this is a 256-bit vector type.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
unsigned getAddressSpace() const
Return the address space of the Pointer type.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
size_type size() const
Returns the number of bits in this bitvector.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const TargetMachine & getTargetMachine() const
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Primary interface to the complete machine description for the target machine.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
static Type * getDoubleTy(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Base class of all SIMD vector types.
static VectorType * getExtendedElementVectorType(VectorType *VTy)
This static method is like getInteger except that the element types are twice as wide as the elements in the input type.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getDoubleElementsVectorType(VectorType *VTy)
This static method returns a VectorType with twice as many elements as the input type and the same element type.
Type * getElementType() const
bool useAVX512Regs() const
unsigned getPreferVectorWidth() const
InstructionCost getInterleavedMemoryOpCostAVX512(unsigned Opcode, FixedVectorType *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
bool isLegalMaskedGather(Type *DataType, Align Alignment)
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const
std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool isLegalNTStore(Type *DataType, Align Alignment)
bool enableInterleavedAccessVectorization()
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
bool isLegalNTLoad(Type *DataType, Align Alignment)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment)
bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment)
bool isLegalMaskedLoad(Type *DataType, Align Alignment)
bool supportsEfficientVectorElementLoadStore() const
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
bool prefersVectorizedAddressing() const
unsigned getLoadStoreVecRegBitWidth(unsigned AS) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment)
std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const override
bool isLegalMaskedStore(Type *DataType, Align Alignment)
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
Calculate the cost of Gather / Scatter operation.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
unsigned getMaxInterleaveFactor(ElementCount VF)
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
unsigned getNumberOfRegisters(unsigned ClassID) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
unsigned getAtomicMemIntrinsicMaxElementSize() const
bool isLegalMaskedScatter(Type *DataType, Align Alignment)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
InstructionCost getIntImmCost(int64_t)
Calculate the cost of materializing a 64-bit value.
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isExpensiveToSpeculativelyExecute(const Instruction *I)
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
InstructionCost getMinMaxCost(Intrinsic::ID IID, Type *Ty, TTI::TargetCostKind CostKind, FastMathFlags FMF)
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Type) const
bool hasDivRemOp(Type *DataType, bool IsSigned)
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
@ BSWAP
Byte Swap and Counting operators.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.
@ FADD
Simple binary floating point operators.
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ SIGN_EXTEND
Conversion operators.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width (W).
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
@ SHL
Shift and rotation operations.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0.0.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W).
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
constexpr int PoisonMaskElem
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
OutputIt copy(R &&Range, OutputIt Out)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
unsigned RecipThroughputCost
std::optional< unsigned > operator[](TargetTransformInfo::TargetCostKind Kind) const
unsigned SizeAndLatencyCost
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.
Type Conversion Cost Table.