#define DEBUG_TYPE "x86-isel"
74 "x86-experimental-pref-innermost-loop-alignment",
cl::init(4),
76 "Sets the preferable loop alignment for experiments (as log2 bytes) "
77 "for innermost loops only. If specified, this option overrides "
78 "alignment set by x86-experimental-pref-loop-alignment."),
82 "x86-br-merging-base-cost",
cl::init(2),
84 "Sets the cost threshold for when multiple conditionals will be merged "
85 "into one branch versus be split in multiple branches. Merging "
86 "conditionals saves branches at the cost of additional instructions. "
87 "This value sets the instruction cost limit, below which conditionals "
88 "will be merged, and above which conditionals will be split. Set to -1 "
89 "to never merge branches."),
93 "x86-br-merging-ccmp-bias",
cl::init(6),
94 cl::desc(
"Increases 'x86-br-merging-base-cost' in cases that the target "
95 "supports conditional compare instructions."),
99 "x86-br-merging-likely-bias",
cl::init(0),
100 cl::desc(
"Increases 'x86-br-merging-base-cost' in cases that it is likely "
101 "that all conditionals will be executed. For example for merging "
102 "the conditionals (a == b && c > d), if its known that a == b is "
103 "likely, then it is likely that if the conditionals are split "
104 "both sides will be executed, so it may be desirable to increase "
105 "the instruction cost threshold. Set to -1 to never merge likely "
110 "x86-br-merging-unlikely-bias",
cl::init(-1),
112 "Decreases 'x86-br-merging-base-cost' in cases that it is unlikely "
113 "that all conditionals will be executed. For example for merging "
114 "the conditionals (a == b && c > d), if its known that a == b is "
115 "unlikely, then it is unlikely that if the conditionals are split "
116 "both sides will be executed, so it may be desirable to decrease "
117 "the instruction cost threshold. Set to -1 to never merge unlikely "
122 "mul-constant-optimization",
cl::init(
true),
123 cl::desc(
"Replace 'mul x, Const' with more effective instructions like "
bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();

if (Subtarget.isAtom())
else if (Subtarget.is64Bit())

if (Subtarget.hasSlowDivide32())
if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
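// Context note: the "slow divide" checks above feed the bypass-slow-division
// transform (addBypassSlowDiv), which guards a full-width divide with a
// cheaper narrow divide taken at run time when both operands fit the narrow type.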
static const struct {
  const char *const Name;

for (const auto &LC : LibraryCalls) {

if (Subtarget.is64Bit())

for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {

if (Subtarget.is64Bit())
if (Subtarget.is64Bit())
if (Subtarget.is64Bit())
if (Subtarget.is64Bit())
if (!Subtarget.useSoftFloat()) {
if (!Subtarget.is64Bit()) {

for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {

if (Subtarget.is64Bit()) {
if (Subtarget.is64Bit()) {
} else if (!Subtarget.is64Bit())
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {

for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
                 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {

if (Subtarget.is64Bit())
if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {

if (!Subtarget.hasBMI()) {
if (Subtarget.is64Bit()) {

if (Subtarget.hasLZCNT()) {

for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
  if (VT == MVT::i64 && !Subtarget.is64Bit())

(!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand);
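// With F16C the f16<->f32 conversions can be custom-lowered to the
// VCVTPH2PS/VCVTPS2PH instructions; without it (and without soft-float)
// they are expanded, ultimately reaching the half-conversion library calls.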
for (auto VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
for (MVT VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {

if (Subtarget.is64Bit())
if (Subtarget.hasPOPCNT()) {
if (!Subtarget.hasMOVBE())

for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {

for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
  if (VT == MVT::i64 && !Subtarget.is64Bit())

for (auto VT : { MVT::i32, MVT::i64 }) {
  if (VT == MVT::i64 && !Subtarget.is64Bit())

for (auto VT : { MVT::i32, MVT::i64 }) {
  if (VT == MVT::i64 && !Subtarget.is64Bit())

for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {

if (!Subtarget.is64Bit())
if (Subtarget.is64Bit() && Subtarget.hasAVX()) {

bool Is64Bit = Subtarget.is64Bit();
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
  addRegisterClass(MVT::f16, Subtarget.hasAVX512() ? &X86::FR16XRegClass
                                                   : &X86::FR16RegClass);
  addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
                                                   : &X86::FR32RegClass);
  addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
                                                   : &X86::FR64RegClass);

  for (auto VT : { MVT::f32, MVT::f64 }) {

  setF16Action(MVT::f16, Promote);
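// setF16Action is a local helper that applies one default LegalizeAction to
// the common f16 operations of a type; Promote here means scalar f16
// arithmetic is carried out in f32 and the result converted back.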
} else if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1() &&
           (UseX87 || Is64Bit)) {
for (auto VT : { MVT::f32, MVT::f64 }) {

if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
  // x87 materializes these f32 constants directly.
  addLegalFPImmediate(APFloat(+0.0f)); // FLD0
  addLegalFPImmediate(APFloat(+1.0f)); // FLD1
  addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
  addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS

  addLegalFPImmediate(APFloat(+0.0f)); // xorps

  addLegalFPImmediate(APFloat(+0.0)); // FLD0
  addLegalFPImmediate(APFloat(+1.0)); // FLD1
  addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
  addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS

  addLegalFPImmediate(APFloat(+0.0)); // xorpd

addLegalFPImmediate(TmpFlt);  // FLD0
addLegalFPImmediate(TmpFlt);  // FLD0/FCHS
addLegalFPImmediate(TmpFlt2); // FLD1
addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
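// These registrations work because the constants are free to materialize:
// x87 has dedicated load instructions (FLD0/FLD1, negated via FCHS) and SSE
// builds +0.0 with a self-XOR, so none of them needs a constant-pool load.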
if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
  addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
                                                 : &X86::VR128RegClass);

for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16,
                 MVT::v4f32, MVT::v8f32, MVT::v16f32,
                 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {

if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
  addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
                                                  : &X86::VR128RegClass);

if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
  addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
                                                  : &X86::VR128RegClass);
  addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
                                                  : &X86::VR128RegClass);
  addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
                                                  : &X86::VR128RegClass);
  addRegisterClass(MVT::v8f16, Subtarget.hasVLX() ? &X86::VR128XRegClass
                                                  : &X86::VR128RegClass);
  addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
                                                  : &X86::VR128RegClass);
  addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
                                                  : &X86::VR128RegClass);
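// Register-class note: with AVX512VL the 128-bit vectors may live in any of
// xmm0-xmm31 (VR128X); without it only the legacy xmm0-xmm15 (VR128) are
// addressable, which is why each addRegisterClass call branches on hasVLX().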
for (auto VT : { MVT::f64, MVT::v4f32, MVT::v2f64 }) {

for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
                 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {

for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {

for (auto VT : { MVT::v8f16, MVT::v2f64, MVT::v2i64 }) {
  if (VT == MVT::v2i64 && !Subtarget.is64Bit())

setF16Action(MVT::v8f16, Expand);

for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {

for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
  if (VT == MVT::v2i64)
    continue;
if (Subtarget.hasGFNI()) {

if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {

for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {

if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
  for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {

for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {

if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {

if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) {
if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
  for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
                   MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {

for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })

if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
  addRegisterClass(MVT::v32i8,  Subtarget.hasVLX() ? &X86::VR256XRegClass
                                                   : &X86::VR256RegClass);
  addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
                                                   : &X86::VR256RegClass);
  addRegisterClass(MVT::v16f16, Subtarget.hasVLX() ? &X86::VR256XRegClass
                                                   : &X86::VR256RegClass);
  addRegisterClass(MVT::v8i32,  Subtarget.hasVLX() ? &X86::VR256XRegClass
                                                   : &X86::VR256RegClass);
  addRegisterClass(MVT::v8f32,  Subtarget.hasVLX() ? &X86::VR256XRegClass
                                                   : &X86::VR256RegClass);
  addRegisterClass(MVT::v4i64,  Subtarget.hasVLX() ? &X86::VR256XRegClass
                                                   : &X86::VR256RegClass);
  addRegisterClass(MVT::v4f64,  Subtarget.hasVLX() ? &X86::VR256XRegClass
                                                   : &X86::VR256RegClass);
for (auto VT : { MVT::v8f32, MVT::v4f64 }) {

for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
  if (VT == MVT::v4i64)
    continue;

for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {

for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
                 MVT::v2f64, MVT::v4f64 }) {

for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {

for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {

for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
                 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {

for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
                MVT::v16f16, MVT::v8f32, MVT::v4f64 }) {

setF16Action(MVT::v16f16, Expand);

for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })

if (!Subtarget.useSoftFloat() && !Subtarget.hasFP16() &&
    Subtarget.hasF16C()) {
  for (MVT VT : { MVT::f16, MVT::v2f16, MVT::v4f16, MVT::v8f16 }) {
  for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32, MVT::v8f32 }) {
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {

if (!Subtarget.hasDQI()) {

for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {

for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 })

for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {

for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })

if (Subtarget.hasDQI() && Subtarget.hasVLX()) {
  for (MVT VT : {MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
  bool HasBWI = Subtarget.hasBWI();

for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {

if (Subtarget.hasDQI())

for (MVT VT : { MVT::v16i1, MVT::v16i8 }) {
for (MVT VT : { MVT::v16i16, MVT::v16i32 }) {

if (!Subtarget.hasVLX()) {
  for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                  MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {

for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
if (Subtarget.hasDQI()) {

if (Subtarget.hasCDI()) {
  for (auto VT : { MVT::v16i32, MVT::v8i64 }) {

if (Subtarget.hasVPOPCNTDQ()) {
  for (auto VT : { MVT::v16i32, MVT::v8i64 })

for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
                 MVT::v16f16, MVT::v8f32, MVT::v4f64 })

for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
                 MVT::v32f16, MVT::v16f32, MVT::v8f64 }) {

setF16Action(MVT::v32f16, Expand);

for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
for (auto VT : { MVT::v64i8, MVT::v32i16 }) {

if (Subtarget.hasVBMI2()) {
  for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
if (!Subtarget.useSoftFloat() && Subtarget.hasVBMI2()) {
  for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v16i16, MVT::v8i32,
                  MVT::v4i64}) {

if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {

if (Subtarget.hasDQI()) {
         "Unexpected operation action!");

for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {

for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })

if (Subtarget.hasDQI()) {

if (Subtarget.hasCDI()) {
  for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {

if (Subtarget.hasVPOPCNTDQ()) {
  for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {

for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
for (auto VT : { MVT::v16i1, MVT::v32i1 })
for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {

if (Subtarget.hasBITALG()) {
  for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })

if (!Subtarget.useSoftFloat() && Subtarget.hasFP16()) {
  auto setGroup = [&](MVT VT) {

  setGroup(MVT::v32f16);

  if (Subtarget.hasVLX()) {
    setGroup(MVT::v8f16);
    setGroup(MVT::v16f16);
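// setGroup batches the operation-action settings shared by the FP16 vector
// types: plain AVX512FP16 only covers the 512-bit v32f16, while VLX adds the
// 128-bit and 256-bit forms, matching the guarded calls above.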
if (!Subtarget.useSoftFloat() &&
    (Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16())) {
  addRegisterClass(MVT::v8bf16, Subtarget.hasAVX512() ? &X86::VR128XRegClass
                                                      : &X86::VR128RegClass);
  addRegisterClass(MVT::v16bf16, Subtarget.hasAVX512() ? &X86::VR256XRegClass
                                                       : &X86::VR256RegClass);

  for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
    setF16Action(VT, Expand);

if (!Subtarget.useSoftFloat() && Subtarget.hasBF16()) {
  setF16Action(MVT::v32bf16, Expand);

if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {

if (Subtarget.hasBWI()) {
if (Subtarget.hasFP16()) {
if (!Subtarget.useSoftFloat() && Subtarget.hasAMXTILE()) {

if (!Subtarget.is64Bit()) {

for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
  if (VT == MVT::i64 && !Subtarget.is64Bit())

if (Subtarget.is32Bit() &&

unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;

if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
    !Subtarget.hasBWI())
bool X86::mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
                      bool AssumeSingleUse) {
  if (!AssumeSingleUse && !Op.hasOneUse())
    return false;

  auto *Ld = cast<LoadSDNode>(Op.getNode());
  if (!Subtarget.hasAVX() && !Subtarget.hasSSEUnalignedMem() &&
      Ld->getValueSizeInBits(0) == 128 && Ld->getAlign() < Align(16))

bool X86::mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
                                          const X86Subtarget &Subtarget,
                                          bool AssumeSingleUse) {
  assert(Subtarget.hasAVX() && "Expected AVX for broadcast from memory");

  auto *Ld = cast<LoadSDNode>(Op.getNode());
  return !Ld->isVolatile() ||

if (Op.hasOneUse()) {
  unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
default:
  return false;

default:
  return false;

int ReturnAddrIndex = FuncInfo->getRAIndex();
if (ReturnAddrIndex == 0) {

bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model CM,
                                       bool HasSymbolicDisplacement) {
  if (!HasSymbolicDisplacement)

  return Offset < 16 * 1024 * 1024; // small code model: objects within 16 MiB
switch (SetCCOpcode) {

if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnes()) {
  // X > -1  -> X >= 0  -> jump if !sign.
if (SetCCOpcode == ISD::SETLT && RHSC->isZero()) {
  // X < 0   -> jump if sign.
if (SetCCOpcode == ISD::SETGE && RHSC->isZero()) {
  // X >= 0  -> jump if !sign.
if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
  // X < 1   -> X <= 0.

SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);

switch (SetCCOpcode) {
switch (SetCCOpcode) {

bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           MachineFunction &MF,
                                           unsigned Intrinsic) const {
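// This TargetLowering hook reports which memory an X86 intrinsic touches
// (address, element type, size, read/write flags) so alias analysis and the
// scheduler can treat the intrinsic like an ordinary load/store.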
switch (Intrinsic) {
case Intrinsic::x86_aesenc128kl:
case Intrinsic::x86_aesdec128kl:
  Info.ptrVal = I.getArgOperand(1);

case Intrinsic::x86_aesenc256kl:
case Intrinsic::x86_aesdec256kl:
  Info.ptrVal = I.getArgOperand(1);

case Intrinsic::x86_aesencwide128kl:
case Intrinsic::x86_aesdecwide128kl:
  Info.ptrVal = I.getArgOperand(0);

case Intrinsic::x86_aesencwide256kl:
case Intrinsic::x86_aesdecwide256kl:
  Info.ptrVal = I.getArgOperand(0);

case Intrinsic::x86_cmpccxadd32:
case Intrinsic::x86_cmpccxadd64:
case Intrinsic::x86_atomic_bts:
case Intrinsic::x86_atomic_btc:
case Intrinsic::x86_atomic_btr: {
  Info.ptrVal = I.getArgOperand(0);
  unsigned Size = I.getType()->getScalarSizeInBits();

case Intrinsic::x86_atomic_bts_rm:
case Intrinsic::x86_atomic_btc_rm:
case Intrinsic::x86_atomic_btr_rm: {
  Info.ptrVal = I.getArgOperand(0);
  unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();

case Intrinsic::x86_aadd32:
case Intrinsic::x86_aadd64:
case Intrinsic::x86_aand32:
case Intrinsic::x86_aand64:
case Intrinsic::x86_aor32:
case Intrinsic::x86_aor64:
case Intrinsic::x86_axor32:
case Intrinsic::x86_axor64:
case Intrinsic::x86_atomic_add_cc:
case Intrinsic::x86_atomic_sub_cc:
case Intrinsic::x86_atomic_or_cc:
case Intrinsic::x86_atomic_and_cc:
case Intrinsic::x86_atomic_xor_cc: {
  Info.ptrVal = I.getArgOperand(0);
  unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();
switch (IntrData->Type) {

  Info.ptrVal = I.getArgOperand(0);

  ScalarVT = MVT::i16;
  ScalarVT = MVT::i32;

  Info.ptrVal = nullptr;
  Info.ptrVal = nullptr;
bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                     bool ForCodeSize) const {
  for (const APFloat &FPImm : LegalFPImmediates)
    if (Imm.bitwiseIsEqual(FPImm))
      return true;

assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow");

SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))

EVT VT = Load->getValueType(0);
for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
  if (UI.getUse().getResNo() != 0)

if (BitSize == 0 || BitSize > 64)