#define DEBUG_TYPE "x86-isel"
71 "x86-experimental-pref-innermost-loop-alignment",
cl::init(4),
73 "Sets the preferable loop alignment for experiments (as log2 bytes) "
74 "for innermost loops only. If specified, this option overrides "
75 "alignment set by x86-experimental-pref-loop-alignment."),
79 "x86-br-merging-base-cost",
cl::init(2),
81 "Sets the cost threshold for when multiple conditionals will be merged "
82 "into one branch versus be split in multiple branches. Merging "
83 "conditionals saves branches at the cost of additional instructions. "
84 "This value sets the instruction cost limit, below which conditionals "
85 "will be merged, and above which conditionals will be split. Set to -1 "
86 "to never merge branches."),
90 "x86-br-merging-ccmp-bias",
cl::init(6),
91 cl::desc(
"Increases 'x86-br-merging-base-cost' in cases that the target "
92 "supports conditional compare instructions."),
    cl::desc("Replace narrow shifts with wider shifts."),
101 "x86-br-merging-likely-bias",
cl::init(0),
102 cl::desc(
"Increases 'x86-br-merging-base-cost' in cases that it is likely "
103 "that all conditionals will be executed. For example for merging "
104 "the conditionals (a == b && c > d), if its known that a == b is "
105 "likely, then it is likely that if the conditionals are split "
106 "both sides will be executed, so it may be desirable to increase "
107 "the instruction cost threshold. Set to -1 to never merge likely "
112 "x86-br-merging-unlikely-bias",
cl::init(-1),
114 "Decreases 'x86-br-merging-base-cost' in cases that it is unlikely "
115 "that all conditionals will be executed. For example for merging "
116 "the conditionals (a == b && c > d), if its known that a == b is "
117 "unlikely, then it is unlikely that if the conditionals are split "
118 "both sides will be executed, so it may be desirable to decrease "
119 "the instruction cost threshold. Set to -1 to never merge unlikely "
124 "mul-constant-optimization",
cl::init(
true),
125 cl::desc(
"Replace 'mul x, Const' with more effective instructions like "
  bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
  if (Subtarget.isAtom())
  else if (Subtarget.is64Bit())
  if (Subtarget.hasSlowDivide32())
  if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
  static const struct {
    const char *const Name;
  for (const auto &LC : LibraryCalls) {
  if (Subtarget.is64Bit())
  for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
  if (Subtarget.is64Bit())
  if (Subtarget.is64Bit())
  if (Subtarget.is64Bit())
  if (Subtarget.is64Bit())
  if (!Subtarget.useSoftFloat()) {
  if (!Subtarget.is64Bit()) {
  for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
  if (Subtarget.is64Bit()) {
  if (Subtarget.hasAVX10_2()) {
  if (Subtarget.is64Bit()) {
  if (Subtarget.is64Bit()) {
  } else if (!Subtarget.is64Bit())
  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
  for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
                   MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
  if (Subtarget.is64Bit())
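  // x87 FPU configuration: these actions apply whenever x87 is available and
  // soft-float has not been requested.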
  if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
  if (!Subtarget.hasBMI()) {
  if (Subtarget.is64Bit()) {
  if (Subtarget.hasLZCNT()) {
  for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
    if (VT == MVT::i64 && !Subtarget.is64Bit())
                     (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand);
  for (auto VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
  for (MVT VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
  if (Subtarget.is64Bit())
  if (Subtarget.hasPOPCNT()) {
  if (!Subtarget.hasMOVBE())
  for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
    if (VT == MVT::i64 && !Subtarget.is64Bit())
  for (auto VT : { MVT::i32, MVT::i64 }) {
    if (VT == MVT::i64 && !Subtarget.is64Bit())
  for (auto VT : { MVT::i32, MVT::i64 }) {
    if (VT == MVT::i64 && !Subtarget.is64Bit())
  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
  if (!Subtarget.is64Bit())
  if (Subtarget.is64Bit() && Subtarget.hasAVX()) {
  bool Is64Bit = Subtarget.is64Bit();
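  // With SSE2 available (and soft-float disabled), scalar f16/f32/f64 values
  // are kept in SSE registers rather than on the x87 stack.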
  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
                       : &X86::FR16RegClass);
                       : &X86::FR32RegClass);
                       : &X86::FR64RegClass);
    for (auto VT : { MVT::f32, MVT::f64 }) {
    setF16Action(MVT::f16, Promote);
  } else if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1() &&
             (UseX87 || Is64Bit)) {
    for (auto VT : { MVT::f32, MVT::f64 }) {
    if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
      addLegalFPImmediate(APFloat(+0.0f));
      addLegalFPImmediate(APFloat(+1.0f));
      addLegalFPImmediate(APFloat(-0.0f));
      addLegalFPImmediate(APFloat(-1.0f));
      addLegalFPImmediate(APFloat(+0.0f));
    addLegalFPImmediate(APFloat(+0.0));
    addLegalFPImmediate(APFloat(+1.0));
    addLegalFPImmediate(APFloat(-0.0));
    addLegalFPImmediate(APFloat(-1.0));
    addLegalFPImmediate(APFloat(+0.0));
    addLegalFPImmediate(TmpFlt);
    addLegalFPImmediate(TmpFlt);
    addLegalFPImmediate(TmpFlt2);
    addLegalFPImmediate(TmpFlt2);
  if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
                       : &X86::VR128RegClass);
    for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16,
                     MVT::v4f32, MVT::v8f32, MVT::v16f32,
                     MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
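  // Vector support is layered by ISA level: each block below (MMX, SSE1, SSE2,
  // SSSE3, SSE4.1, SSE4.2, XOP, ...) adds register classes for the vector
  // types it makes legal and fills in the corresponding operation actions.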
  if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
                       : &X86::VR128RegClass);
  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
                       : &X86::VR128RegClass);
                       : &X86::VR128RegClass);
                       : &X86::VR128RegClass);
                       : &X86::VR128RegClass);
                       : &X86::VR128RegClass);
                       : &X86::VR128RegClass);
    for (auto VT : { MVT::f64, MVT::v4f32, MVT::v2f64 }) {
    for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
                     MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
    for (auto VT : { MVT::v8f16, MVT::v2f64, MVT::v2i64 }) {
      if (VT == MVT::v2i64 && !Subtarget.is64Bit())
    setF16Action(MVT::v8f16, Expand);
    for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
      if (VT == MVT::v2i64)
        continue;
    if (Subtarget.hasGFNI()) {
  if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
    for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
    for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
    for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
    if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) {
  if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
                     MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
    for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
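  // AVX: the 256-bit vector types become legal and are assigned YMM (VR256)
  // register classes; the loops below set their per-type operation actions.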
  if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
                       : &X86::VR256RegClass);
                       : &X86::VR256RegClass);
                       : &X86::VR256RegClass);
                       : &X86::VR256RegClass);
                       : &X86::VR256RegClass);
                       : &X86::VR256RegClass);
                       : &X86::VR256RegClass);
    for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
      if (VT == MVT::v4i64)
        continue;
    for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
    for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
                     MVT::v2f64, MVT::v4f64 }) {
    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
    for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
    for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                     MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
                     MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
    for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
                    MVT::v16f16, MVT::v8f32, MVT::v4f64 }) {
    setF16Action(MVT::v16f16, Expand);
    for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                     MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
  if (!Subtarget.useSoftFloat() && !Subtarget.hasFP16() &&
      Subtarget.hasF16C()) {
    for (MVT VT : { MVT::f16, MVT::v2f16, MVT::v4f16, MVT::v8f16 }) {
    for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32, MVT::v8f32 }) {
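  // AVX-512 mask handling: the vXi1 mask types are configured here, with
  // fallback actions when the DQI/VLX extensions are not available.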
  if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
    if (!Subtarget.hasDQI()) {
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
    for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 })
    for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
    for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
    if (Subtarget.hasDQI() && Subtarget.hasVLX()) {
      for (MVT VT : {MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
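  // 512-bit vectors are only enabled when the subtarget may actually use ZMM
  // registers (useAVX512Regs()); several of the actions below additionally
  // depend on BWI.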
  if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
    bool HasBWI = Subtarget.hasBWI();
    for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
    if (Subtarget.hasDQI())
    for (MVT VT : { MVT::v16i1, MVT::v16i8 }) {
    for (MVT VT : { MVT::v16i16, MVT::v16i32 }) {
    if (!Subtarget.hasVLX()) {
      for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                      MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
    for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
    for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
    for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
    for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
    for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
    if (Subtarget.hasDQI()) {
    if (Subtarget.hasCDI()) {
      for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
    if (Subtarget.hasVPOPCNTDQ()) {
      for (auto VT : { MVT::v16i32, MVT::v8i64 })
    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
                     MVT::v16f16, MVT::v8f32, MVT::v4f64 })
    for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
                     MVT::v32f16, MVT::v16f32, MVT::v8f64 }) {
    setF16Action(MVT::v32f16, Expand);
    for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
    for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
    if (Subtarget.hasVBMI2()) {
      for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
  if (!Subtarget.useSoftFloat() && Subtarget.hasVBMI2()) {
    for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v16i16, MVT::v8i32,
  if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
    if (Subtarget.hasDQI()) {
             "Unexpected operation action!");
    for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
    for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
    for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                     MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
    if (Subtarget.hasDQI()) {
    if (Subtarget.hasCDI()) {
      for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
    if (Subtarget.hasVPOPCNTDQ()) {
      for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
    for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v4i32, MVT::v4f32, MVT::v4i64,
                   MVT::v4f64, MVT::v2i64, MVT::v2f64, MVT::v16i8, MVT::v8i16,
                   MVT::v16i16, MVT::v8i8})
    for (MVT VT : {MVT::v16i32, MVT::v16f32, MVT::v8i64, MVT::v8f64})
    if (Subtarget.hasVLX())
      for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v4i32, MVT::v4f32, MVT::v4i64,
                     MVT::v4f64, MVT::v2i64, MVT::v2f64})
    if (Subtarget.hasVBMI2())
      for (MVT VT : {MVT::v32i16, MVT::v64i8})
    if (Subtarget.hasVBMI2() && Subtarget.hasVLX())
      for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v32i8, MVT::v16i16})
  if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
    for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
    for (auto VT : { MVT::v16i1, MVT::v32i1 })
    for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
    if (Subtarget.hasBITALG()) {
      for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
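  // AVX512-FP16: a small helper lambda applies the common set of actions to
  // each half-precision vector type so they do not have to be repeated for
  // v8f16/v16f16/v32f16.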
  if (!Subtarget.useSoftFloat() && Subtarget.hasFP16()) {
    auto setGroup = [&](MVT VT) {
    setGroup(MVT::v32f16);
    if (Subtarget.hasVLX()) {
      setGroup(MVT::v8f16);
      setGroup(MVT::v16f16);
  if (!Subtarget.useSoftFloat() &&
      (Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16())) {
                       : &X86::VR128RegClass);
                       : &X86::VR256RegClass);
    for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
      setF16Action(VT, Expand);
      if (!Subtarget.hasBF16())
  if (!Subtarget.useSoftFloat() && Subtarget.hasBF16() &&
    setF16Action(MVT::v32bf16, Expand);
  if (!Subtarget.useSoftFloat() && Subtarget.hasAVX10_2()) {
    for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
    if (Subtarget.hasAVX10_2_512()) {
    for (auto VT : {MVT::f16, MVT::f32, MVT::f64}) {
  if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
    if (Subtarget.hasBWI()) {
    if (Subtarget.hasFP16()) {
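  // AMX tile support is configured here when the AMX-TILE feature is present.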
  if (!Subtarget.useSoftFloat() && Subtarget.hasAMXTILE()) {
  if (!Subtarget.is64Bit()) {
  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
    if (VT == MVT::i64 && !Subtarget.is64Bit())
  if (Subtarget.is32Bit() &&
  unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
  if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
      !Subtarget.hasBWI())
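// The helpers below decide whether a load may be folded into its user: the
// load normally needs a single use, and on pre-AVX targets a 128-bit load must
// be 16-byte aligned unless the subtarget tolerates unaligned SSE memory.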
                          bool AssumeSingleUse) {
  if (!AssumeSingleUse && !Op.hasOneUse())
  auto *Ld = cast<LoadSDNode>(Op.getNode());
  if (!Subtarget.hasAVX() && !Subtarget.hasSSEUnalignedMem() &&
      Ld->getValueSizeInBits(0) == 128 && Ld->getAlign() < Align(16))
                          bool AssumeSingleUse) {
  assert(Subtarget.hasAVX() && "Expected AVX for broadcast from memory");
  auto *Ld = cast<LoadSDNode>(Op.getNode());
  return !Ld->isVolatile() ||
  if (Op.hasOneUse()) {
    unsigned Opcode = Op.getNode()->user_begin()->getOpcode();
  default:
    return false;
  default:
    return false;
  int ReturnAddrIndex = FuncInfo->getRAIndex();
  if (ReturnAddrIndex == 0) {
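// The code-model check below decides whether a constant offset can be folded
// into an addressing mode when a symbolic displacement is present; in the
// small code models only positive offsets below 16 MiB are accepted so that
// symbol + offset still fits the signed 32-bit range those models assume.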
                                       bool HasSymbolicDisplacement) {
  if (!HasSymbolicDisplacement)
  return Offset < 16 * 1024 * 1024;
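// Translating integer SETCC conditions to X86 condition codes: comparisons
// against the constants -1, 0 and 1 are first canonicalized (e.g. "x > -1"
// becomes "x >= 0") so that sign-based condition codes can be used.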
  switch (SetCCOpcode) {
    if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnes()) {
    if (SetCCOpcode == ISD::SETLT && RHSC->isZero()) {
    if (SetCCOpcode == ISD::SETGE && RHSC->isZero()) {
    if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
    SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
  switch (SetCCOpcode) {
  switch (SetCCOpcode) {
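// getTgtMemIntrinsic describes how each target intrinsic touches memory so it
// can be selected as a memory operation; the Key Locker AES, CMPccXADD and
// atomic bit-test intrinsics below all read or write through one of their
// pointer arguments (recorded in Info.ptrVal).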
                                           unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::x86_aesenc128kl:
  case Intrinsic::x86_aesdec128kl:
    Info.ptrVal = I.getArgOperand(1);
  case Intrinsic::x86_aesenc256kl:
  case Intrinsic::x86_aesdec256kl:
    Info.ptrVal = I.getArgOperand(1);
  case Intrinsic::x86_aesencwide128kl:
  case Intrinsic::x86_aesdecwide128kl:
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::x86_aesencwide256kl:
  case Intrinsic::x86_aesdecwide256kl:
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::x86_cmpccxadd32:
  case Intrinsic::x86_cmpccxadd64:
  case Intrinsic::x86_atomic_bts:
  case Intrinsic::x86_atomic_btc:
  case Intrinsic::x86_atomic_btr: {
    Info.ptrVal = I.getArgOperand(0);
    unsigned Size = I.getType()->getScalarSizeInBits();