#define DEBUG_TYPE "x86-isel"

    "x86-experimental-pref-innermost-loop-alignment", cl::init(4),
    cl::desc(
        "Sets the preferable loop alignment for experiments (as log2 bytes) "
        "for innermost loops only. If specified, this option overrides "
        "alignment set by x86-experimental-pref-loop-alignment."),

    "mul-constant-optimization", cl::init(true),
    cl::desc(
        "Replace 'mul x, Const' with more effective instructions like "

    "x86-experimental-unordered-atomic-isel", cl::init(false),
    cl::desc("Use LoadSDNode and StoreSDNode instead of "
             "AtomicSDNode for unordered atomic loads and "
             "stores respectively."),
  bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();

  if (Subtarget.isAtom())
  else if (Subtarget.is64Bit())

  if (Subtarget.hasSlowDivide32())
  if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
    static const struct {
      const char *const Name;

    for (const auto &LC : LibraryCalls) {

  if (Subtarget.is64Bit())

  for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {

  if (Subtarget.is64Bit())
  if (Subtarget.is64Bit())
  if (Subtarget.is64Bit())
  if (Subtarget.is64Bit())

  if (!Subtarget.useSoftFloat()) {

  if (!Subtarget.is64Bit()) {

  for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {

  if (Subtarget.is64Bit()) {
  if (Subtarget.is64Bit()) {
  } else if (!Subtarget.is64Bit())

  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {

  for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
                   MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {

  if (Subtarget.is64Bit())
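  // x87 floating point is configured only when soft-float is off and the
  // subtarget actually has x87.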
  if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {

  if (!Subtarget.hasBMI()) {
    if (Subtarget.is64Bit()) {

  if (Subtarget.hasLZCNT()) {

  for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
    if (VT == MVT::i64 && !Subtarget.is64Bit())

      (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand);

  for (MVT VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {

  if (Subtarget.is64Bit())
  if (Subtarget.hasPOPCNT()) {
  if (Subtarget.is64Bit())
  if (!Subtarget.hasMOVBE())

  for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {

  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
    if (VT == MVT::i64 && !Subtarget.is64Bit())

  for (auto VT : { MVT::i32, MVT::i64 }) {
    if (VT == MVT::i64 && !Subtarget.is64Bit())

  for (auto VT : { MVT::i32, MVT::i64 }) {
    if (VT == MVT::i64 && !Subtarget.is64Bit())

  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {

  if (!Subtarget.is64Bit())

  bool Is64Bit = Subtarget.is64Bit();
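  // Scalar SSE: when SSE2 is available, f16/f32/f64 live in the FR16/FR32/FR64
  // register classes rather than on the x87 stack.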
  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
                     : &X86::FR16RegClass);
                     : &X86::FR32RegClass);
                     : &X86::FR64RegClass);

    for (auto VT : { MVT::f32, MVT::f64 }) {

    setF16Action(MVT::f16, Promote);

  } else if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1() &&
             (UseX87 || Is64Bit)) {

    for (auto VT : { MVT::f32, MVT::f64 }) {

  if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
    addLegalFPImmediate(APFloat(+0.0f));
    addLegalFPImmediate(APFloat(+1.0f));
    addLegalFPImmediate(APFloat(-0.0f));
    addLegalFPImmediate(APFloat(-1.0f));
    addLegalFPImmediate(APFloat(+0.0f));

    addLegalFPImmediate(APFloat(+0.0));
    addLegalFPImmediate(APFloat(+1.0));
    addLegalFPImmediate(APFloat(-0.0));
    addLegalFPImmediate(APFloat(-1.0));
    addLegalFPImmediate(APFloat(+0.0));

      addLegalFPImmediate(TmpFlt);
      addLegalFPImmediate(TmpFlt);
      addLegalFPImmediate(TmpFlt2);
      addLegalFPImmediate(TmpFlt2);

  if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
                     : &X86::VR128RegClass);

    for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16,
                     MVT::v4f32, MVT::v8f32, MVT::v16f32,
                     MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
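  // Vector ISA levels: VR128 register classes and per-type operation actions
  // are added as MMX/SSE1/SSE2 (and later SSSE3/SSE4.1/SSE4.2/XOP) become
  // available.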
  if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {

  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
                     : &X86::VR128RegClass);

  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
                     : &X86::VR128RegClass);
                     : &X86::VR128RegClass);
                     : &X86::VR128RegClass);
                     : &X86::VR128RegClass);
                     : &X86::VR128RegClass);
                     : &X86::VR128RegClass);

    for (auto VT : { MVT::f64, MVT::v4f32, MVT::v2f64 }) {

    for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
                     MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {

    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {

    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {

    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {

    for (auto VT : { MVT::v8f16, MVT::v2f64, MVT::v2i64 }) {
      if (VT == MVT::v2i64 && !Subtarget.is64Bit())

    setF16Action(MVT::v8f16, Expand);

    for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {

    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
      if (VT == MVT::v2i64)
        continue;

  if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {

  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
    for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {

    for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {

    for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {

    if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {

  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) {

  if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
                     MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {

    for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })

    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
                     MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
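  // 256-bit AVX: add the VR256 register classes and operation actions for the
  // 256-bit integer and floating-point vector types.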
  if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
                     : &X86::VR256RegClass);
                     : &X86::VR256RegClass);
                     : &X86::VR256RegClass);
                     : &X86::VR256RegClass);
                     : &X86::VR256RegClass);
                     : &X86::VR256RegClass);
                     : &X86::VR256RegClass);

    for (auto VT : { MVT::v8f32, MVT::v4f64 }) {

    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
      if (VT == MVT::v4i64)
        continue;

    for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {

    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {

    for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
                     MVT::v2f64, MVT::v4f64 }) {

    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {

    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {

    for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {

    for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                     MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {

    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
                     MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {

    for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
                    MVT::v16f16, MVT::v8f32, MVT::v4f64 }) {

    setF16Action(MVT::v16f16, Expand);

    for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                     MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })

  if (!Subtarget.useSoftFloat() && !Subtarget.hasFP16() &&
      Subtarget.hasF16C()) {
    for (MVT VT : { MVT::f16, MVT::v2f16, MVT::v4f16, MVT::v8f16 }) {

    for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32, MVT::v8f32 }) {
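  // AVX-512: the vXi1 mask vector types and their operations.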
  if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
    if (!Subtarget.hasDQI()) {

    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {

    for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 })

    for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {

    for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
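  // 512-bit vector types are set up only when 512-bit registers may be used.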
  if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
    bool HasBWI = Subtarget.hasBWI();

    for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {

    for (MVT VT : { MVT::v16i1, MVT::v16i8 }) {

    for (MVT VT : { MVT::v16i16, MVT::v16i32 }) {

    if (!Subtarget.hasVLX()) {
      for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                      MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {

    for (auto VT : { MVT::v16f32, MVT::v8f64 }) {

    for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {

    for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {

    for (auto VT : { MVT::v16i32, MVT::v8i64 }) {

    for (auto VT : { MVT::v64i8, MVT::v32i16 }) {

    if (Subtarget.hasDQI()) {

    if (Subtarget.hasCDI()) {
      for (auto VT : { MVT::v16i32, MVT::v8i64 }) {

    if (Subtarget.hasVPOPCNTDQ()) {
      for (auto VT : { MVT::v16i32, MVT::v8i64 })

    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
                     MVT::v16f16, MVT::v8f32, MVT::v4f64 })

    for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
                     MVT::v32f16, MVT::v16f32, MVT::v8f64 }) {

    setF16Action(MVT::v32f16, Expand);

    for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {

    for (auto VT : { MVT::v64i8, MVT::v32i16 }) {

    if (Subtarget.hasVBMI2()) {
      for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
                       MVT::v16i16, MVT::v8i32, MVT::v4i64,
                       MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {

  if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
    if (Subtarget.hasDQI()) {
           "Unexpected operation action!");

    for (auto VT : { MVT::v2i64, MVT::v4i64 }) {

    for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {

    for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                     MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })

    if (Subtarget.hasDQI()) {

    if (Subtarget.hasCDI()) {
      for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {

    if (Subtarget.hasVPOPCNTDQ()) {
      for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
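  // AVX-512 BWI: byte/word vector types (v64i8, v32i16) and the v32i1/v64i1
  // mask types.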
  if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
    for (auto VT : { MVT::v32i1, MVT::v64i1 }) {

    for (auto VT : { MVT::v16i1, MVT::v32i1 })

    for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {

    if (Subtarget.hasBITALG()) {
      for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
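  // AVX-512 FP16: a setGroup() lambda applies the common half-precision
  // actions to each f16 vector type.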
  if (!Subtarget.useSoftFloat() && Subtarget.hasFP16()) {
    auto setGroup = [&] (MVT VT) {

    setGroup(MVT::v32f16);

    if (Subtarget.hasVLX()) {
      setGroup(MVT::v8f16);
      setGroup(MVT::v16f16);

  if (!Subtarget.useSoftFloat() &&
      (Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16())) {
                     : &X86::VR128RegClass);
                     : &X86::VR256RegClass);

    for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
      setF16Action(VT, Expand);

  if (!Subtarget.useSoftFloat() && Subtarget.hasBF16()) {
    setF16Action(MVT::v32bf16, Expand);

  if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
    if (Subtarget.hasBWI()) {
    if (Subtarget.hasFP16()) {
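  // AMX tile support.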
  if (Subtarget.hasAMXTILE()) {

  if (!Subtarget.is64Bit()) {

  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
    if (VT == MVT::i64 && !Subtarget.is64Bit())

  if (!Subtarget.is64Bit()) {

  if (Subtarget.is32Bit() &&

  unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;

  if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
      !Subtarget.hasBWI())
                        bool AssumeSingleUse) {
  if (!AssumeSingleUse && !Op.hasOneUse())

  auto *Ld = cast<LoadSDNode>(Op.getNode());
  if (!Subtarget.hasAVX() && !Subtarget.hasSSEUnalignedMem() &&
      Ld->getValueSizeInBits(0) == 128 && Ld->getAlign() < Align(16))

                        bool AssumeSingleUse) {
  assert(Subtarget.hasAVX() && "Expected AVX for broadcast from memory");

  auto *Ld = cast<LoadSDNode>(Op.getNode());
  return !Ld->isVolatile() ||

  if (Op.hasOneUse()) {
    unsigned Opcode = Op.getNode()->use_begin()->getOpcode();

  default:
    return false;

  default:
    return false;

  int ReturnAddrIndex = FuncInfo->getRAIndex();
  if (ReturnAddrIndex == 0) {

                        bool hasSymbolicDisplacement) {
  if (!hasSymbolicDisplacement)

  switch (SetCCOpcode) {

    if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnes()) {
    if (SetCCOpcode == ISD::SETLT && RHSC->isZero()) {
    if (SetCCOpcode == ISD::SETGE && RHSC->isZero()) {
    if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {

    SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);

  switch (SetCCOpcode) {

  switch (SetCCOpcode) {
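  // For each X86 intrinsic that touches memory (KeyLocker AES, CMPccXADD, the
  // atomic bit-test and read-modify-write intrinsics), record the pointer
  // operand and the size of the memory access.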
                                           unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::x86_aesenc128kl:
  case Intrinsic::x86_aesdec128kl:
    Info.ptrVal = I.getArgOperand(1);

  case Intrinsic::x86_aesenc256kl:
  case Intrinsic::x86_aesdec256kl:
    Info.ptrVal = I.getArgOperand(1);

  case Intrinsic::x86_aesencwide128kl:
  case Intrinsic::x86_aesdecwide128kl:
    Info.ptrVal = I.getArgOperand(0);

  case Intrinsic::x86_aesencwide256kl:
  case Intrinsic::x86_aesdecwide256kl:
    Info.ptrVal = I.getArgOperand(0);

  case Intrinsic::x86_cmpccxadd32:
  case Intrinsic::x86_cmpccxadd64:
  case Intrinsic::x86_atomic_bts:
  case Intrinsic::x86_atomic_btc:
  case Intrinsic::x86_atomic_btr: {
    Info.ptrVal = I.getArgOperand(0);
    unsigned Size = I.getType()->getScalarSizeInBits();

  case Intrinsic::x86_atomic_bts_rm:
  case Intrinsic::x86_atomic_btc_rm:
  case Intrinsic::x86_atomic_btr_rm: {
    Info.ptrVal = I.getArgOperand(0);
    unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();

  case Intrinsic::x86_aadd32:
  case Intrinsic::x86_aadd64:
  case Intrinsic::x86_aand32:
  case Intrinsic::x86_aand64:
  case Intrinsic::x86_aor32:
  case Intrinsic::x86_aor64:
  case Intrinsic::x86_axor32:
  case Intrinsic::x86_axor64:
  case Intrinsic::x86_atomic_add_cc:
  case Intrinsic::x86_atomic_sub_cc:
  case Intrinsic::x86_atomic_or_cc:
  case Intrinsic::x86_atomic_and_cc:
  case Intrinsic::x86_atomic_xor_cc: {
    Info.ptrVal = I.getArgOperand(0);
    unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();

  switch (IntrData->Type) {
    Info.ptrVal = I.getArgOperand(0);
      ScalarVT = MVT::i16;
      ScalarVT = MVT::i32;

    Info.ptrVal = nullptr;

    Info.ptrVal = nullptr;
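  // An FP immediate is legal only if it matches one of the immediates
  // registered earlier via addLegalFPImmediate().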
                                         bool ForCodeSize) const {
  for (const APFloat &FPImm : LegalFPImmediates)
    if (Imm.bitwiseIsEqual(FPImm))

  assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow");

  SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
  if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))

  EVT VT = Load->getValueType(0);
  for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
    if (UI.getUse().getResNo() != 0)

  if (BitSize == 0 || BitSize > 64)

      (EltSizeInBits != 32 || !Subtarget.isPMULLDSlow()))

  return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
         (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();

                                                 unsigned Index) const {

  return Subtarget.hasBMI() ||

  return Subtarget.hasLZCNT();

  return !Subtarget.hasSSE2() || VT == MVT::f80;

  return (VT == MVT::f64 && Subtarget.hasSSE2()) ||
         (VT == MVT::f32 && Subtarget.hasSSE1()) || VT == MVT::f16;

  if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)

  unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;