1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelLowering.h"
16#include "X86.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
22#include "X86TargetMachine.h"
23#include "X86TargetObjectFile.h"
25#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/Statistic.h"
43#include "llvm/IR/CallingConv.h"
44#include "llvm/IR/Constants.h"
47#include "llvm/IR/Function.h"
48#include "llvm/IR/GlobalAlias.h"
50#include "llvm/IR/IRBuilder.h"
52#include "llvm/IR/Intrinsics.h"
54#include "llvm/MC/MCAsmInfo.h"
55#include "llvm/MC/MCContext.h"
56#include "llvm/MC/MCExpr.h"
57#include "llvm/MC/MCSymbol.h"
59#include "llvm/Support/Debug.h"
64#include <algorithm>
65#include <bitset>
66#include <cctype>
67#include <numeric>
68using namespace llvm;
69
70#define DEBUG_TYPE "x86-isel"
71
73 "x86-experimental-pref-innermost-loop-alignment", cl::init(4),
75 "Sets the preferable loop alignment for experiments (as log2 bytes) "
76 "for innermost loops only. If specified, this option overrides "
77 "alignment set by x86-experimental-pref-loop-alignment."),
79
81 "x86-br-merging-base-cost", cl::init(2),
83 "Sets the cost threshold for when multiple conditionals will be merged "
84 "into one branch versus be split in multiple branches. Merging "
85 "conditionals saves branches at the cost of additional instructions. "
86 "This value sets the instruction cost limit, below which conditionals "
87 "will be merged, and above which conditionals will be split. Set to -1 "
88 "to never merge branches."),
90
92 "x86-br-merging-ccmp-bias", cl::init(6),
93 cl::desc("Increases 'x86-br-merging-base-cost' in cases that the target "
94 "supports conditional compare instructions."),
96
98 "x86-br-merging-likely-bias", cl::init(0),
99 cl::desc("Increases 'x86-br-merging-base-cost' in cases that it is likely "
100 "that all conditionals will be executed. For example for merging "
101 "the conditionals (a == b && c > d), if its known that a == b is "
102 "likely, then it is likely that if the conditionals are split "
103 "both sides will be executed, so it may be desirable to increase "
104 "the instruction cost threshold. Set to -1 to never merge likely "
105 "branches."),
106 cl::Hidden);
107
109 "x86-br-merging-unlikely-bias", cl::init(-1),
110 cl::desc(
111 "Decreases 'x86-br-merging-base-cost' in cases that it is unlikely "
112 "that all conditionals will be executed. For example for merging "
113 "the conditionals (a == b && c > d), if its known that a == b is "
114 "unlikely, then it is unlikely that if the conditionals are split "
115 "both sides will be executed, so it may be desirable to decrease "
116 "the instruction cost threshold. Set to -1 to never merge unlikely "
117 "branches."),
118 cl::Hidden);
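// Illustrative sketch (not part of this file): for if (a == b && c > d), the
// "split" lowering branches twice, roughly
//   cmp a, b ; jne skip ; cmp c, d ; jle skip ; <body> ; skip:
// whereas the "merged" lowering evaluates both compares, combines the results
// (setcc/and or a conditional compare), and branches once, trading extra ALU
// instructions for one fewer branch. The x86-br-merging-* options above set
// the instruction-cost threshold and likelihood biases used for that choice.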
119
121 "mul-constant-optimization", cl::init(true),
122 cl::desc("Replace 'mul x, Const' with more effective instructions like "
123 "SHIFT, LEA, etc."),
124 cl::Hidden);
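// Illustrative sketch (not part of this file): with the option above enabled,
// multiplies by suitable constants are rewritten without a MUL, e.g. (Intel
// syntax, 64-bit operands)
//   x * 8 -> shl rdi, 3
//   x * 5 -> lea rax, [rdi + 4*rdi]
// The actual decomposition is performed by the combines later in this file.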
125
127 const X86Subtarget &STI)
128 : TargetLowering(TM), Subtarget(STI) {
129 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
130 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
131
132 // Set up the TargetLowering object.
133
134 // X86 is weird. It always uses i8 for shift amounts and setcc results.
136 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
138
139 // X86 instruction cache is coherent with its data cache so we can use the
140 // default expansion to a no-op.
142
143 // For 64-bit, since we have so many registers, use the ILP scheduler.
144 // For 32-bit, use the register pressure specific scheduling.
145 // For Atom, always use ILP scheduling.
146 if (Subtarget.isAtom())
148 else if (Subtarget.is64Bit())
150 else
152 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
154
155 // Bypass expensive divides and use cheaper ones.
156 if (TM.getOptLevel() >= CodeGenOptLevel::Default) {
157 if (Subtarget.hasSlowDivide32())
158 addBypassSlowDiv(32, 8);
159 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
160 addBypassSlowDiv(64, 32);
161 }
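// Illustrative sketch (not part of this file): addBypassSlowDiv(32, 8) asks
// CodeGenPrepare to guard each 32-bit divide with a runtime check roughly like
//   if (((a | b) >> 8) == 0)  // both operands fit in 8 bits
//     <use the cheap 8-bit divide>
//   else
//     <use the full 32-bit divide>
// so that small operands avoid the slow wide divider on the affected CPUs.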
162
163 // Setup Windows compiler runtime calls.
164 if (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()) {
165 static const struct {
166 const RTLIB::Libcall Op;
167 const char * const Name;
168 const CallingConv::ID CC;
169 } LibraryCalls[] = {
170 { RTLIB::SDIV_I64, "_alldiv", CallingConv::X86_StdCall },
171 { RTLIB::UDIV_I64, "_aulldiv", CallingConv::X86_StdCall },
172 { RTLIB::SREM_I64, "_allrem", CallingConv::X86_StdCall },
173 { RTLIB::UREM_I64, "_aullrem", CallingConv::X86_StdCall },
174 { RTLIB::MUL_I64, "_allmul", CallingConv::X86_StdCall },
175 };
176
177 for (const auto &LC : LibraryCalls) {
178 setLibcallName(LC.Op, LC.Name);
179 setLibcallCallingConv(LC.Op, LC.CC);
180 }
181 }
182
183 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
184 // MSVCRT doesn't have powi; fall back to pow
185 setLibcallName(RTLIB::POWI_F32, nullptr);
186 setLibcallName(RTLIB::POWI_F64, nullptr);
187 }
188
189 if (Subtarget.canUseCMPXCHG16B())
191 else if (Subtarget.canUseCMPXCHG8B())
193 else
195
196 setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64);
197
199
200 // Set up the register classes.
201 addRegisterClass(MVT::i8, &X86::GR8RegClass);
202 addRegisterClass(MVT::i16, &X86::GR16RegClass);
203 addRegisterClass(MVT::i32, &X86::GR32RegClass);
204 if (Subtarget.is64Bit())
205 addRegisterClass(MVT::i64, &X86::GR64RegClass);
206
207 for (MVT VT : MVT::integer_valuetypes())
209
210 // We don't accept any truncstore of integer registers.
211 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
212 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
213 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
214 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
215 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
216 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
217
218 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
219
220 // SETOEQ and SETUNE require checking two conditions.
221 for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
224 }
225
226 // Integer absolute.
227 if (Subtarget.canUseCMOV()) {
228 setOperationAction(ISD::ABS , MVT::i16 , Custom);
229 setOperationAction(ISD::ABS , MVT::i32 , Custom);
230 if (Subtarget.is64Bit())
231 setOperationAction(ISD::ABS , MVT::i64 , Custom);
232 }
233
234 // Absolute difference.
235 for (auto Op : {ISD::ABDS, ISD::ABDU}) {
236 setOperationAction(Op , MVT::i8 , Custom);
237 setOperationAction(Op , MVT::i16 , Custom);
238 setOperationAction(Op , MVT::i32 , Custom);
239 if (Subtarget.is64Bit())
240 setOperationAction(Op , MVT::i64 , Custom);
241 }
242
243 // Signed saturation subtraction.
247 if (Subtarget.is64Bit())
249
250 // Funnel shifts.
251 for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
252 // For slow shld targets we only lower for code size.
253 LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
254
255 setOperationAction(ShiftOp , MVT::i8 , Custom);
256 setOperationAction(ShiftOp , MVT::i16 , Custom);
257 setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction);
258 if (Subtarget.is64Bit())
259 setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction);
260 }
261
262 if (!Subtarget.useSoftFloat()) {
263 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
264 // operation.
269 // We have an algorithm for SSE2, and we turn this into a 64-bit
270 // FILD or VCVTUSI2SS/SD for other targets.
273 // We have an algorithm for SSE2->double, and we turn this into a
274 // 64-bit FILD followed by conditional FADD for other targets.
277
278 // Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
279 // this operation.
282 // SSE has no i16 to fp conversion, only i32. We promote in the handler
283 // to allow f80 to use i16 and f64 to use i16 with sse1 only
286 // f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
289 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
290 // are Legal, f80 is custom lowered.
293
294 // Promote i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
295 // this operation.
297 // FIXME: This doesn't generate invalid exception when it should. PR44019.
303 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
304 // are Legal, f80 is custom lowered.
307
308 // Handle FP_TO_UINT by promoting the destination to a larger signed
309 // conversion.
311 // FIXME: This doesn't generate invalid exception when it should. PR44019.
314 // FIXME: This doesn't generate invalid exception when it should. PR44019.
320
325
326 if (!Subtarget.is64Bit()) {
329 }
330 }
331
332 if (Subtarget.hasSSE2()) {
333 // Custom lowering for saturating float to int conversions.
334 // We handle promotion to larger result types manually.
335 for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
338 }
339 if (Subtarget.is64Bit()) {
342 }
343 }
344
345 // Handle address space casts between mixed sized pointers.
348
350 // TODO: when we have SSE, these could be more efficient by using movd/movq.
350 if (!Subtarget.hasSSE2()) {
353 if (Subtarget.is64Bit()) {
355 // Without SSE, i64->f64 goes through memory.
357 }
358 } else if (!Subtarget.is64Bit())
360
361 // Scalar integer divide and remainder are lowered to use operations that
362 // produce two results, to match the available instructions. This exposes
363 // the two-result form to trivial CSE, which is able to combine x/y and x%y
364 // into a single instruction.
365 //
366 // Scalar integer multiply-high is also lowered to use two-result
367 // operations, to match the available instructions. However, plain multiply
368 // (low) operations are left as Legal, as there are single-result
369 // instructions for this in x86. Using the two-result multiply instructions
370 // when both high and low results are needed must be arranged by dagcombine.
371 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
378 }
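// Illustrative sketch (not part of this file): because SDIV and SREM are both
// expanded to the two-result ISD::SDIVREM node, IR such as
//   %q = sdiv i32 %x, %y
//   %r = srem i32 %x, %y
// CSEs into a single node and ultimately a single IDIV, which produces the
// quotient in EAX and the remainder in EDX.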
379
380 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
382 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
383 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
386 }
387 if (Subtarget.is64Bit())
392
393 setOperationAction(ISD::FREM , MVT::f32 , Expand);
394 setOperationAction(ISD::FREM , MVT::f64 , Expand);
395 setOperationAction(ISD::FREM , MVT::f80 , Expand);
396 setOperationAction(ISD::FREM , MVT::f128 , Expand);
397
398 if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
404 }
405
406 // Promote the i8 variants and force them on up to i32 which has a shorter
407 // encoding.
408 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
410 // Promote i16 as well: tzcntw has a false dependency on Intel CPUs, and for
411 // BSF we emit a REP prefix to encode it as TZCNT on modern CPUs, so it makes
412 // sense to promote that too.
413 setOperationPromotedToType(ISD::CTTZ , MVT::i16 , MVT::i32);
415
416 if (!Subtarget.hasBMI()) {
417 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
419 if (Subtarget.is64Bit()) {
420 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
422 }
423 }
424
425 if (Subtarget.hasLZCNT()) {
426 // When promoting the i8 variants, force them to i32 for a shorter
427 // encoding.
428 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
430 } else {
431 for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
432 if (VT == MVT::i64 && !Subtarget.is64Bit())
433 continue;
436 }
437 }
438
441 // Special handling for half-precision floating point conversions.
442 // If we don't have F16C support, then lower half float conversions
443 // into library calls.
445 Op, MVT::f32,
446 (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand);
447 // There's never any support for operations beyond MVT::f32.
448 setOperationAction(Op, MVT::f64, Expand);
449 setOperationAction(Op, MVT::f80, Expand);
450 setOperationAction(Op, MVT::f128, Expand);
451 }
452
453 for (auto VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
456 }
457
458 for (MVT VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
459 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
460 setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand);
461 setTruncStoreAction(VT, MVT::f16, Expand);
462 setTruncStoreAction(VT, MVT::bf16, Expand);
463
466 }
467
471 if (Subtarget.is64Bit())
473 if (Subtarget.hasPOPCNT()) {
474 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
475 // popcntw is longer to encode than popcntl and also has a false dependency
476 // on the dest that popcntl hasn't had since Cannon Lake.
477 setOperationPromotedToType(ISD::CTPOP, MVT::i16, MVT::i32);
478 } else {
483 }
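// Illustrative sketch (not part of this file): promoting i16 CTPOP to i32 turns
//   %c = call i16 @llvm.ctpop.i16(i16 %x)
// into roughly
//   movzwl %di, %eax
//   popcntl %eax, %eax
// avoiding the longer popcntw encoding and its false output dependency.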
484
486
487 if (!Subtarget.hasMOVBE())
489
490 // X86 wants to expand cmov itself.
491 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
496 }
497 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
498 if (VT == MVT::i64 && !Subtarget.is64Bit())
499 continue;
502 }
503
504 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
507
509 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
510 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
514 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
515 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
516
517 // Darwin ABI issue.
518 for (auto VT : { MVT::i32, MVT::i64 }) {
519 if (VT == MVT::i64 && !Subtarget.is64Bit())
520 continue;
527 }
528
529 // 64-bit shl, sra, srl (iff 32-bit x86)
530 for (auto VT : { MVT::i32, MVT::i64 }) {
531 if (VT == MVT::i64 && !Subtarget.is64Bit())
532 continue;
536 }
537
538 if (Subtarget.hasSSEPrefetch() || Subtarget.hasThreeDNow())
540
542
543 // Expand certain atomics
544 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
552 }
553
554 if (!Subtarget.is64Bit())
556
557 if (Subtarget.is64Bit() && Subtarget.hasAVX()) {
558 // All CPUs supporting AVX will atomically load/store aligned 128-bit
559 // values, so we can emit [V]MOVAPS/[V]MOVDQA.
562 }
563
564 if (Subtarget.canUseCMPXCHG16B())
566
567 // FIXME - use subtarget debug flags
568 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
569 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
570 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
572 }
573
576
579
580 setOperationAction(ISD::TRAP, MVT::Other, Legal);
582 if (Subtarget.isTargetPS())
584 else
586
587 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
589 setOperationAction(ISD::VAEND , MVT::Other, Expand);
590 bool Is64Bit = Subtarget.is64Bit();
591 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
592 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
593
596
598
599 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
602
604
605 auto setF16Action = [&] (MVT VT, LegalizeAction Action) {
606 setOperationAction(ISD::FABS, VT, Action);
607 setOperationAction(ISD::FNEG, VT, Action);
609 setOperationAction(ISD::FREM, VT, Action);
610 setOperationAction(ISD::FMA, VT, Action);
611 setOperationAction(ISD::FMINNUM, VT, Action);
612 setOperationAction(ISD::FMAXNUM, VT, Action);
615 setOperationAction(ISD::FSIN, VT, Action);
616 setOperationAction(ISD::FCOS, VT, Action);
617 setOperationAction(ISD::FSINCOS, VT, Action);
618 setOperationAction(ISD::FTAN, VT, Action);
619 setOperationAction(ISD::FSQRT, VT, Action);
620 setOperationAction(ISD::FPOW, VT, Action);
621 setOperationAction(ISD::FLOG, VT, Action);
622 setOperationAction(ISD::FLOG2, VT, Action);
623 setOperationAction(ISD::FLOG10, VT, Action);
624 setOperationAction(ISD::FEXP, VT, Action);
625 setOperationAction(ISD::FEXP2, VT, Action);
626 setOperationAction(ISD::FEXP10, VT, Action);
627 setOperationAction(ISD::FCEIL, VT, Action);
628 setOperationAction(ISD::FFLOOR, VT, Action);
630 setOperationAction(ISD::FRINT, VT, Action);
631 setOperationAction(ISD::BR_CC, VT, Action);
632 setOperationAction(ISD::SETCC, VT, Action);
635 setOperationAction(ISD::FROUND, VT, Action);
637 setOperationAction(ISD::FTRUNC, VT, Action);
638 setOperationAction(ISD::FLDEXP, VT, Action);
639 };
640
641 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
642 // f16, f32 and f64 use SSE.
643 // Set up the FP register classes.
644 addRegisterClass(MVT::f16, Subtarget.hasAVX512() ? &X86::FR16XRegClass
645 : &X86::FR16RegClass);
646 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
647 : &X86::FR32RegClass);
648 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
649 : &X86::FR64RegClass);
650
651 // Disable f32->f64 extload as we can only generate this in one instruction
652 // under optsize. So it's easier to pattern match (fpext (load)) for that
653 // case instead of needing to emit 2 instructions for extload in the
654 // non-optsize case.
655 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
656
657 for (auto VT : { MVT::f32, MVT::f64 }) {
658 // Use ANDPD to simulate FABS.
660
661 // Use XORP to simulate FNEG.
663
664 // Use ANDPD and ORPD to simulate FCOPYSIGN.
666
667 // These might be better off as horizontal vector ops.
670
671 // We don't support sin/cos/fmod
675 }
676
677 // Half type will be promoted by default.
678 setF16Action(MVT::f16, Promote);
686
716
717 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
718 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
719
720 // Lower this to MOVMSK plus an AND.
723
724 } else if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1() &&
725 (UseX87 || Is64Bit)) {
726 // Use SSE for f32, x87 for f64.
727 // Set up the FP register classes.
728 addRegisterClass(MVT::f32, &X86::FR32RegClass);
729 if (UseX87)
730 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
731
732 // Use ANDPS to simulate FABS.
734
735 // Use XORP to simulate FNEG.
737
738 if (UseX87)
740
741 // Use ANDPS and ORPS to simulate FCOPYSIGN.
742 if (UseX87)
745
746 // We don't support sin/cos/fmod
750
751 if (UseX87) {
752 // Always expand sin/cos functions even though x87 has an instruction.
756 }
757 } else if (UseX87) {
758 // f32 and f64 in x87.
759 // Set up the FP register classes.
760 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
761 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
762
763 for (auto VT : { MVT::f32, MVT::f64 }) {
766
767 // Always expand sin/cos functions even though x87 has an instruction.
771 }
772 }
773
774 // Expand FP32 immediates into loads from the stack, save special cases.
775 if (isTypeLegal(MVT::f32)) {
776 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
777 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
778 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
779 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
780 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
781 } else // SSE immediates.
782 addLegalFPImmediate(APFloat(+0.0f)); // xorps
783 }
784 // Expand FP64 immediates into loads from the stack, save special cases.
785 if (isTypeLegal(MVT::f64)) {
786 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
787 addLegalFPImmediate(APFloat(+0.0)); // FLD0
788 addLegalFPImmediate(APFloat(+1.0)); // FLD1
789 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
790 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
791 } else // SSE immediates.
792 addLegalFPImmediate(APFloat(+0.0)); // xorpd
793 }
794 // Support fp16 0 immediate.
795 if (isTypeLegal(MVT::f16))
796 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEhalf()));
797
798 // Handle constrained floating-point operations of scalar.
811
812 // We don't support FMA.
815
816 // f80 always uses X87.
817 if (UseX87) {
818 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
821 {
823 addLegalFPImmediate(TmpFlt); // FLD0
824 TmpFlt.changeSign();
825 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
826
827 bool ignored;
828 APFloat TmpFlt2(+1.0);
830 &ignored);
831 addLegalFPImmediate(TmpFlt2); // FLD1
832 TmpFlt2.changeSign();
833 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
834 }
835
836 // Always expand sin/cos functions even though x87 has an instruction.
837 // clang-format off
842 // clang-format on
843
855
856 // Handle constrained floating-point operations of scalar.
862 if (isTypeLegal(MVT::f16)) {
865 } else {
867 }
868 // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
869 // as Custom.
871 }
872
873 // f128 uses xmm registers, but most operations require libcalls.
874 if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
875 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
876 : &X86::VR128RegClass);
877
878 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
879
890
894
895 // clang-format off
903 // clang-format on
904 // No STRICT_FSINCOS
907
910 // We need to custom handle any FP_ROUND with an f128 input, but
911 // LegalizeDAG uses the result type to know when to run a custom handler.
912 // So we have to list all legal floating point result types here.
913 if (isTypeLegal(MVT::f32)) {
916 }
917 if (isTypeLegal(MVT::f64)) {
920 }
921 if (isTypeLegal(MVT::f80)) {
924 }
925
927
928 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
929 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
930 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
931 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
932 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
933 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
934 }
935
936 // Always use a library call for pow.
937 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
938 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
939 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
940 setOperationAction(ISD::FPOW , MVT::f128 , Expand);
941
950
951 // Some FP actions are always expanded for vector types.
952 for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16,
953 MVT::v4f32, MVT::v8f32, MVT::v16f32,
954 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
955 // clang-format off
969 // clang-format on
970 }
971
972 // First set operation action for all vector types to either promote
973 // (for widening) or expand (for scalarization). Then we will selectively
974 // turn on ones that can be effectively codegen'd.
1014 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
1015 setTruncStoreAction(InnerVT, VT, Expand);
1016
1017 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
1018 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
1019
1020 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
1021 // types; we have to deal with them whether we ask for Expansion or not.
1022 // Setting Expand causes its own optimisation problems though, so leave
1023 // them legal.
1024 if (VT.getVectorElementType() == MVT::i1)
1025 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
1026
1027 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
1028 // split/scalarized right now.
1029 if (VT.getVectorElementType() == MVT::f16 ||
1030 VT.getVectorElementType() == MVT::bf16)
1031 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
1032 }
1033 }
1034
1035 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
1036 // with -msoft-float, disable use of MMX as well.
1037 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
1038 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
1039 // No operations on x86mmx supported, everything uses intrinsics.
1040 }
1041
1042 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
1043 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
1044 : &X86::VR128RegClass);
1045
1048
1049 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
1050 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
1057
1058 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
1059 setOperationAction(ISD::STORE, MVT::v2f32, Custom);
1060
1066 }
1067
1068 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
1069 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
1070 : &X86::VR128RegClass);
1071
1072 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
1073 // registers cannot be used even for integer operations.
1074 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
1075 : &X86::VR128RegClass);
1076 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
1077 : &X86::VR128RegClass);
1078 addRegisterClass(MVT::v8f16, Subtarget.hasVLX() ? &X86::VR128XRegClass
1079 : &X86::VR128RegClass);
1080 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
1081 : &X86::VR128RegClass);
1082 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
1083 : &X86::VR128RegClass);
1084
1085 for (auto VT : { MVT::f64, MVT::v4f32, MVT::v2f64 }) {
1088 }
1089
1090 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
1091 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
1096 }
1097
1098 setOperationAction(ISD::MUL, MVT::v2i8, Custom);
1099 setOperationAction(ISD::MUL, MVT::v4i8, Custom);
1100 setOperationAction(ISD::MUL, MVT::v8i8, Custom);
1101
1102 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
1103 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
1104 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1105 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
1106 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
1107 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
1108 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
1109 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
1110 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
1111 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
1114
1115 setOperationAction(ISD::SMULO, MVT::v16i8, Custom);
1116 setOperationAction(ISD::UMULO, MVT::v16i8, Custom);
1117 setOperationAction(ISD::UMULO, MVT::v2i32, Custom);
1118
1119 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
1120 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
1122
1123 setOperationAction(ISD::LRINT, MVT::v4f32, Custom);
1124
1125 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1126 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
1127 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
1128 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
1129 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
1130 }
1131
1142
1147
1148 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1154
1155 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1156 // setcc all the way to isel and prefer SETGT in some isel patterns.
1159 }
1160
1161 setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
1162 setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
1167
1168 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
1174 }
1175
1176 for (auto VT : { MVT::v8f16, MVT::v2f64, MVT::v2i64 }) {
1180
1181 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
1182 continue;
1183
1186 }
1187 setF16Action(MVT::v8f16, Expand);
1188 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
1189 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
1190 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
1191 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
1192 setOperationAction(ISD::FNEG, MVT::v8f16, Custom);
1193 setOperationAction(ISD::FABS, MVT::v8f16, Custom);
1195
1196 // Custom lower v2i64 and v2f64 selects.
1203
1210
1211 // Custom legalize these to avoid over promotion or custom promotion.
1212 for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
1217 }
1218
1223
1226
1229
1230 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
1235
1240
1241 // We want to legalize this to an f64 load rather than an i64 load on
1242 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
1243 // store.
1244 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
1245 setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
1246 setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
1247 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
1248 setOperationAction(ISD::STORE, MVT::v4i16, Custom);
1250
1251 // Add 32-bit vector stores to help vectorization opportunities.
1252 setOperationAction(ISD::STORE, MVT::v2i16, Custom);
1254
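// Illustrative sketch (not part of this file): for a <2 x i32> load this means
// emitting one 64-bit scalar FP load (e.g. movsd into an XMM register) and
// bitcasting on x86-64, or two i32 loads on 32-bit targets, instead of going
// through a GPR-based i64 load.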
1258 if (!Subtarget.hasAVX512())
1260
1264
1266
1283
1284 // In the customized shift lowering, the legal v4i32/v2i64 cases
1285 // in AVX2 will be recognized.
1286 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1290 if (VT == MVT::v2i64) continue;
1295 }
1296
1302 }
1303
1304 if (Subtarget.hasGFNI()) {
1309 }
1310
1311 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1312 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1313 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1314 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1315
1316 for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
1319 }
1320
1321 // These might be better off as horizontal vector ops.
1326 }
1327
1328 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1329 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1332 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
1336 setOperationAction(ISD::FRINT, RoundedTy, Legal);
1342
1344 }
1345
1346 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
1347 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
1348 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
1349 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
1350 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
1351 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
1352 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
1353 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
1354
1358
1359 // FIXME: Do we need to handle scalar-to-vector here?
1360 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1361 setOperationAction(ISD::SMULO, MVT::v2i32, Custom);
1362
1363 // We directly match byte blends in the backend as they match the VSELECT
1364 // condition form.
1366
1367 // SSE41 brings specific instructions for doing vector sign extend even in
1368 // cases where we don't have SRA.
1369 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1372 }
1373
1374 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1375 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1376 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
1377 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
1378 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
1379 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1380 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1381 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1382 }
1383
1384 if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
1385 // We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can
1386 // do the pre and post work in the vector domain.
1389 // We need to mark SINT_TO_FP as Custom even though we want to expand it
1390 // so that DAG combine doesn't try to turn it into uint_to_fp.
1393 }
1394 }
1395
1396 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) {
1398 }
1399
1400 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1401 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1402 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1405 }
1406
1407 // XOP can efficiently perform BITREVERSE with VPPERM.
1408 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1410 }
1411
1412 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1413 bool HasInt256 = Subtarget.hasInt256();
1414
1415 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1416 : &X86::VR256RegClass);
1417 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1418 : &X86::VR256RegClass);
1419 addRegisterClass(MVT::v16f16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1420 : &X86::VR256RegClass);
1421 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1422 : &X86::VR256RegClass);
1423 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1424 : &X86::VR256RegClass);
1425 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1426 : &X86::VR256RegClass);
1427 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1428 : &X86::VR256RegClass);
1429
1430 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1443
1445
1449
1452 }
1453
1454 setOperationAction(ISD::LRINT, MVT::v8f32, Custom);
1455 setOperationAction(ISD::LRINT, MVT::v4f64, Custom);
1456
1457 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1458 // even though v8i16 is a legal type.
1459 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1460 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1461 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1462 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1466
1473
1485
1486 if (!Subtarget.hasAVX512())
1488
1489 // In the customized shift lowering, the legal v8i32/v4i64 cases
1490 // in AVX2 will be recognized.
1491 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1497 if (VT == MVT::v4i64) continue;
1502 }
1503
1504 // These types need custom splitting if their input is a 128-bit vector.
1509
1513 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1514 setOperationAction(ISD::SELECT, MVT::v16f16, Custom);
1517
1518 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1522 }
1523
1528
1529 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1534
1535 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1536 // setcc all the way to isel and prefer SETGT in some isel patterns.
1539 }
1540
1541 setOperationAction(ISD::SETCC, MVT::v4f64, Custom);
1542 setOperationAction(ISD::SETCC, MVT::v8f32, Custom);
1547
1548 if (Subtarget.hasAnyFMA()) {
1549 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1550 MVT::v2f64, MVT::v4f64 }) {
1553 }
1554 }
1555
1556 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1557 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1558 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1559 }
1560
1561 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1562 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1563 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1564 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1565
1566 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1567 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1568 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1569 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1570 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1571 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1572 setOperationAction(ISD::AVGCEILU, MVT::v16i16, HasInt256 ? Legal : Custom);
1573 setOperationAction(ISD::AVGCEILU, MVT::v32i8, HasInt256 ? Legal : Custom);
1574
1575 setOperationAction(ISD::SMULO, MVT::v32i8, Custom);
1576 setOperationAction(ISD::UMULO, MVT::v32i8, Custom);
1577
1578 setOperationAction(ISD::ABS, MVT::v4i64, Custom);
1579 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1580 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1581 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1582 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1583
1584 setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1585 setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1586 setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1587 setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1588 setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1589 setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1590 setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1591 setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1596
1597 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1598 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1599 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1600 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1601 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1602 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1603 }
1604
1605 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1608 }
1609
1610 if (HasInt256) {
1611 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1612 // when we have a 256-bit-wide blend with an immediate.
1615
1616 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1617 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1618 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1619 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1620 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1621 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1622 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1623 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1624 }
1625 }
1626
1627 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1628 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1629 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1631 }
1632
1633 // Extract subvector is special because the value type
1634 // (result) is 128-bit but the source is 256-bit wide.
1635 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1636 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
1638 }
1639
1640 // Custom lower several nodes for 256-bit types.
1641 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1642 MVT::v16f16, MVT::v8f32, MVT::v4f64 }) {
1652 }
1653 setF16Action(MVT::v16f16, Expand);
1654 setOperationAction(ISD::FNEG, MVT::v16f16, Custom);
1655 setOperationAction(ISD::FABS, MVT::v16f16, Custom);
1657 setOperationAction(ISD::FADD, MVT::v16f16, Expand);
1658 setOperationAction(ISD::FSUB, MVT::v16f16, Expand);
1659 setOperationAction(ISD::FMUL, MVT::v16f16, Expand);
1660 setOperationAction(ISD::FDIV, MVT::v16f16, Expand);
1661
1662 if (HasInt256) {
1664
1665 // Custom legalize 2x32 to get a little better code.
1668
1669 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1670 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1672 }
1673 }
1674
1675 if (!Subtarget.useSoftFloat() && !Subtarget.hasFP16() &&
1676 Subtarget.hasF16C()) {
1677 for (MVT VT : { MVT::f16, MVT::v2f16, MVT::v4f16, MVT::v8f16 }) {
1680 }
1681 for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32, MVT::v8f32 }) {
1684 }
1685 for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) {
1686 setOperationPromotedToType(Opc, MVT::v8f16, MVT::v8f32);
1687 setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32);
1688 }
1689 }
1690
1691 // This block controls legalization of the mask vector sizes that are
1692 // available with AVX512. 512-bit vectors are in a separate block controlled
1693 // by useAVX512Regs.
1694 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1695 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1696 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1697 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1698 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1699 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1700
1704
1705 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1706 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1707 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1708 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1709 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1710 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1711 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1712 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1717
1718 // There is no byte sized k-register load or store without AVX512DQ.
1719 if (!Subtarget.hasDQI()) {
1720 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1721 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1722 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1723 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1724
1729 }
1730
1731 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1732 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1736 }
1737
1738 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 })
1740
1741 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1745
1752 }
1753
1754 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1756 }
1757 if (Subtarget.hasDQI() && Subtarget.hasVLX()) {
1758 for (MVT VT : {MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1761 }
1762 }
1763
1764 // This block controls legalization for 512-bit operations with 8/16/32/64 bit
1765 // elements. 512-bit operations can be disabled based on prefer-vector-width and
1766 // required-vector-width function attributes.
1767 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1768 bool HasBWI = Subtarget.hasBWI();
1769
1770 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1771 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1772 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1773 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1774 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1775 addRegisterClass(MVT::v32f16, &X86::VR512RegClass);
1776 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1777
1778 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1779 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1780 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1781 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1782 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1783 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1784 if (HasBWI)
1785 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1786 }
1787
1788 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1796 }
1797 setOperationAction(ISD::LRINT, MVT::v16f32,
1798 Subtarget.hasDQI() ? Legal : Custom);
1799 setOperationAction(ISD::LRINT, MVT::v8f64,
1800 Subtarget.hasDQI() ? Legal : Custom);
1801 if (Subtarget.hasDQI())
1802 setOperationAction(ISD::LLRINT, MVT::v8f64, Legal);
1803
1804 for (MVT VT : { MVT::v16i1, MVT::v16i8 }) {
1809 }
1810
1811 for (MVT VT : { MVT::v16i16, MVT::v16i32 }) {
1816 }
1817
1824
1836
1837 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1838 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1839 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1840 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1841 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1842 if (HasBWI)
1843 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1844
1845 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1846 // to 512-bit rather than use the AVX2 instructions so that we can use
1847 // k-masks.
1848 if (!Subtarget.hasVLX()) {
1849 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1850 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1853 }
1854 }
1855
1857 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal);
1858 setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom);
1868
1869 if (HasBWI) {
1870 // Extends from v64i1 masks to 512-bit vectors.
1874 }
1875
1876 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1889
1891 }
1892
1893 for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
1896 }
1897
1898 setOperationAction(ISD::ADD, MVT::v32i16, HasBWI ? Legal : Custom);
1899 setOperationAction(ISD::SUB, MVT::v32i16, HasBWI ? Legal : Custom);
1900 setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom);
1901 setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom);
1902
1903 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1904 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1905 setOperationAction(ISD::MUL, MVT::v32i16, HasBWI ? Legal : Custom);
1906 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1907
1908 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1909 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1910 setOperationAction(ISD::MULHS, MVT::v32i16, HasBWI ? Legal : Custom);
1911 setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom);
1912 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1913 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1914 setOperationAction(ISD::AVGCEILU, MVT::v32i16, HasBWI ? Legal : Custom);
1915 setOperationAction(ISD::AVGCEILU, MVT::v64i8, HasBWI ? Legal : Custom);
1916
1917 setOperationAction(ISD::SMULO, MVT::v64i8, Custom);
1918 setOperationAction(ISD::UMULO, MVT::v64i8, Custom);
1919
1920 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1930
1931 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1932 // setcc all the way to isel and prefer SETGT in some isel patterns.
1935 }
1936
1937 setOperationAction(ISD::SETCC, MVT::v8f64, Custom);
1938 setOperationAction(ISD::SETCC, MVT::v16f32, Custom);
1943
1944 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1951 }
1952
1953 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1954 setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
1955 setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
1957 setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom);
1958 setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom);
1959 setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom);
1960 setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom);
1965 }
1966
1967 setOperationAction(ISD::FSHL, MVT::v64i8, Custom);
1968 setOperationAction(ISD::FSHR, MVT::v64i8, Custom);
1969 setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
1970 setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
1971 setOperationAction(ISD::FSHL, MVT::v16i32, Custom);
1972 setOperationAction(ISD::FSHR, MVT::v16i32, Custom);
1973
1974 if (Subtarget.hasDQI()) {
1978 setOperationAction(Opc, MVT::v8i64, Custom);
1979 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1980 }
1981
1982 if (Subtarget.hasCDI()) {
1983 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1984 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1986 }
1987 } // Subtarget.hasCDI()
1988
1989 if (Subtarget.hasVPOPCNTDQ()) {
1990 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1992 }
1993
1994 // Extract subvector is special because the value type
1995 // (result) is 256-bit but the source is 512-bit wide.
1996 // 128-bit was made Legal under AVX1.
1997 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1998 MVT::v16f16, MVT::v8f32, MVT::v4f64 })
2000
2001 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
2002 MVT::v32f16, MVT::v16f32, MVT::v8f64 }) {
2012 }
2013 setF16Action(MVT::v32f16, Expand);
2018 for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV})
2019 setOperationPromotedToType(Opc, MVT::v32f16, MVT::v32f32);
2020
2021 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
2026 }
2027 if (HasBWI) {
2028 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
2031 }
2032 } else {
2033 setOperationAction(ISD::STORE, MVT::v32i16, Custom);
2034 setOperationAction(ISD::STORE, MVT::v64i8, Custom);
2035 }
2036
2037 if (Subtarget.hasVBMI2()) {
2038 for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
2041 }
2042
2043 setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
2044 setOperationAction(ISD::ROTR, MVT::v32i16, Custom);
2045 }
2046
2047 setOperationAction(ISD::FNEG, MVT::v32f16, Custom);
2048 setOperationAction(ISD::FABS, MVT::v32f16, Custom);
2050 }// useAVX512Regs
2051
2052 if (!Subtarget.useSoftFloat() && Subtarget.hasVBMI2()) {
2053 for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v16i16, MVT::v8i32,
2054 MVT::v4i64}) {
2057 }
2058 }
2059
2060 // This block controls legalization for operations that don't have
2061 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
2062 // narrower widths.
2063 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
2064 // These operations are handled on non-VLX by artificially widening in
2065 // isel patterns.
2066
2070
2071 if (Subtarget.hasDQI()) {
2072 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
2073 // v2f32 UINT_TO_FP is already custom under SSE2.
2076 "Unexpected operation action!");
2077 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
2082 }
2083
2084 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
2090 }
2091
2092 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
2095 }
2096
2097 // Custom legalize 2x32 to get a little better code.
2100
2101 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
2102 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
2104
2105 if (Subtarget.hasDQI()) {
2109 setOperationAction(Opc, MVT::v2i64, Custom);
2110 setOperationAction(Opc, MVT::v4i64, Custom);
2111 }
2112 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
2113 setOperationAction(ISD::MUL, MVT::v4i64, Legal);
2114 }
2115
2116 if (Subtarget.hasCDI()) {
2117 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
2119 }
2120 } // Subtarget.hasCDI()
2121
2122 if (Subtarget.hasVPOPCNTDQ()) {
2123 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
2125 }
2126 }
2127
2128 // This block controls legalization of v32i1/v64i1, which are available with
2129 // AVX512BW.
2130 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
2131 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
2132 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
2133
2134 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
2145 }
2146
2147 for (auto VT : { MVT::v16i1, MVT::v32i1 })
2149
2150 // Extends from v32i1 masks to 256-bit vectors.
2154
2155 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
2156 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
2157 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
2158 }
2159
2160 // These operations are handled on non-VLX by artificially widening in
2161 // isel patterns.
2162 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
2163
2164 if (Subtarget.hasBITALG()) {
2165 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
2167 }
2168 }
2169
2170 if (!Subtarget.useSoftFloat() && Subtarget.hasFP16()) {
2171 auto setGroup = [&] (MVT VT) {
2182
2195
2197
2200
2206
2212
2216 };
2217
2218 // AVX512_FP16 scalar operations
2219 setGroup(MVT::f16);
2233
2236
2237 if (Subtarget.useAVX512Regs()) {
2238 setGroup(MVT::v32f16);
2244 setOperationAction(ISD::FP_ROUND, MVT::v16f16, Legal);
2251
2256 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i8, MVT::v32i16);
2258 MVT::v32i16);
2259 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i8, MVT::v32i16);
2261 MVT::v32i16);
2262 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i1, MVT::v32i16);
2264 MVT::v32i16);
2265 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i1, MVT::v32i16);
2267 MVT::v32i16);
2268
2272
2273 setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Legal);
2274 setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Legal);
2275 }
2276
2277 if (Subtarget.hasVLX()) {
2278 setGroup(MVT::v8f16);
2279 setGroup(MVT::v16f16);
2280
2291
2302
2303 // INSERT_VECTOR_ELT v8f16 extended to VECTOR_SHUFFLE
2306
2310
2311 setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Legal);
2312 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Legal);
2313 setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Legal);
2314 setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Legal);
2315
2316 // Need to custom widen these to prevent scalarization.
2317 setOperationAction(ISD::LOAD, MVT::v4f16, Custom);
2318 setOperationAction(ISD::STORE, MVT::v4f16, Custom);
2319 }
2320 }
2321
2322 if (!Subtarget.useSoftFloat() &&
2323 (Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16())) {
2324 addRegisterClass(MVT::v8bf16, Subtarget.hasAVX512() ? &X86::VR128XRegClass
2325 : &X86::VR128RegClass);
2326 addRegisterClass(MVT::v16bf16, Subtarget.hasAVX512() ? &X86::VR256XRegClass
2327 : &X86::VR256RegClass);
2328 // We set the type action of bf16 to TypeSoftPromoteHalf, but we don't
2329 // provide the method to promote BUILD_VECTOR and INSERT_VECTOR_ELT.
2330 // Set the operation action Custom to do the customization later.
2333 for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
2334 setF16Action(VT, Expand);
2339 }
2340 for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) {
2341 setOperationPromotedToType(Opc, MVT::v8bf16, MVT::v8f32);
2342 setOperationPromotedToType(Opc, MVT::v16bf16, MVT::v16f32);
2343 }
2345 addLegalFPImmediate(APFloat::getZero(APFloat::BFloat()));
2346 }
2347
2348 if (!Subtarget.useSoftFloat() && Subtarget.hasBF16()) {
2349 addRegisterClass(MVT::v32bf16, &X86::VR512RegClass);
2350 setF16Action(MVT::v32bf16, Expand);
2351 for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV})
2352 setOperationPromotedToType(Opc, MVT::v32bf16, MVT::v32f32);
2354 setOperationAction(ISD::FP_ROUND, MVT::v16bf16, Custom);
2358 }
2359
2360 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
2361 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
2362 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
2363 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
2364 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
2365 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
2366
2367 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
2368 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
2369 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
2370 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
2371 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
2372
2373 if (Subtarget.hasBWI()) {
2374 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
2375 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
2376 }
2377
2378 if (Subtarget.hasFP16()) {
2379 // vcvttph2[u]dq v4f16 -> v4i32/64, v2f16 -> v2i32/64
2388 // vcvt[u]dq2ph v4i32/64 -> v4f16, v2i32/64 -> v2f16
2397 // vcvtps2phx v4f32 -> v4f16, v2f32 -> v2f16
2402 // vcvtph2psx v4f16 -> v4f32, v2f16 -> v2f32
2407 }
2408 }
2409
2410 if (!Subtarget.useSoftFloat() && Subtarget.hasAMXTILE()) {
2411 addRegisterClass(MVT::x86amx, &X86::TILERegClass);
2412 }
2413
2414 // We want to custom lower some of our intrinsics.
2418 if (!Subtarget.is64Bit()) {
2420 }
2421
2422 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
2423 // handle type legalization for these operations here.
2424 //
2425 // FIXME: We really should do custom legalization for addition and
2426 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
2427 // than generic legalization for 64-bit multiplication-with-overflow, though.
2428 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
2429 if (VT == MVT::i64 && !Subtarget.is64Bit())
2430 continue;
2431 // Add/Sub/Mul with overflow operations are custom lowered.
2438
2439 // Support carry in as value rather than glue.
2445 }
2446
2447 if (!Subtarget.is64Bit()) {
2448 // These libcalls are not available in 32-bit.
2449 setLibcallName(RTLIB::SHL_I128, nullptr);
2450 setLibcallName(RTLIB::SRL_I128, nullptr);
2451 setLibcallName(RTLIB::SRA_I128, nullptr);
2452 setLibcallName(RTLIB::MUL_I128, nullptr);
2453 // The MULO libcall is not part of libgcc, only compiler-rt.
2454 setLibcallName(RTLIB::MULO_I64, nullptr);
2455 }
2456 // The MULO libcall is not part of libgcc, only compiler-rt.
2457 setLibcallName(RTLIB::MULO_I128, nullptr);
2458
2459 // Combine sin / cos into _sincos_stret if it is available.
2460 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
2461 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
2464 }
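// Illustrative sketch (not part of this file): when both libcalls are
// available (e.g. on Darwin), a pair such as
//   %s = call float @sinf(float %x)
//   %c = call float @cosf(float %x)
// is combined into one sincos_stret-style call returning both results,
// replacing two libcalls with one.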
2465
2466 if (Subtarget.isTargetWin64()) {
2467 setOperationAction(ISD::SDIV, MVT::i128, Custom);
2468 setOperationAction(ISD::UDIV, MVT::i128, Custom);
2469 setOperationAction(ISD::SREM, MVT::i128, Custom);
2470 setOperationAction(ISD::UREM, MVT::i128, Custom);
2479 }
2480
2481 // On 32-bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
2482 // is. We should promote the value to 64-bits to solve this.
2483 // This is what the CRT headers do - `fmodf` is an inline header
2484 // function casting to f64 and calling `fmod`.
2485 if (Subtarget.is32Bit() &&
2486 (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
2487 // clang-format off
2488 for (ISD::NodeType Op :
2499 if (isOperationExpand(Op, MVT::f32))
2500 setOperationAction(Op, MVT::f32, Promote);
2501 // clang-format on
2502
2503 // We have target-specific dag combine patterns for the following nodes:
2514 ISD::SHL,
2515 ISD::SRA,
2516 ISD::SRL,
2517 ISD::OR,
2518 ISD::AND,
2524 ISD::ADD,
2525 ISD::FADD,
2526 ISD::FSUB,
2527 ISD::FNEG,
2528 ISD::FMA,
2532 ISD::SUB,
2533 ISD::LOAD,
2534 ISD::LRINT,
2536 ISD::MLOAD,
2537 ISD::STORE,
2551 ISD::SETCC,
2552 ISD::MUL,
2553 ISD::XOR,
2561
2563
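  // For example, with MaxStoresPerMemset = 16 a small constant-length memset can
  // be expanded inline into up to 16 stores instead of a libcall.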
2564 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
2566 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
2568 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
2570
2571 // TODO: These control memcmp expansion in CGP and could be raised higher, but
2572  // that needs to be benchmarked and balanced with the potential use of vector
2573 // load/store types (PR33329, PR33914).
2576
2577 // Default loop alignment, which can be overridden by -align-loops.
2579
2580 // An out-of-order CPU can speculatively execute past a predictable branch,
2581 // but a conditional move could be stalled by an expensive earlier operation.
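  // For example, a well-predicted branch can be cheaper than a cmov whose input
  // is produced by a long-latency instruction such as a divide.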
2582 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
2583 EnableExtLdPromotion = true;
2585
2587
2588 // Default to having -disable-strictnode-mutation on
2589 IsStrictFPEnabled = true;
2590}
2591
2592// This has so far only been implemented for 64-bit MachO.
2593bool X86TargetLowering::useLoadStackGuardNode() const {
2594  return Subtarget.isTargetMachO() && Subtarget.is64Bit();
2595}
2596
2597bool X86TargetLowering::useStackGuardXorFP() const {
2598  // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
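  // (MSVC's /GS stores the cookie XOR'ed with the frame pointer, so the check
  // has to XOR the frame pointer back in; see emitStackGuardXorFP below.)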
2599 return Subtarget.getTargetTriple().isOSMSVCRT() && !Subtarget.isTargetMachO();
2600}
2601
2602SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
2603                                               const SDLoc &DL) const {
2604 EVT PtrTy = getPointerTy(DAG.getDataLayout());
2605 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
2606 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
2607 return SDValue(Node, 0);
2608}
2609
2610TargetLoweringBase::LegalizeTypeAction
2611X86TargetLowering::getPreferredVectorAction(MVT VT) const {
2612  if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
2613 !Subtarget.hasBWI())
2614 return TypeSplitVector;
2615
2616 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
2617 !Subtarget.hasF16C() && VT.getVectorElementType() == MVT::f16)
2618 return TypeSplitVector;
2619
2620 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
2621 VT.getVectorElementType() != MVT::i1)
2622 return TypeWidenVector;
2623
2623
2624  return TargetLoweringBase::getPreferredVectorAction(VT);
2625}
2626
2627FastISel *
2628X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
2629                                  const TargetLibraryInfo *libInfo) const {
2630 return X86::createFastISel(funcInfo, libInfo);
2631}
2632
2633//===----------------------------------------------------------------------===//
2634// Other Lowering Hooks
2635//===----------------------------------------------------------------------===//
2636
2637bool X86::mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
2638                      bool AssumeSingleUse) {
2639 if (!AssumeSingleUse && !Op.hasOneUse())
2640 return false;
2641 if (!ISD::isNormalLoad(Op.getNode()))
2642 return false;
2643
2644 // If this is an unaligned vector, make sure the target supports folding it.
2645 auto *Ld = cast<LoadSDNode>(Op.getNode());
2646 if (!Subtarget.hasAVX() && !Subtarget.hasSSEUnalignedMem() &&
2647 Ld->getValueSizeInBits(0) == 128 && Ld->getAlign() < Align(16))
2648 return false;
2649
2650 // TODO: If this is a non-temporal load and the target has an instruction
2651 // for it, it should not be folded. See "useNonTemporalLoad()".
2652
2653 return true;
2654}
2655
2656bool X86::mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
2657                                          const X86Subtarget &Subtarget,
2658 bool AssumeSingleUse) {
2659 assert(Subtarget.hasAVX() && "Expected AVX for broadcast from memory");
2660 if (!X86::mayFoldLoad(Op, Subtarget, AssumeSingleUse))
2661 return false;
2662
2663  // We cannot replace a wide volatile load with a broadcast-from-memory,
2664 // because that would narrow the load, which isn't legal for volatiles.
2665 auto *Ld = cast<LoadSDNode>(Op.getNode());
2666 return !Ld->isVolatile() ||
2667 Ld->getValueSizeInBits(0) == EltVT.getScalarSizeInBits();
2668}
2669
2670bool X86::mayFoldIntoStore(SDValue Op) {
2671  return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
2672}
2673
2674bool X86::mayFoldIntoZeroExtend(SDValue Op) {
2675  if (Op.hasOneUse()) {
2676 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
2677 return (ISD::ZERO_EXTEND == Opcode);
2678 }
2679 return false;
2680}
2681
2682static bool isLogicOp(unsigned Opcode) {
2683 // TODO: Add support for X86ISD::FAND/FOR/FXOR/FANDN with test coverage.
2684 return ISD::isBitwiseLogicOp(Opcode) || X86ISD::ANDNP == Opcode;
2685}
2686
2687static bool isTargetShuffle(unsigned Opcode) {
2688 switch(Opcode) {
2689 default: return false;
2690 case X86ISD::BLENDI:
2691 case X86ISD::PSHUFB:
2692 case X86ISD::PSHUFD:
2693 case X86ISD::PSHUFHW:
2694 case X86ISD::PSHUFLW:
2695 case X86ISD::SHUFP:
2696 case X86ISD::INSERTPS:
2697 case X86ISD::EXTRQI:
2698 case X86ISD::INSERTQI:
2699 case X86ISD::VALIGN:
2700 case X86ISD::PALIGNR:
2701 case X86ISD::VSHLDQ:
2702 case X86ISD::VSRLDQ:
2703 case X86ISD::MOVLHPS:
2704 case X86ISD::MOVHLPS:
2705 case X86ISD::MOVSHDUP:
2706 case X86ISD::MOVSLDUP:
2707 case X86ISD::MOVDDUP:
2708 case X86ISD::MOVSS:
2709 case X86ISD::MOVSD:
2710 case X86ISD::MOVSH:
2711 case X86ISD::UNPCKL:
2712 case X86ISD::UNPCKH:
2713 case X86ISD::VBROADCAST:
2714 case X86ISD::VPERMILPI:
2715 case X86ISD::VPERMILPV:
2716 case X86ISD::VPERM2X128:
2717 case X86ISD::SHUF128:
2718 case X86ISD::VPERMIL2:
2719 case X86ISD::VPERMI:
2720 case X86ISD::VPPERM:
2721 case X86ISD::VPERMV:
2722 case X86ISD::VPERMV3:
2723 case X86ISD::VZEXT_MOVL:
2724 return true;
2725 }
2726}
2727
2728static bool isTargetShuffleVariableMask(unsigned Opcode) {
2729 switch (Opcode) {
2730 default: return false;
2731 // Target Shuffles.
2732 case X86ISD::PSHUFB:
2733 case X86ISD::VPERMILPV:
2734 case X86ISD::VPERMIL2:
2735 case X86ISD::VPPERM:
2736 case X86ISD::VPERMV:
2737 case X86ISD::VPERMV3:
2738 return true;
2739 // 'Faux' Target Shuffles.
2740 case ISD::OR:
2741 case ISD::AND:
2742 case X86ISD::ANDNP:
2743 return true;
2744 }
2745}
2746
2747SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
2748  MachineFunction &MF = DAG.getMachineFunction();
2749  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2750  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2751  int ReturnAddrIndex = FuncInfo->getRAIndex();
2752
2753 if (ReturnAddrIndex == 0) {
2754 // Set up a frame object for the return address.
2755 unsigned SlotSize = RegInfo->getSlotSize();
2756 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
2757 -(int64_t)SlotSize,
2758 false);
2759 FuncInfo->setRAIndex(ReturnAddrIndex);
2760 }
2761
2762 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
2763}
2764
2765bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model CM,
2766                                       bool HasSymbolicDisplacement) {
2767  // The offset should fit into a 32-bit immediate field.
2768 if (!isInt<32>(Offset))
2769 return false;
2770
2771 // If we don't have a symbolic displacement - we don't have any extra
2772 // restrictions.
2773 if (!HasSymbolicDisplacement)
2774 return true;
2775
2776 // We can fold large offsets in the large code model because we always use
2777 // 64-bit offsets.
2778 if (CM == CodeModel::Large)
2779 return true;
2780
2781  // For the kernel code model we know that all objects reside in the negative
2782  // half of the 32-bit address space, so we must not accept negative offsets
2783  // (they may fall out of that range), but we may accept pretty large positive ones.
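  // (In the kernel code model, code and data live in the top 2GB of the address
  // space, i.e. at negative sign-extended 32-bit addresses.)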
2784 if (CM == CodeModel::Kernel)
2785 return Offset >= 0;
2786
2787  // For other non-large code models we assume that the last small object ends
2788  // at least 16MB before the 31-bit boundary, so offsets below 16MB are safe. We
2789  // may also accept pretty large negative constants, knowing that all objects
2790  // are in the positive half of the address space.
2791 return Offset < 16 * 1024 * 1024;
2792}
2793
2794/// Return true if the condition is a signed comparison operation.
2795static bool isX86CCSigned(unsigned X86CC) {
2796 switch (X86CC) {
2797 default:
2798 llvm_unreachable("Invalid integer condition!");
2799 case X86::COND_E:
2800 case X86::COND_NE:
2801 case X86::COND_B:
2802 case X86::COND_A:
2803 case X86::COND_BE:
2804 case X86::COND_AE:
2805 return false;
2806 case X86::COND_G:
2807 case X86::COND_GE:
2808 case X86::COND_L:
2809 case X86::COND_LE:
2810 return true;
2811 }
2812}
2813
2814static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
2815  switch (SetCCOpcode) {
2816 // clang-format off
2817 default: llvm_unreachable("Invalid integer condition!");
2818 case ISD::SETEQ: return X86::COND_E;
2819 case ISD::SETGT: return X86::COND_G;
2820 case ISD::SETGE: return X86::COND_GE;
2821 case ISD::SETLT: return X86::COND_L;
2822 case ISD::SETLE: return X86::COND_LE;
2823 case ISD::SETNE: return X86::COND_NE;
2824 case ISD::SETULT: return X86::COND_B;
2825 case ISD::SETUGT: return X86::COND_A;
2826 case ISD::SETULE: return X86::COND_BE;
2827 case ISD::SETUGE: return X86::COND_AE;
2828 // clang-format on
2829 }
2830}
2831
2832/// Do a one-to-one translation of an ISD::CondCode to the X86-specific
2833/// condition code, returning the condition code and the LHS/RHS of the
2834/// comparison to make.
2835static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
2836                                    bool isFP, SDValue &LHS, SDValue &RHS,
2837 SelectionDAG &DAG) {
2838 if (!isFP) {
2839 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2840 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnes()) {
2841 // X > -1 -> X == 0, jump !sign.
2842 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2843 return X86::COND_NS;
2844 }
2845 if (SetCCOpcode == ISD::SETLT && RHSC->isZero()) {
2846 // X < 0 -> X == 0, jump on sign.
2847 return X86::COND_S;
2848 }
2849 if (SetCCOpcode == ISD::SETGE && RHSC->isZero()) {
2850 // X >= 0 -> X == 0, jump on !sign.
2851 return X86::COND_NS;
2852 }
2853 if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
2854 // X < 1 -> X <= 0
2855 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2856 return X86::COND_LE;
2857 }
2858 }
2859
2860 return TranslateIntegerX86CC(SetCCOpcode);
2861 }
2862
2863 // First determine if it is required or is profitable to flip the operands.
2864
2865 // If LHS is a foldable load, but RHS is not, flip the condition.
2866 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
2867 !ISD::isNON_EXTLoad(RHS.getNode())) {
2868 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
2869 std::swap(LHS, RHS);
2870 }
2871
2872 switch (SetCCOpcode) {
2873 default: break;
2874 case ISD::SETOLT:
2875 case ISD::SETOLE:
2876 case ISD::SETUGT:
2877 case ISD::SETUGE:
2878 std::swap(LHS, RHS);
2879 break;
2880 }
2881
2882 // On a floating point condition, the flags are set as follows:
2883 // ZF PF CF op
2884 // 0 | 0 | 0 | X > Y
2885 // 0 | 0 | 1 | X < Y
2886 // 1 | 0 | 0 | X == Y
2887 // 1 | 1 | 1 | unordered
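  // For example, SETGT maps to COND_A (CF == 0 and ZF == 0), which is also false
  // for unordered operands, matching the table above.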
2888 switch (SetCCOpcode) {
2889 // clang-format off
2890 default: llvm_unreachable("Condcode should be pre-legalized away");
2891 case ISD::SETUEQ:
2892 case ISD::SETEQ: return X86::COND_E;
2893 case ISD::SETOLT: // flipped
2894 case ISD::SETOGT:
2895 case ISD::SETGT: return X86::COND_A;
2896 case ISD::SETOLE: // flipped
2897 case ISD::SETOGE:
2898 case ISD::SETGE: return X86::COND_AE;
2899 case ISD::SETUGT: // flipped
2900 case ISD::SETULT:
2901 case ISD::SETLT: return X86::COND_B;
2902 case ISD::SETUGE: // flipped
2903 case ISD::SETULE:
2904 case ISD::SETLE: return X86::COND_BE;
2905 case ISD::SETONE:
2906 case ISD::SETNE: return X86::COND_NE;
2907 case ISD::SETUO: return X86::COND_P;
2908 case ISD::SETO: return X86::COND_NP;
2909 case ISD::SETOEQ:
2910 case ISD::SETUNE: return X86::COND_INVALID;
2911 // clang-format on
2912 }
2913}
2914
2915/// Is there a floating point cmov for the specific X86 condition code?
2916/// The current x86 ISA includes the following FP cmov instructions:
2917/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
2918static bool hasFPCMov(unsigned X86CC) {
2919 switch (X86CC) {
2920 default:
2921 return false;
2922 case X86::COND_B:
2923 case X86::COND_BE:
2924 case X86::COND_E:
2925 case X86::COND_P:
2926 case X86::COND_A:
2927 case X86::COND_AE:
2928 case X86::COND_NE:
2929 case X86::COND_NP:
2930 return true;
2931 }
2932}
2933
2934static bool useVPTERNLOG(const X86Subtarget &Subtarget, MVT VT) {
2935 return Subtarget.hasVLX() || Subtarget.canExtendTo512DQ() ||
2936 VT.is512BitVector();
2937}
2938
2939bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
2940                                           const CallInst &I,
2941 MachineFunction &MF,
2942 unsigned Intrinsic) const {
2944 Info.offset = 0;
2945
2946 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
2947 if (!IntrData) {
2948 switch (Intrinsic) {
2949 case Intrinsic::x86_aesenc128kl:
2950 case Intrinsic::x86_aesdec128kl:
2952 Info.ptrVal = I.getArgOperand(1);
2953 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
2954 Info.align = Align(1);
2956 return true;
2957 case Intrinsic::x86_aesenc256kl:
2958 case Intrinsic::x86_aesdec256kl:
2960 Info.ptrVal = I.getArgOperand(1);
2961 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
2962 Info.align = Align(1);
2964 return true;
2965 case Intrinsic::x86_aesencwide128kl:
2966 case Intrinsic::x86_aesdecwide128kl:
2968 Info.ptrVal = I.getArgOperand(0);
2969 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
2970 Info.align = Align(1);
2972 return true;
2973 case Intrinsic::x86_aesencwide256kl:
2974 case Intrinsic::x86_aesdecwide256kl:
2976 Info.ptrVal = I.getArgOperand(0);
2977 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
2978 Info.align = Align(1);
2980 return true;
2981 case Intrinsic::x86_cmpccxadd32:
2982 case Intrinsic::x86_cmpccxadd64:
2983 case Intrinsic::x86_atomic_bts:
2984 case Intrinsic::x86_atomic_btc:
2985 case Intrinsic::x86_atomic_btr: {
2987 Info.ptrVal = I.getArgOperand(0);
2988 unsigned Size = I.getType()->getScalarSizeInBits();
2989 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
2990 Info.align = Align(Size);
2993 return true;
2994 }
2995 case Intrinsic::x86_atomic_bts_rm:
2996 case Intrinsic::x86_atomic_btc_rm:
2997 case Intrinsic::x86_atomic_btr_rm: {
2999 Info.ptrVal = I.getArgOperand(0);
3000 unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();
3001 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
3002 Info.align = Align(Size);
3005 return true;
3006 }
3007 case Intrinsic::x86_aadd32:
3008 case Intrinsic::x86_aadd64:
3009 case Intrinsic::x86_aand32:
3010 case Intrinsic::x86_aand64:
3011 case Intrinsic::x86_aor32:
3012 case Intrinsic::x86_aor64:
3013 case Intrinsic::x86_axor32:
3014 case Intrinsic::x86_axor64:
3015 case Intrinsic::x86_atomic_add_cc:
3016 case Intrinsic::x86_atomic_sub_cc:
3017 case Intrinsic::x86_atomic_or_cc:
3018 case Intrinsic::x86_atomic_and_cc:
3019 case Intrinsic::x86_atomic_xor_cc: {
3021 Info.ptrVal = I.getArgOperand(0);
3022 unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();
3023 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
3024 Info.align = Align(Size);
3027 return true;
3028 }
3029 }
3030 return false;
3031 }
3032
3033 switch (IntrData->Type) {
3034  case TRUNCATE_TO_MEM_VI8:
3035  case TRUNCATE_TO_MEM_VI16:
3036  case TRUNCATE_TO_MEM_VI32: {
3038 Info.ptrVal = I.getArgOperand(0);
3039 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
3041 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
3042 ScalarVT = MVT::i8;
3043 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
3044 ScalarVT = MVT::i16;
3045 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
3046 ScalarVT = MVT::i32;
3047
3048 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
3049 Info.align = Align(1);
3051 break;
3052 }
3053 case GATHER:
3054 case GATHER_AVX2: {
3056 Info.ptrVal = nullptr;
3057 MVT DataVT = MVT::getVT(I.getType());
3058 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
3059 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
3060 IndexVT.getVectorNumElements());
3061 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
3062 Info.align = Align(1);
3064 break;
3065 }
3066 case SCATTER: {
3068 Info.ptrVal = nullptr;
3069 MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
3070 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
3071 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
3072 IndexVT.getVectorNumElements());
3073 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
3074 Info.align = Align(1);
3076 break;
3077 }
3078 default:
3079 return false;
3080 }
3081
3082 return true;
3083}
3084
3085/// Returns true if the target can instruction select the
3086/// specified FP immediate natively. If false, the legalizer will
3087/// materialize the FP immediate as a load from a constant pool.
3088bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
3089                                     bool ForCodeSize) const {
3090 for (const APFloat &FPImm : LegalFPImmediates)
3091 if (Imm.bitwiseIsEqual(FPImm))
3092 return true;
3093 return false;
3094}
3095
3096bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
3097                                              ISD::LoadExtType ExtTy,
3098 EVT NewVT) const {
3099 assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow");
3100
3101  // "ELF Handling for Thread-Local Storage" specifies that the R_X86_64_GOTTPOFF
3102  // relocation must target a movq or addq instruction: don't let the load shrink.
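  // For example, "movq foo@GOTTPOFF(%rip), %rax" must stay a 64-bit load so the
  // linker can still recognize (and possibly relax) the instruction.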
3103 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
3104 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
3105 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
3106 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
3107
3108 // If this is an (1) AVX vector load with (2) multiple uses and (3) all of
3109 // those uses are extracted directly into a store, then the extract + store
3110 // can be store-folded. Therefore, it's probably not worth splitting the load.
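  // For example, (store (extract_subvector (load v8f32), 4)) can become a single
  // vextractf128 with a memory destination, so the wide load stays profitable.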
3111 EVT VT = Load->getValueType(0);
3112 if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
3113 for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
3114 // Skip uses of the chain value. Result 0 of the node is the load value.
3115 if (UI.getUse().getResNo() != 0)
3116 continue;
3117
3118 // If this use is not an extract + store, it's probably worth splitting.
3119 if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
3120 UI->use_begin()->getOpcode() != ISD::STORE)
3121 return true;
3122 }
3123 // All non-chain uses are extract + store.
3124 return false;
3125 }
3126
3127 return true;
3128}
3129
3130/// Returns true if it is beneficial to convert a load of a constant
3131/// to just the constant itself.
3132bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
3133                                                          Type *Ty) const {
3134 assert(Ty->isIntegerTy());
3135
3136 unsigned BitSize = Ty->getPrimitiveSizeInBits();
3137 if (BitSize == 0 || BitSize > 64)
3138 return false;
3139 return true;
3140}
3141
3142bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
3143  // If we are using XMM registers in the ABI and the condition of the select is
3144 // a floating-point compare and we have blendv or conditional move, then it is
3145 // cheaper to select instead of doing a cross-register move and creating a
3146 // load that depends on the compare result.
3147 bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128;
3148 return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
3149}
3150
3151bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
3152  // TODO: It might be a win to ease or lift this restriction, but the generic
3153 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
3154 if (VT.isVector() && Subtarget.hasAVX512())
3155 return false;
3156
3157 return true;
3158}
3159
3160bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
3161                                               SDValue C) const {
3162 // TODO: We handle scalars using custom code, but