1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelLowering.h"
16#include "X86.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
22#include "X86TargetMachine.h"
23#include "X86TargetObjectFile.h"
25#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/Statistic.h"
43#include "llvm/IR/CallingConv.h"
44#include "llvm/IR/Constants.h"
47#include "llvm/IR/Function.h"
48#include "llvm/IR/GlobalAlias.h"
50#include "llvm/IR/IRBuilder.h"
52#include "llvm/IR/Intrinsics.h"
54#include "llvm/MC/MCAsmInfo.h"
55#include "llvm/MC/MCContext.h"
56#include "llvm/MC/MCExpr.h"
57#include "llvm/MC/MCSymbol.h"
59#include "llvm/Support/Debug.h"
64#include <algorithm>
65#include <bitset>
66#include <cctype>
67#include <numeric>
68using namespace llvm;
69
70#define DEBUG_TYPE "x86-isel"
71
73 "x86-experimental-pref-innermost-loop-alignment", cl::init(4),
75 "Sets the preferable loop alignment for experiments (as log2 bytes) "
76 "for innermost loops only. If specified, this option overrides "
77 "alignment set by x86-experimental-pref-loop-alignment."),
79
81 "mul-constant-optimization", cl::init(true),
82 cl::desc("Replace 'mul x, Const' with more effective instructions like "
83 "SHIFT, LEA, etc."),
85
87 "x86-experimental-unordered-atomic-isel", cl::init(false),
88 cl::desc("Use LoadSDNode and StoreSDNode instead of "
89 "AtomicSDNode for unordered atomic loads and "
90 "stores respectively."),
92
93 X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
94 const X86Subtarget &STI)
95 : TargetLowering(TM), Subtarget(STI) {
96 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
97 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
98
99 // Set up the TargetLowering object.
100
101 // X86 is weird. It always uses i8 for shift amounts and setcc results.
103 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
105
106 // For 64-bit, since we have so many registers, use the ILP scheduler.
107 // For 32-bit, use the register pressure specific scheduling.
108 // For Atom, always use ILP scheduling.
109 if (Subtarget.isAtom())
111 else if (Subtarget.is64Bit())
113 else
115 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
117
118 // Bypass expensive divides and use cheaper ones.
119 if (TM.getOptLevel() >= CodeGenOptLevel::Default) {
120 if (Subtarget.hasSlowDivide32())
121 addBypassSlowDiv(32, 8);
122 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
123 addBypassSlowDiv(64, 32);
124 }
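// Illustrative sketch (hypothetical helper, not from this file): the
// addBypassSlowDiv(32, 8) call above asks CodeGenPrepare's BypassSlowDivision
// transform to guard 32-bit divides with a cheap 8-bit divide when both
// operands happen to be small. The generated code behaves roughly like:
static unsigned bypassDivSketch(unsigned A, unsigned B) {
  if (((A | B) >> 8) == 0)                      // both operands fit in 8 bits
    return (unsigned char)A / (unsigned char)B; // short DIV r/m8
  return A / B;                                 // full-width DIV
}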
125
126 // Setup Windows compiler runtime calls.
127 if (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()) {
128 static const struct {
129 const RTLIB::Libcall Op;
130 const char * const Name;
131 const CallingConv::ID CC;
132 } LibraryCalls[] = {
133 { RTLIB::SDIV_I64, "_alldiv", CallingConv::X86_StdCall },
134 { RTLIB::UDIV_I64, "_aulldiv", CallingConv::X86_StdCall },
135 { RTLIB::SREM_I64, "_allrem", CallingConv::X86_StdCall },
136 { RTLIB::UREM_I64, "_aullrem", CallingConv::X86_StdCall },
137 { RTLIB::MUL_I64, "_allmul", CallingConv::X86_StdCall },
138 };
139
140 for (const auto &LC : LibraryCalls) {
141 setLibcallName(LC.Op, LC.Name);
142 setLibcallCallingConv(LC.Op, LC.CC);
143 }
144 }
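// Illustrative sketch (hypothetical helper, not from this file): with the
// libcall table above, 64-bit division on 32-bit MSVC-style targets calls the
// CRT helpers (_alldiv and friends, stdcall) instead of libgcc's __divdi3.
static long long sdiv64Sketch(long long A, long long B) {
  return A / B; // on i?86-pc-windows-msvc this becomes a call to _alldiv
}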
145
146 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
147 // MSVCRT doesn't have powi; fall back to pow
148 setLibcallName(RTLIB::POWI_F32, nullptr);
149 setLibcallName(RTLIB::POWI_F64, nullptr);
150 }
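// Illustrative sketch (hypothetical helper, not from this file): clearing the
// POWI libcall names makes legalization convert the integer exponent to
// floating point and call pow() instead. __builtin_powi is the clang/GCC
// builtin that maps to llvm.powi.
static double powiSketch(double X, int N) {
  return __builtin_powi(X, N); // on MSVCRT targets: pow(X, (double)N)
}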
151
152 // If we don't have cmpxchg8b (meaning this is a 386/486), limit the atomic
153 // size to 32 bits so that AtomicExpandPass expands it and we don't need cmpxchg8b.
154 // FIXME: Should we be limiting the atomic size on other configs? Default is
155 // 1024.
156 if (!Subtarget.canUseCMPXCHG8B())
158
159 setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64);
160
162
163 // Set up the register classes.
164 addRegisterClass(MVT::i8, &X86::GR8RegClass);
165 addRegisterClass(MVT::i16, &X86::GR16RegClass);
166 addRegisterClass(MVT::i32, &X86::GR32RegClass);
167 if (Subtarget.is64Bit())
168 addRegisterClass(MVT::i64, &X86::GR64RegClass);
169
170 for (MVT VT : MVT::integer_valuetypes())
172
173 // We don't accept any truncstore of integer registers.
174 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
175 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
176 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
177 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
178 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
179 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
180
181 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
182
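// Illustrative sketch (hypothetical helper, not from this file): because
// truncating stores are expanded, a narrowing store is emitted as an explicit
// conversion followed by a plain store of the narrower type.
static void storeNarrowSketch(double D, float *P) {
  *P = (float)D; // cvtsd2ss + movss with SSE2, or an x87 fstps
}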
183 // SETOEQ and SETUNE require checking two conditions.
184 for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
187 }
188
189 // Integer absolute.
190 if (Subtarget.canUseCMOV()) {
191 setOperationAction(ISD::ABS , MVT::i16 , Custom);
192 setOperationAction(ISD::ABS , MVT::i32 , Custom);
193 if (Subtarget.is64Bit())
194 setOperationAction(ISD::ABS , MVT::i64 , Custom);
195 }
196
197 // Absolute difference.
198 for (auto Op : {ISD::ABDS, ISD::ABDU}) {
199 setOperationAction(Op , MVT::i8 , Custom);
200 setOperationAction(Op , MVT::i16 , Custom);
201 setOperationAction(Op , MVT::i32 , Custom);
202 if (Subtarget.is64Bit())
203 setOperationAction(Op , MVT::i64 , Custom);
204 }
205
206 // Signed saturation subtraction.
210 if (Subtarget.is64Bit())
212
213 // Funnel shifts.
214 for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
215 // For slow shld targets we only lower for code size.
216 LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
217
218 setOperationAction(ShiftOp , MVT::i8 , Custom);
219 setOperationAction(ShiftOp , MVT::i16 , Custom);
220 setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction);
221 if (Subtarget.is64Bit())
222 setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction);
223 }
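// Illustrative sketch (hypothetical helper, not from this file): the i32
// funnel-shift pattern below is matched to ISD::FSHL and, where SHLD is fast,
// selected as a single SHLD; on slow-SHLD subtargets it is custom-lowered and
// SHLD is kept only when optimizing for size.
static unsigned fshl32Sketch(unsigned Hi, unsigned Lo, unsigned Amt) {
  Amt &= 31;
  return Amt ? (Hi << Amt) | (Lo >> (32 - Amt)) : Hi;
}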
224
225 if (!Subtarget.useSoftFloat()) {
226 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
227 // operation.
232 // We have an algorithm for SSE2, and we turn this into a 64-bit
233 // FILD or VCVTUSI2SS/SD for other targets.
236 // We have an algorithm for SSE2->double, and we turn this into a
237 // 64-bit FILD followed by conditional FADD for other targets.
240
241 // Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
242 // this operation.
245 // SSE has no i16 to fp conversion, only i32. We promote in the handler
246 // to allow f80 to use i16 and f64 to use i16 with sse1 only
249 // f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
252 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
253 // are Legal, f80 is custom lowered.
256
257 // Promote i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
258 // this operation.
260 // FIXME: This doesn't generate invalid exception when it should. PR44019.
266 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
267 // are Legal, f80 is custom lowered.
270
271 // Handle FP_TO_UINT by promoting the destination to a larger signed
272 // conversion.
274 // FIXME: This doesn't generate invalid exception when it should. PR44019.
277 // FIXME: This doesn't generate invalid exception when it should. PR44019.
283
288
289 if (!Subtarget.is64Bit()) {
292 }
293 }
294
295 if (Subtarget.hasSSE2()) {
296 // Custom lowering for saturating float to int conversions.
297 // We handle promotion to larger result types manually.
298 for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
301 }
302 if (Subtarget.is64Bit()) {
305 }
306 }
307
308 // Handle address space casts between mixed sized pointers.
311
312 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
313 if (!Subtarget.hasSSE2()) {
316 if (Subtarget.is64Bit()) {
318 // Without SSE, i64->f64 goes through memory.
320 }
321 } else if (!Subtarget.is64Bit())
323
324 // Scalar integer divide and remainder are lowered to use operations that
325 // produce two results, to match the available instructions. This exposes
326 // the two-result form to trivial CSE, which is able to combine x/y and x%y
327 // into a single instruction.
328 //
329 // Scalar integer multiply-high is also lowered to use two-result
330 // operations, to match the available instructions. However, plain multiply
331 // (low) operations are left as Legal, as there are single-result
332 // instructions for this in x86. Using the two-result multiply instructions
333 // when both high and low results are needed must be arranged by dagcombine.
334 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
341 }
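// Illustrative sketch (hypothetical helper, not from this file): since
// DIV/IDIV put the quotient in EAX and the remainder in EDX, exposing the
// two-result SDIVREM/UDIVREM nodes lets CSE fold x/y and x%y into a single
// instruction.
static void divRemSketch(int X, int Y, int &Quot, int &Rem) {
  Quot = X / Y; // one IDIV computes both results...
  Rem = X % Y;  // ...so the remainder reuses EDX from the same instruction
}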
342
343 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
345 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
346 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
349 }
350 if (Subtarget.is64Bit())
355
356 setOperationAction(ISD::FREM , MVT::f32 , Expand);
357 setOperationAction(ISD::FREM , MVT::f64 , Expand);
358 setOperationAction(ISD::FREM , MVT::f80 , Expand);
359 setOperationAction(ISD::FREM , MVT::f128 , Expand);
360
361 if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
367 }
368
369 // Promote the i8 variants and force them on up to i32 which has a shorter
370 // encoding.
371 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
373 // Promoted i16. tzcntw has a false dependency on Intel CPUs. For BSF, we emit
374 // a REP prefix to encode it as TZCNT for modern CPUs so it makes sense to
375 // promote that too.
376 setOperationPromotedToType(ISD::CTTZ , MVT::i16 , MVT::i32);
378
379 if (!Subtarget.hasBMI()) {
380 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
382 if (Subtarget.is64Bit()) {
383 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
385 }
386 }
387
388 if (Subtarget.hasLZCNT()) {
389 // When promoting the i8 variants, force them to i32 for a shorter
390 // encoding.
391 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
393 } else {
394 for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
395 if (VT == MVT::i64 && !Subtarget.is64Bit())
396 continue;
399 }
400 }
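// Illustrative sketch (hypothetical helper, not from this file): without
// LZCNT, a 32-bit CTLZ is custom-lowered to BSR plus XOR with 31, and the
// zero-input case (undefined for BSR) needs an extra CMOV or branch.
static int clz32Sketch(unsigned X) {
  return X ? __builtin_clz(X) : 32; // bsr + xor 31 (+ zero-input handling)
}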
401
404 // Special handling for half-precision floating point conversions.
405 // If we don't have F16C support, then lower half float conversions
406 // into library calls.
408 Op, MVT::f32,
409 (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand);
410 // There's never any support for operations beyond MVT::f32.
411 setOperationAction(Op, MVT::f64, Expand);
412 setOperationAction(Op, MVT::f80, Expand);
413 setOperationAction(Op, MVT::f128, Expand);
414 }
415
416 for (MVT VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
417 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
418 setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand);
419 setTruncStoreAction(VT, MVT::f16, Expand);
420 setTruncStoreAction(VT, MVT::bf16, Expand);
421
424 }
425
429 if (Subtarget.is64Bit())
431 if (Subtarget.hasPOPCNT()) {
432 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
433 // popcntw is longer to encode than popcntl and also has a false dependency
434 // on the dest that popcntl hasn't had since Cannon Lake.
435 setOperationPromotedToType(ISD::CTPOP, MVT::i16, MVT::i32);
436 } else {
440 if (Subtarget.is64Bit())
442 else
444 }
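// Illustrative sketch (hypothetical helper, not from this file): promoting
// i8/i16 CTPOP to i32 means POPCNTL is emitted instead of POPCNTW, avoiding
// the operand-size prefix and the false dependency noted above.
static int popcount16Sketch(unsigned short X) {
  return __builtin_popcount(X); // movzwl + popcntl
}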
445
447
448 if (!Subtarget.hasMOVBE())
450
451 // X86 wants to expand cmov itself.
452 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
457 }
458 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
459 if (VT == MVT::i64 && !Subtarget.is64Bit())
460 continue;
463 }
464
465 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
468
470 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
471 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
475 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
476 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
477
478 // Darwin ABI issue.
479 for (auto VT : { MVT::i32, MVT::i64 }) {
480 if (VT == MVT::i64 && !Subtarget.is64Bit())
481 continue;
488 }
489
490 // 64-bit shl, sra, srl (iff 32-bit x86)
491 for (auto VT : { MVT::i32, MVT::i64 }) {
492 if (VT == MVT::i64 && !Subtarget.is64Bit())
493 continue;
497 }
498
499 if (Subtarget.hasSSEPrefetch() || Subtarget.hasThreeDNow())
501
503
504 // Expand certain atomics
505 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
513 }
514
515 if (!Subtarget.is64Bit())
517
518 if (Subtarget.canUseCMPXCHG16B())
520
521 // FIXME - use subtarget debug flags
522 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
523 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
524 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
526 }
527
530
533
534 setOperationAction(ISD::TRAP, MVT::Other, Legal);
536 if (Subtarget.isTargetPS())
538 else
540
541 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
543 setOperationAction(ISD::VAEND , MVT::Other, Expand);
544 bool Is64Bit = Subtarget.is64Bit();
545 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
546 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
547
550
552
553 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
556
558
559 auto setF16Action = [&] (MVT VT, LegalizeAction Action) {
560 setOperationAction(ISD::FABS, VT, Action);
561 setOperationAction(ISD::FNEG, VT, Action);
563 setOperationAction(ISD::FREM, VT, Action);
564 setOperationAction(ISD::FMA, VT, Action);
565 setOperationAction(ISD::FMINNUM, VT, Action);
566 setOperationAction(ISD::FMAXNUM, VT, Action);
569 setOperationAction(ISD::FSIN, VT, Action);
570 setOperationAction(ISD::FCOS, VT, Action);
571 setOperationAction(ISD::FSINCOS, VT, Action);
572 setOperationAction(ISD::FSQRT, VT, Action);
573 setOperationAction(ISD::FPOW, VT, Action);
574 setOperationAction(ISD::FLOG, VT, Action);
575 setOperationAction(ISD::FLOG2, VT, Action);
576 setOperationAction(ISD::FLOG10, VT, Action);
577 setOperationAction(ISD::FEXP, VT, Action);
578 setOperationAction(ISD::FEXP2, VT, Action);
579 setOperationAction(ISD::FEXP10, VT, Action);
580 setOperationAction(ISD::FCEIL, VT, Action);
581 setOperationAction(ISD::FFLOOR, VT, Action);
583 setOperationAction(ISD::FRINT, VT, Action);
584 setOperationAction(ISD::BR_CC, VT, Action);
585 setOperationAction(ISD::SETCC, VT, Action);
588 setOperationAction(ISD::FROUND, VT, Action);
590 setOperationAction(ISD::FTRUNC, VT, Action);
591 setOperationAction(ISD::FLDEXP, VT, Action);
592 };
593
594 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
595 // f16, f32 and f64 use SSE.
596 // Set up the FP register classes.
597 addRegisterClass(MVT::f16, Subtarget.hasAVX512() ? &X86::FR16XRegClass
598 : &X86::FR16RegClass);
599 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
600 : &X86::FR32RegClass);
601 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
602 : &X86::FR64RegClass);
603
604 // Disable f32->f64 extload as we can only generate this in one instruction
605 // under optsize. So it's easier to pattern match (fpext (load)) for that
606 // case instead of needing to emit 2 instructions for extload in the
607 // non-optsize case.
608 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
609
610 for (auto VT : { MVT::f32, MVT::f64 }) {
611 // Use ANDPD to simulate FABS.
613
614 // Use XORP to simulate FNEG.
616
617 // Use ANDPD and ORPD to simulate FCOPYSIGN.
619
620 // These might be better off as horizontal vector ops.
623
624 // We don't support sin/cos/fmod
628 }
629
630 // Half type will be promoted by default.
631 setF16Action(MVT::f16, Promote);
639
669
670 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
671 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
672
673 // Lower this to MOVMSK plus an AND.
676
677 } else if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1() &&
678 (UseX87 || Is64Bit)) {
679 // Use SSE for f32, x87 for f64.
680 // Set up the FP register classes.
681 addRegisterClass(MVT::f32, &X86::FR32RegClass);
682 if (UseX87)
683 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
684
685 // Use ANDPS to simulate FABS.
687
688 // Use XORP to simulate FNEG.
690
691 if (UseX87)
693
694 // Use ANDPS and ORPS to simulate FCOPYSIGN.
695 if (UseX87)
698
699 // We don't support sin/cos/fmod
703
704 if (UseX87) {
705 // Always expand sin/cos functions even though x87 has an instruction.
709 }
710 } else if (UseX87) {
711 // f32 and f64 in x87.
712 // Set up the FP register classes.
713 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
714 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
715
716 for (auto VT : { MVT::f32, MVT::f64 }) {
719
720 // Always expand sin/cos functions even though x87 has an instruction.
724 }
725 }
726
727 // Expand FP32 immediates into loads from the stack, save special cases.
728 if (isTypeLegal(MVT::f32)) {
729 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
730 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
731 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
732 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
733 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
734 } else // SSE immediates.
735 addLegalFPImmediate(APFloat(+0.0f)); // xorps
736 }
737 // Expand FP64 immediates into loads from the stack, save special cases.
738 if (isTypeLegal(MVT::f64)) {
739 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
740 addLegalFPImmediate(APFloat(+0.0)); // FLD0
741 addLegalFPImmediate(APFloat(+1.0)); // FLD1
742 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
743 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
744 } else // SSE immediates.
745 addLegalFPImmediate(APFloat(+0.0)); // xorpd
746 }
747 // Support fp16 0 immediate.
748 if (isTypeLegal(MVT::f16))
749 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEhalf()));
750
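// Illustrative sketch (hypothetical helper, not from this file): immediates
// registered via addLegalFPImmediate are materialized without a constant-pool
// load, e.g. a +0.0 return value becomes XORPS/XORPD with SSE or FLDZ on x87.
static double zeroSketch() {
  return +0.0; // xorpd %xmm0, %xmm0
}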
751 // Handle constrained floating-point operations of scalar.
764
765 // We don't support FMA.
768
769 // f80 always uses X87.
770 if (UseX87) {
771 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
774 {
776 addLegalFPImmediate(TmpFlt); // FLD0
777 TmpFlt.changeSign();
778 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
779
780 bool ignored;
781 APFloat TmpFlt2(+1.0);
783 &ignored);
784 addLegalFPImmediate(TmpFlt2); // FLD1
785 TmpFlt2.changeSign();
786 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
787 }
788
789 // Always expand sin/cos functions even though x87 has an instruction.
793
804
805 // Handle constrained floating-point operations of scalar.
811 if (isTypeLegal(MVT::f16)) {
814 } else {
816 }
817 // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
818 // as Custom.
820 }
821
822 // f128 uses xmm registers, but most operations require libcalls.
823 if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
824 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
825 : &X86::VR128RegClass);
826
827 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
828
839
843
849 // No STRICT_FSINCOS
852
855 // We need to custom handle any FP_ROUND with an f128 input, but
856 // LegalizeDAG uses the result type to know when to run a custom handler.
857 // So we have to list all legal floating point result types here.
858 if (isTypeLegal(MVT::f32)) {
861 }
862 if (isTypeLegal(MVT::f64)) {
865 }
866 if (isTypeLegal(MVT::f80)) {
869 }
870
872
873 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
874 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
875 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
876 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
877 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
878 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
879 }
880
881 // Always use a library call for pow.
882 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
883 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
884 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
885 setOperationAction(ISD::FPOW , MVT::f128 , Expand);
886
895
896 // Some FP actions are always expanded for vector types.
897 for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16,
898 MVT::v4f32, MVT::v8f32, MVT::v16f32,
899 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
912 }
913
914 // First set operation action for all vector types to either promote
915 // (for widening) or expand (for scalarization). Then we will selectively
916 // turn on ones that can be effectively codegen'd.
955 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
956 setTruncStoreAction(InnerVT, VT, Expand);
957
960
961 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
962 // types, we have to deal with them whether we ask for Expansion or not.
963 // Setting Expand causes its own optimisation problems though, so leave
964 // them legal.
965 if (VT.getVectorElementType() == MVT::i1)
966 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
967
968 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
969 // split/scalarized right now.
970 if (VT.getVectorElementType() == MVT::f16 ||
971 VT.getVectorElementType() == MVT::bf16)
972 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
973 }
974 }
975
976 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
977 // with -msoft-float, disable use of MMX as well.
978 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
979 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
980 // No operations on x86mmx supported, everything uses intrinsics.
981 }
982
983 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
984 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
985 : &X86::VR128RegClass);
986
989
990 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
991 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
998
999 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
1000 setOperationAction(ISD::STORE, MVT::v2f32, Custom);
1001
1007 }
1008
1009 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
1010 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
1011 : &X86::VR128RegClass);
1012
1013 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
1014 // registers cannot be used even for integer operations.
1015 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
1016 : &X86::VR128RegClass);
1017 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
1018 : &X86::VR128RegClass);
1019 addRegisterClass(MVT::v8f16, Subtarget.hasVLX() ? &X86::VR128XRegClass
1020 : &X86::VR128RegClass);
1021 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
1022 : &X86::VR128RegClass);
1023 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
1024 : &X86::VR128RegClass);
1025
1026 for (auto VT : { MVT::f64, MVT::v4f32, MVT::v2f64 }) {
1029 }
1030
1031 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
1032 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
1037 }
1038
1039 setOperationAction(ISD::MUL, MVT::v2i8, Custom);
1040 setOperationAction(ISD::MUL, MVT::v4i8, Custom);
1041 setOperationAction(ISD::MUL, MVT::v8i8, Custom);
1042
1043 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
1044 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
1045 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1046 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
1047 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
1048 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
1049 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
1050 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
1051 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
1052 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
1055
1056 setOperationAction(ISD::SMULO, MVT::v16i8, Custom);
1057 setOperationAction(ISD::UMULO, MVT::v16i8, Custom);
1058 setOperationAction(ISD::UMULO, MVT::v2i32, Custom);
1059
1060 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
1061 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
1063
1064 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1065 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
1066 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
1067 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
1068 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
1069 }
1070
1071 setOperationAction(ISD::ABDU, MVT::v16i8, Custom);
1072 setOperationAction(ISD::ABDS, MVT::v16i8, Custom);
1073 setOperationAction(ISD::ABDU, MVT::v8i16, Custom);
1074 setOperationAction(ISD::ABDS, MVT::v8i16, Custom);
1075 setOperationAction(ISD::ABDU, MVT::v4i32, Custom);
1076 setOperationAction(ISD::ABDS, MVT::v4i32, Custom);
1077
1088
1093
1094 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1098
1099 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1100 // setcc all the way to isel and prefer SETGT in some isel patterns.
1103 }
1104
1105 setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
1106 setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
1111
1112 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
1118 }
1119
1120 for (auto VT : { MVT::v8f16, MVT::v2f64, MVT::v2i64 }) {
1124
1125 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
1126 continue;
1127
1130 }
1131 setF16Action(MVT::v8f16, Expand);
1132 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
1133 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
1134 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
1135 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
1136
1137 // Custom lower v2i64 and v2f64 selects.
1144
1151
1152 // Custom legalize these to avoid over promotion or custom promotion.
1153 for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
1158 }
1159
1164
1167
1170
1171 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
1176
1181
1182 // We want to legalize this to an f64 load rather than an i64 load on
1183 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
1184 // store.
1185 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
1186 setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
1187 setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
1188 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
1189 setOperationAction(ISD::STORE, MVT::v4i16, Custom);
1191
1192 // Add 32-bit vector stores to help vectorization opportunities.
1193 setOperationAction(ISD::STORE, MVT::v2i16, Custom);
1195
1199 if (!Subtarget.hasAVX512())
1201
1205
1207
1224
1225 // In the customized shift lowering, the legal v4i32/v2i64 cases
1226 // in AVX2 will be recognized.
1227 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1231 if (VT == MVT::v2i64) continue;
1236 }
1237
1243 }
1244
1245 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1246 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1247 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1248 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1250 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1251 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1252 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1253 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1254
1255 // These might be better off as horizontal vector ops.
1260 }
1261
1262 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1263 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1266 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
1270 setOperationAction(ISD::FRINT, RoundedTy, Legal);
1276
1278 }
1279
1280 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
1281 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
1282 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
1283 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
1284 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
1285 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
1286 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
1287 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
1288
1289 for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
1292 }
1293
1297
1298 // FIXME: Do we need to handle scalar-to-vector here?
1299 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1300 setOperationAction(ISD::SMULO, MVT::v2i32, Custom);
1301
1302 // We directly match byte blends in the backend as they match the VSELECT
1303 // condition form.
1305
1306 // SSE41 brings specific instructions for doing vector sign extend even in
1307 // cases where we don't have SRA.
1308 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1311 }
1312
1313 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1314 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1315 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
1316 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
1317 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
1318 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1319 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1320 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1321 }
1322
1323 if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
1324 // We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can
1325 // do the pre and post work in the vector domain.
1328 // We need to mark SINT_TO_FP as Custom even though we want to expand it
1329 // so that DAG combine doesn't try to turn it into uint_to_fp.
1332 }
1333 }
1334
1335 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) {
1337 }
1338
1339 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1340 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1341 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1344 }
1345
1346 // XOP can efficiently perform BITREVERSE with VPPERM.
1347 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1349
1350 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1351 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1353 }
1354
1355 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1356 bool HasInt256 = Subtarget.hasInt256();
1357
1358 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1359 : &X86::VR256RegClass);
1360 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1361 : &X86::VR256RegClass);
1362 addRegisterClass(MVT::v16f16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1363 : &X86::VR256RegClass);
1364 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1365 : &X86::VR256RegClass);
1366 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1367 : &X86::VR256RegClass);
1368 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1369 : &X86::VR256RegClass);
1370 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1371 : &X86::VR256RegClass);
1372
1373 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1386
1388
1392
1395 }
1396
1397 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1398 // even though v8i16 is a legal type.
1399 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1400 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1401 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1402 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1406
1413
1425
1426 if (!Subtarget.hasAVX512())
1428
1429 // In the customized shift lowering, the legal v8i32/v4i64 cases
1430 // in AVX2 will be recognized.
1431 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1437 if (VT == MVT::v4i64) continue;
1442 }
1443
1444 // These types need custom splitting if their input is a 128-bit vector.
1449
1453 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1454 setOperationAction(ISD::SELECT, MVT::v16f16, Custom);
1457
1458 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1462 }
1463
1468
1470
1471 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1475
1476 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1477 // setcc all the way to isel and prefer SETGT in some isel patterns.
1480 }
1481
1482 setOperationAction(ISD::SETCC, MVT::v4f64, Custom);
1483 setOperationAction(ISD::SETCC, MVT::v8f32, Custom);
1488
1489 if (Subtarget.hasAnyFMA()) {
1490 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1491 MVT::v2f64, MVT::v4f64 }) {
1494 }
1495 }
1496
1497 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1498 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1499 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1500 }
1501
1502 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1503 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1504 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1505 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1506
1507 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1508 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1509 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1510 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1511 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1512 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1513 setOperationAction(ISD::AVGCEILU, MVT::v16i16, HasInt256 ? Legal : Custom);
1514 setOperationAction(ISD::AVGCEILU, MVT::v32i8, HasInt256 ? Legal : Custom);
1515
1516 setOperationAction(ISD::SMULO, MVT::v32i8, Custom);
1517 setOperationAction(ISD::UMULO, MVT::v32i8, Custom);
1518
1519 setOperationAction(ISD::ABS, MVT::v4i64, Custom);
1520 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1521 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1522 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1523 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1524
1525 setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1526 setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1527 setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1528 setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1529 setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1530 setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1531 setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1532 setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1537
1538 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1539 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1540 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1541 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1542 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1543 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1544 }
1545
1546 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1549 }
1550
1551 if (HasInt256) {
1552 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1553 // when we have a 256bit-wide blend with immediate.
1556
1557 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1558 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1559 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1560 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1561 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1562 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1563 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1564 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1565 }
1566 }
1567
1568 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1569 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1570 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1572 }
1573
1574 // Extract subvector is special because the value type
1575 // (result) is 128-bit but the source is 256-bit wide.
1576 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1577 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
1579 }
1580
1581 // Custom lower several nodes for 256-bit types.
1582 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1583 MVT::v16f16, MVT::v8f32, MVT::v4f64 }) {
1593 }
1594 setF16Action(MVT::v16f16, Expand);
1595 setOperationAction(ISD::FADD, MVT::v16f16, Expand);
1596 setOperationAction(ISD::FSUB, MVT::v16f16, Expand);
1597 setOperationAction(ISD::FMUL, MVT::v16f16, Expand);
1598 setOperationAction(ISD::FDIV, MVT::v16f16, Expand);
1599
1600 if (HasInt256) {
1602
1603 // Custom legalize 2x32 to get a little better code.
1606
1607 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1608 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1610 }
1611 }
1612
1613 if (!Subtarget.useSoftFloat() && !Subtarget.hasFP16() &&
1614 Subtarget.hasF16C()) {
1615 for (MVT VT : { MVT::f16, MVT::v2f16, MVT::v4f16, MVT::v8f16 }) {
1618 }
1619 for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32, MVT::v8f32 }) {
1622 }
1623 for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) {
1624 setOperationPromotedToType(Opc, MVT::v8f16, MVT::v8f32);
1625 setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32);
1626 }
1627 }
1628
1629 // This block controls legalization of the mask vector sizes that are
1630 // available with AVX512. 512-bit vectors are in a separate block controlled
1631 // by useAVX512Regs.
1632 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1633 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1634 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1635 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1636 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1637 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1638
1642
1643 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1644 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1645 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1646 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1647 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1648 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1649 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1650 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1655
1656 // There is no byte sized k-register load or store without AVX512DQ.
1657 if (!Subtarget.hasDQI()) {
1658 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1659 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1660 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1661 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1662
1667 }
1668
1669 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1670 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1674 }
1675
1676 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 })
1678
1679 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1683
1690 }
1691
1692 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1694 }
1695
1696 // This block controls legalization for 512-bit operations with 8/16/32/64 bit
1697 // elements. 512-bit vectors can be disabled based on prefer-vector-width and
1698 // required-vector-width function attributes.
1699 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1700 bool HasBWI = Subtarget.hasBWI();
1701
1702 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1703 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1704 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1705 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1706 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1707 addRegisterClass(MVT::v32f16, &X86::VR512RegClass);
1708 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1709
1710 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1711 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1712 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1713 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1714 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1715 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1716 if (HasBWI)
1717 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1718 }
1719
1720 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1728 }
1729
1730 for (MVT VT : { MVT::v16i1, MVT::v16i8 }) {
1735 }
1736
1737 for (MVT VT : { MVT::v16i16, MVT::v16i32 }) {
1742 }
1743
1750
1762
1763 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1764 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1765 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1766 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1767 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1768 if (HasBWI)
1769 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1770
1771 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1772 // to 512-bit rather than use the AVX2 instructions so that we can use
1773 // k-masks.
1774 if (!Subtarget.hasVLX()) {
1775 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1776 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1779 }
1780 }
1781
1783 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal);
1784 setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom);
1794
1795 if (HasBWI) {
1796 // Extends from v64i1 masks to 512-bit vectors.
1800 }
1801
1802 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1815
1817 }
1818
1819 for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
1822 }
1823
1824 setOperationAction(ISD::ADD, MVT::v32i16, HasBWI ? Legal : Custom);
1825 setOperationAction(ISD::SUB, MVT::v32i16, HasBWI ? Legal : Custom);
1826 setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom);
1827 setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom);
1828
1829 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1830 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1831 setOperationAction(ISD::MUL, MVT::v32i16, HasBWI ? Legal : Custom);
1832 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1833
1834 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1835 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1836 setOperationAction(ISD::MULHS, MVT::v32i16, HasBWI ? Legal : Custom);
1837 setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom);
1838 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1839 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1840 setOperationAction(ISD::AVGCEILU, MVT::v32i16, HasBWI ? Legal : Custom);
1841 setOperationAction(ISD::AVGCEILU, MVT::v64i8, HasBWI ? Legal : Custom);
1842
1843 setOperationAction(ISD::SMULO, MVT::v64i8, Custom);
1844 setOperationAction(ISD::UMULO, MVT::v64i8, Custom);
1845
1847
1848 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1857
1858 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1859 // setcc all the way to isel and prefer SETGT in some isel patterns.
1862 }
1863
1864 setOperationAction(ISD::SETCC, MVT::v8f64, Custom);
1865 setOperationAction(ISD::SETCC, MVT::v16f32, Custom);
1870
1871 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1878 }
1879
1880 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1881 setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
1882 setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
1884 setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom);
1885 setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom);
1886 setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom);
1887 setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom);
1892 }
1893
1894 setOperationAction(ISD::FSHL, MVT::v64i8, Custom);
1895 setOperationAction(ISD::FSHR, MVT::v64i8, Custom);
1896 setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
1897 setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
1898 setOperationAction(ISD::FSHL, MVT::v16i32, Custom);
1899 setOperationAction(ISD::FSHR, MVT::v16i32, Custom);
1900
1901 if (Subtarget.hasDQI()) {
1905 setOperationAction(Opc, MVT::v8i64, Custom);
1906 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1907 }
1908
1909 if (Subtarget.hasCDI()) {
1910 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1911 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1913 }
1914 } // Subtarget.hasCDI()
1915
1916 if (Subtarget.hasVPOPCNTDQ()) {
1917 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1919 }
1920
1921 // Extract subvector is special because the value type
1922 // (result) is 256-bit but the source is 512-bit wide.
1923 // 128-bit was made Legal under AVX1.
1924 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1925 MVT::v16f16, MVT::v8f32, MVT::v4f64 })
1927
1928 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
1929 MVT::v32f16, MVT::v16f32, MVT::v8f64 }) {
1939 }
1940 setF16Action(MVT::v32f16, Expand);
1945 for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) {
1946 setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32);
1947 setOperationPromotedToType(Opc, MVT::v32f16, MVT::v32f32);
1948 }
1949
1950 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1955 }
1956 if (HasBWI) {
1957 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1960 }
1961 } else {
1962 setOperationAction(ISD::STORE, MVT::v32i16, Custom);
1963 setOperationAction(ISD::STORE, MVT::v64i8, Custom);
1964 }
1965
1966 if (Subtarget.hasVBMI2()) {
1967 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
1968 MVT::v16i16, MVT::v8i32, MVT::v4i64,
1969 MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1972 }
1973
1974 setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
1975 setOperationAction(ISD::ROTR, MVT::v8i16, Custom);
1976 setOperationAction(ISD::ROTR, MVT::v16i16, Custom);
1977 setOperationAction(ISD::ROTR, MVT::v32i16, Custom);
1978 }
1979 }// useAVX512Regs
1980
1981 // This block controls legalization for operations that don't have
1982 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1983 // narrower widths.
1984 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1985 // These operations are handled on non-VLX by artificially widening in
1986 // isel patterns.
1987
1991
1992 if (Subtarget.hasDQI()) {
1993 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1994 // v2f32 UINT_TO_FP is already custom under SSE2.
1997 "Unexpected operation action!");
1998 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
2003 }
2004
2005 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
2011 }
2012
2013 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
2016 }
2017
2018 // Custom legalize 2x32 to get a little better code.
2021
2022 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
2023 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
2025
2026 if (Subtarget.hasDQI()) {
2030 setOperationAction(Opc, MVT::v2i64, Custom);
2031 setOperationAction(Opc, MVT::v4i64, Custom);
2032 }
2033 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
2034 setOperationAction(ISD::MUL, MVT::v4i64, Legal);
2035 }
2036
2037 if (Subtarget.hasCDI()) {
2038 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
2040 }
2041 } // Subtarget.hasCDI()
2042
2043 if (Subtarget.hasVPOPCNTDQ()) {
2044 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
2046 }
2047 }
2048
2049 // This block controls legalization of v32i1/v64i1, which are available with
2050 // AVX512BW.
2051 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
2052 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
2053 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
2054
2055 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
2066 }
2067
2068 for (auto VT : { MVT::v16i1, MVT::v32i1 })
2070
2071 // Extends from v32i1 masks to 256-bit vectors.
2075
2076 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
2077 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
2078 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
2079 }
2080
2081 // These operations are handled on non-VLX by artificially widening in
2082 // isel patterns.
2083 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
2084
2085 if (Subtarget.hasBITALG()) {
2086 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
2088 }
2089 }
2090
2091 if (!Subtarget.useSoftFloat() && Subtarget.hasFP16()) {
2092 auto setGroup = [&] (MVT VT) {
2103
2114
2116
2119
2125
2131
2135 };
2136
2137 // AVX512_FP16 scalar operations
2138 setGroup(MVT::f16);
2152
2155
2156 if (Subtarget.useAVX512Regs()) {
2157 setGroup(MVT::v32f16);
2163 setOperationAction(ISD::FP_ROUND, MVT::v16f16, Legal);
2170
2175 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i8, MVT::v32i16);
2177 MVT::v32i16);
2178 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i8, MVT::v32i16);
2180 MVT::v32i16);
2181 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i1, MVT::v32i16);
2183 MVT::v32i16);
2184 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i1, MVT::v32i16);
2186 MVT::v32i16);
2187
2191
2192 setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Legal);
2193 setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Legal);
2194 }
2195
2196 if (Subtarget.hasVLX()) {
2197 setGroup(MVT::v8f16);
2198 setGroup(MVT::v16f16);
2199
2210
2221
2222 // INSERT_VECTOR_ELT v8f16 extended to VECTOR_SHUFFLE
2225
2229
2230 setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Legal);
2231 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Legal);
2232 setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Legal);
2233 setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Legal);
2234
2235 // Need to custom widen these to prevent scalarization.
2236 setOperationAction(ISD::LOAD, MVT::v4f16, Custom);
2237 setOperationAction(ISD::STORE, MVT::v4f16, Custom);
2238 }
2239 }
2240
2241 if (!Subtarget.useSoftFloat() &&
2242 (Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16())) {
2243 addRegisterClass(MVT::v8bf16, Subtarget.hasAVX512() ? &X86::VR128XRegClass
2244 : &X86::VR128RegClass);
2245 addRegisterClass(MVT::v16bf16, Subtarget.hasAVX512() ? &X86::VR256XRegClass
2246 : &X86::VR256RegClass);
2247 // We set the type action of bf16 to TypeSoftPromoteHalf, but we don't
2248 // provide the method to promote BUILD_VECTOR and INSERT_VECTOR_ELT.
2249 // Set the operation action Custom to do the customization later.
2252 for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
2253 setF16Action(VT, Expand);
2260 }
2262 addLegalFPImmediate(APFloat::getZero(APFloat::BFloat()));
2263 }
2264
2265 if (!Subtarget.useSoftFloat() && Subtarget.hasBF16()) {
2266 addRegisterClass(MVT::v32bf16, &X86::VR512RegClass);
2267 setF16Action(MVT::v32bf16, Expand);
2268 setOperationAction(ISD::FADD, MVT::v32bf16, Expand);
2269 setOperationAction(ISD::FSUB, MVT::v32bf16, Expand);
2270 setOperationAction(ISD::FMUL, MVT::v32bf16, Expand);
2271 setOperationAction(ISD::FDIV, MVT::v32bf16, Expand);
2273 setOperationAction(ISD::FP_ROUND, MVT::v16bf16, Custom);
2275 }
2276
2277 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
2278 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
2279 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
2280 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
2281 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
2282 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
2283
2284 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
2285 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
2286 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
2287 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
2288 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
2289
2290 if (Subtarget.hasBWI()) {
2291 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
2292 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
2293 }
2294
2295 if (Subtarget.hasFP16()) {
2296 // vcvttph2[u]dq v4f16 -> v4i32/64, v2f16 -> v2i32/64
2305 // vcvt[u]dq2ph v4i32/64 -> v4f16, v2i32/64 -> v2f16
2314 // vcvtps2phx v4f32 -> v4f16, v2f32 -> v2f16
2319 // vcvtph2psx v4f16 -> v4f32, v2f16 -> v2f32
2324 }
2325 }
2326
2327 if (Subtarget.hasAMXTILE()) {
2328 addRegisterClass(MVT::x86amx, &X86::TILERegClass);
2329 }
2330
2331 // We want to custom lower some of our intrinsics.
2335 if (!Subtarget.is64Bit()) {
2337 }
2338
2339 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
2340 // handle type legalization for these operations here.
2341 //
2342 // FIXME: We really should do custom legalization for addition and
2343 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
2344 // than generic legalization for 64-bit multiplication-with-overflow, though.
2345 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
2346 if (VT == MVT::i64 && !Subtarget.is64Bit())
2347 continue;
2348 // Add/Sub/Mul with overflow operations are custom lowered.
2355
2356 // Support carry in as value rather than glue.
2362 }
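// Illustrative sketch (hypothetical helper, not from this file): the custom
// lowering above maps the overflow intrinsics onto flag-setting ALU ops, so a
// checked add compiles to ADD followed by SETO/JO rather than a widened
// compare sequence. __builtin_sadd_overflow is the clang/GCC builtin for
// llvm.sadd.with.overflow.
static bool addOverflowsSketch(int A, int B, int *Out) {
  return __builtin_sadd_overflow(A, B, Out); // addl + seto
}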
2363
2364 if (!Subtarget.is64Bit()) {
2365 // These libcalls are not available in 32-bit.
2366 setLibcallName(RTLIB::SHL_I128, nullptr);
2367 setLibcallName(RTLIB::SRL_I128, nullptr);
2368 setLibcallName(RTLIB::SRA_I128, nullptr);
2369 setLibcallName(RTLIB::MUL_I128, nullptr);
2370 // The MULO libcall is not part of libgcc, only compiler-rt.
2371 setLibcallName(RTLIB::MULO_I64, nullptr);
2372 }
2373 // The MULO libcall is not part of libgcc, only compiler-rt.
2374 setLibcallName(RTLIB::MULO_I128, nullptr);
2375
2376 // Combine sin / cos into _sincos_stret if it is available.
2377 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
2378 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
2381 }
2382
2383 if (Subtarget.isTargetWin64()) {
2384 setOperationAction(ISD::SDIV, MVT::i128, Custom);
2385 setOperationAction(ISD::UDIV, MVT::i128, Custom);
2386 setOperationAction(ISD::SREM, MVT::i128, Custom);
2387 setOperationAction(ISD::UREM, MVT::i128, Custom);
2396 }
2397
2398 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
2399 // is. We should promote the value to 64-bits to solve this.
2400 // This is what the CRT headers do - `fmodf` is an inline header
2401 // function casting to f64 and calling `fmod`.
2402 if (Subtarget.is32Bit() &&
2403 (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
2404 for (ISD::NodeType Op :
2414 if (isOperationExpand(Op, MVT::f32))
2415 setOperationAction(Op, MVT::f32, Promote);
2416
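// Illustrative sketch (hypothetical helper, not from this file): with the
// promotion above, an f32 FREM on 32-bit MSVC targets widens its operands and
// calls fmod(double, double), mirroring the CRT's inline fmodf wrapper.
static float frem32Sketch(float A, float B) {
  return __builtin_fmodf(A, B); // i386 MSVC: promoted to a call to fmod
}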
2417 // We have target-specific dag combine patterns for the following nodes:
2428 ISD::SHL,
2429 ISD::SRA,
2430 ISD::SRL,
2431 ISD::OR,
2432 ISD::AND,
2433 ISD::ADD,
2434 ISD::FADD,
2435 ISD::FSUB,
2436 ISD::FNEG,
2437 ISD::FMA,
2441 ISD::SUB,
2442 ISD::LOAD,
2443 ISD::MLOAD,
2444 ISD::STORE,
2458 ISD::SETCC,
2459 ISD::MUL,
2460 ISD::XOR,
2468
2470
2471 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
2473 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
2475 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
2477
2478 // TODO: These control memcmp expansion in CGP and could be raised higher, but
2479 // that needs to be benchmarked and balanced with the potential use of vector
2480 // load/store types (PR33329, PR33914).
2483
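// Illustrative sketch (hypothetical helper, not from this file): the memcmp
// expansion limits configured above let CodeGenPrepare inline small
// fixed-size memcmp calls, so an equality test like this becomes a few wide
// loads and compares instead of a library call.
static bool equal16Sketch(const char *A, const char *B) {
  return __builtin_memcmp(A, B, 16) == 0; // e.g. two 8-byte loads + cmp
}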
2484 // Default loop alignment, which can be overridden by -align-loops.
2486
2487 // An out-of-order CPU can speculatively execute past a predictable branch,
2488 // but a conditional move could be stalled by an expensive earlier operation.
2489 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
2490 EnableExtLdPromotion = true;
2492
2494
2495 // Default to having -disable-strictnode-mutation on
2496 IsStrictFPEnabled = true;
2497}
2498
2499// This has so far only been implemented for 64-bit MachO.
2501 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
2502}
2503
2505 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
2506 return Subtarget.getTargetTriple().isOSMSVCRT() && !Subtarget.isTargetMachO();
2507}
2508
2510 const SDLoc &DL) const {
2511 EVT PtrTy = getPointerTy(DAG.getDataLayout());
2512 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
2513 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
2514 return SDValue(Node, 0);
2515}
2516
2519 if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
2520 !Subtarget.hasBWI())
2521 return TypeSplitVector;
2522
2523 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
2524 !Subtarget.hasF16C() && VT.getVectorElementType() == MVT::f16)
2525 return TypeSplitVector;
2526
2527 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
2528 VT.getVectorElementType() != MVT::i1)
2529 return TypeWidenVector;
2530
2532}
2533
2534FastISel *
2536 const TargetLibraryInfo *libInfo) const {
2537 return X86::createFastISel(funcInfo, libInfo);
2538}
2539
2540//===----------------------------------------------------------------------===//
2541// Other Lowering Hooks
2542//===----------------------------------------------------------------------===//
2543
2545 bool AssumeSingleUse) {
2546 if (!AssumeSingleUse && !Op.hasOneUse())
2547 return false;
2548 if (!ISD::isNormalLoad(Op.getNode()))
2549 return false;
2550
2551 // If this is an unaligned vector load, make sure the target supports folding it.
2552 auto *Ld = cast<LoadSDNode>(Op.getNode());
2553 if (!Subtarget.hasAVX() && !Subtarget.hasSSEUnalignedMem() &&
2554 Ld->getValueSizeInBits(0) == 128 && Ld->getAlign() < Align(16))
2555 return false;
2556
2557 // TODO: If this is a non-temporal load and the target has an instruction
2558 // for it, it should not be folded. See "useNonTemporalLoad()".
2559
2560 return true;
2561}
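// Illustrative example: without AVX and without fast unaligned SSE memory
// accesses, folding an under-aligned 128-bit load into an instruction such as
//   paddd xmm0, xmmword ptr [mem]
// would fault, so the check above reports such a load as non-foldable and it
// stays as a separate movups.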
2562
2564 const X86Subtarget &Subtarget,
2565 bool AssumeSingleUse) {
2566 assert(Subtarget.hasAVX() && "Expected AVX for broadcast from memory");
2567 if (!X86::mayFoldLoad(Op, Subtarget, AssumeSingleUse))
2568 return false;
2569
2570 // We cannot replace a wide volatile load with a broadcast-from-memory,
2571 // because that would narrow the load, which isn't legal for volatiles.
2572 auto *Ld = cast<LoadSDNode>(Op.getNode());
2573 return !Ld->isVolatile() ||
2574 Ld->getValueSizeInBits(0) == EltVT.getScalarSizeInBits();
2575}
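// Illustrative example: a volatile 128-bit load feeding a 32-bit broadcast
// cannot be replaced by vbroadcastss [mem], since the broadcast would read
// only 32 of the 128 bits; the fold is allowed only when the broadcast
// element width equals the load width.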
2576
2578 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
2579}
2580
2582 if (Op.hasOneUse()) {
2583 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
2584 return (ISD::ZERO_EXTEND == Opcode);
2585 }
2586 return false;
2587}
2588
2589static bool isTargetShuffle(unsigned Opcode) {
2590 switch(Opcode) {
2591 default: return false;
2592 case X86ISD::BLENDI:
2593 case X86ISD::PSHUFB:
2594 case X86ISD::PSHUFD:
2595 case X86ISD::PSHUFHW:
2596 case X86ISD::PSHUFLW:
2597 case X86ISD::SHUFP:
2598 case X86ISD::INSERTPS:
2599 case X86ISD::EXTRQI:
2600 case X86ISD::INSERTQI:
2601 case X86ISD::VALIGN:
2602 case X86ISD::PALIGNR:
2603 case X86ISD::VSHLDQ:
2604 case X86ISD::VSRLDQ:
2605 case X86ISD::MOVLHPS:
2606 case X86ISD::MOVHLPS:
2607 case X86ISD::MOVSHDUP:
2608 case X86ISD::MOVSLDUP:
2609 case X86ISD::MOVDDUP:
2610 case X86ISD::MOVSS:
2611 case X86ISD::MOVSD:
2612 case X86ISD::MOVSH:
2613 case X86ISD::UNPCKL:
2614 case X86ISD::UNPCKH:
2615 case X86ISD::VBROADCAST:
2616 case X86ISD::VPERMILPI:
2617 case X86ISD::VPERMILPV:
2618 case X86ISD::VPERM2X128:
2619 case X86ISD::SHUF128:
2620 case X86ISD::VPERMIL2:
2621 case X86ISD::VPERMI:
2622 case X86ISD::VPPERM:
2623 case X86ISD::VPERMV:
2624 case X86ISD::VPERMV3:
2625 case X86ISD::VZEXT_MOVL:
2626 return true;
2627 }
2628}
2629
2630static bool isTargetShuffleVariableMask(unsigned Opcode) {
2631 switch (Opcode) {
2632 default: return false;
2633 // Target Shuffles.
2634 case X86ISD::PSHUFB:
2635 case X86ISD::VPERMILPV:
2636 case X86ISD::VPERMIL2:
2637 case X86ISD::VPPERM:
2638 case X86ISD::VPERMV:
2639 case X86ISD::VPERMV3:
2640 return true;
2641 // 'Faux' Target Shuffles.
2642 case ISD::OR:
2643 case ISD::AND:
2644 case X86ISD::ANDNP:
2645 return true;
2646 }
2647}
2648
2651 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2653 int ReturnAddrIndex = FuncInfo->getRAIndex();
2654
2655 if (ReturnAddrIndex == 0) {
2656 // Set up a frame object for the return address.
2657 unsigned SlotSize = RegInfo->getSlotSize();
2658 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
2659 -(int64_t)SlotSize,
2660 false);
2661 FuncInfo->setRAIndex(ReturnAddrIndex);
2662 }
2663
2664 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
2665}
2666
2668 bool hasSymbolicDisplacement) {
2669 // Offset should fit into a 32-bit immediate field.
2670 if (!isInt<32>(Offset))
2671 return false;
2672
2673 // If we don't have a symbolic displacement - we don't have any extra
2674 // restrictions.
2675 if (!hasSymbolicDisplacement)
2676 return true;
2677
2678 // FIXME: Some tweaks might be needed for medium code model.
2679 if (M != CodeModel::Small && M != CodeModel::Kernel)
2680 return false;
2681
2682 // For the small code model, we assume the last object ends within 16MB of the
2683 // 31-bit boundary. We may also accept pretty large negative constants, knowing
2684 // that all objects are in the positive half of the address space.
2685 if (M == CodeModel::Small && Offset < 16*1024*1024)
2686 return true;
2687
2688 // For the kernel code model, we know that all objects reside in the negative
2689 // half of the 32-bit address space, so we do not accept negative offsets (they
2690 // could fall outside that range), but we do accept pretty large positive ones.
2691 if (M == CodeModel::Kernel && Offset >= 0)
2692 return true;
2693
2694 return false;
2695}
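// Illustrative examples of the rules above, all with a symbolic displacement
// (foo is a hypothetical symbol):
//   CodeModel::Small:  foo+0x100000  -> accepted (offset below 16MB)
//   CodeModel::Small:  foo+0x1000000 -> rejected (offset not below 16MB)
//   CodeModel::Kernel: foo-8         -> rejected (negative offset)
//   CodeModel::Kernel: foo+0x100000  -> accepted (non-negative offset)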
2696
2697/// Return true if the condition is a signed comparison operation.
2698static bool isX86CCSigned(unsigned X86CC) {
2699 switch (X86CC) {
2700 default:
2701 llvm_unreachable("Invalid integer condition!");
2702 case X86::COND_E:
2703 case X86::COND_NE:
2704 case X86::COND_B:
2705 case X86::COND_A:
2706 case X86::COND_BE:
2707 case X86::COND_AE:
2708 return false;
2709 case X86::COND_G:
2710 case X86::COND_GE:
2711 case X86::COND_L:
2712 case X86::COND_LE:
2713 return true;
2714 }
2715}
2716
2718 switch (SetCCOpcode) {
2719 default: llvm_unreachable("Invalid integer condition!");
2720 case ISD::SETEQ: return X86::COND_E;
2721 case ISD::SETGT: return X86::COND_G;
2722 case ISD::SETGE: return X86::COND_GE;
2723 case ISD::SETLT: return X86::COND_L;
2724 case ISD::SETLE: return X86::COND_LE;
2725 case ISD::SETNE: return X86::COND_NE;
2726 case ISD::SETULT: return X86::COND_B;
2727 case ISD::SETUGT: return X86::COND_A;
2728 case ISD::SETULE: return X86::COND_BE;
2729 case ISD::SETUGE: return X86::COND_AE;
2730 }
2731}
2732
2733/// Do a one-to-one translation of an ISD::CondCode to the X86-specific
2734/// condition code, returning the condition code and the LHS/RHS of the
2735/// comparison to make.
2737 bool isFP, SDValue &LHS, SDValue &RHS,
2738 SelectionDAG &DAG) {
2739 if (!isFP) {
2740 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2741 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnes()) {
2742 // X > -1 -> X == 0, jump !sign.
2743 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2744 return X86::COND_NS;
2745 }
2746 if (SetCCOpcode == ISD::SETLT && RHSC->isZero()) {
2747 // X < 0 -> X == 0, jump on sign.
2748 return X86::COND_S;
2749 }
2750 if (SetCCOpcode == ISD::SETGE && RHSC->isZero()) {
2751 // X >= 0 -> X == 0, jump on !sign.
2752 return X86::COND_NS;
2753 }
2754 if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
2755 // X < 1 -> X <= 0
2756 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2757 return X86::COND_LE;
2758 }
2759 }
2760
2761 return TranslateIntegerX86CC(SetCCOpcode);
2762 }
2763
2764 // First determine if it is required or is profitable to flip the operands.
2765
2766 // If LHS is a foldable load, but RHS is not, flip the condition.
2767 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
2768 !ISD::isNON_EXTLoad(RHS.getNode())) {
2769 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
2770 std::swap(LHS, RHS);
2771 }
2772
2773 switch (SetCCOpcode) {
2774 default: break;
2775 case ISD::SETOLT:
2776 case ISD::SETOLE:
2777 case ISD::SETUGT:
2778 case ISD::SETUGE:
2779 std::swap(LHS, RHS);
2780 break;
2781 }
2782
2783 // On a floating point condition, the flags are set as follows:
2784 // ZF PF CF op
2785 // 0 | 0 | 0 | X > Y
2786 // 0 | 0 | 1 | X < Y
2787 // 1 | 0 | 0 | X == Y
2788 // 1 | 1 | 1 | unordered
2789 switch (SetCCOpcode) {
2790 default: llvm_unreachable("Condcode should be pre-legalized away");
2791 case ISD::SETUEQ:
2792 case ISD::SETEQ: return X86::COND_E;
2793 case ISD::SETOLT: // flipped
2794 case ISD::SETOGT:
2795 case ISD::SETGT: return X86::COND_A;
2796 case ISD::SETOLE: // flipped
2797 case ISD::SETOGE:
2798 case ISD::SETGE: return X86::COND_AE;
2799 case ISD::SETUGT: // flipped
2800 case ISD::SETULT:
2801 case ISD::SETLT: return X86::COND_B;
2802 case ISD::SETUGE: // flipped
2803 case ISD::SETULE:
2804 case ISD::SETLE: return X86::COND_BE;
2805 case ISD::SETONE:
2806 case ISD::SETNE: return X86::COND_NE;
2807 case ISD::SETUO: return X86::COND_P;
2808 case ISD::SETO: return X86::COND_NP;
2809 case ISD::SETOEQ:
2810 case ISD::SETUNE: return X86::COND_INVALID;
2811 }
2812}
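// Worked example (illustrative): for SETOLT the switch above swaps the
// operands, so "x <o y" is evaluated as a compare of y against x and mapped
// to COND_A ("above"). Per the flag table, COND_A is taken only when CF == 0
// and ZF == 0, i.e. the compare was ordered and y > x, which is exactly the
// OLT result.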
2813
2814/// Is there a floating point cmov for the specific X86 condition code?
2815/// The current x86 ISA includes the following FP cmov instructions:
2816/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
2817static bool hasFPCMov(unsigned X86CC) {
2818 switch (X86CC) {
2819 default:
2820 return false;
2821 case X86::COND_B:
2822 case X86::COND_BE:
2823 case X86::COND_E:
2824 case X86::COND_P:
2825 case X86::COND_A:
2826 case X86::COND_AE:
2827 case X86::COND_NE:
2828 case X86::COND_NP:
2829 return true;
2830 }
2831}
2832
2833static bool useVPTERNLOG(const X86Subtarget &Subtarget, MVT VT) {
2834 return Subtarget.hasVLX() || Subtarget.canExtendTo512DQ() ||
2835 VT.is512BitVector();
2836}
2837
2839 const CallInst &I,
2840 MachineFunction &MF,
2841 unsigned Intrinsic) const {
2843 Info.offset = 0;
2844
2845 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
2846 if (!IntrData) {
2847 switch (Intrinsic) {
2848 case Intrinsic::x86_aesenc128kl:
2849 case Intrinsic::x86_aesdec128kl:
2851 Info.ptrVal = I.getArgOperand(1);
2852 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
2853 Info.align = Align(1);
2855 return true;
2856 case Intrinsic::x86_aesenc256kl:
2857 case Intrinsic::x86_aesdec256kl:
2859 Info.ptrVal = I.getArgOperand(1);
2860 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
2861 Info.align = Align(1);
2863 return true;
2864 case Intrinsic::x86_aesencwide128kl:
2865 case Intrinsic::x86_aesdecwide128kl:
2867 Info.ptrVal = I.getArgOperand(0);
2868 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
2869 Info.align = Align(1);
2871 return true;
2872 case Intrinsic::x86_aesencwide256kl:
2873 case Intrinsic::x86_aesdecwide256kl:
2875 Info.ptrVal = I.getArgOperand(0);
2876 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
2877 Info.align = Align(1);
2879 return true;
2880 case Intrinsic::x86_cmpccxadd32:
2881 case Intrinsic::x86_cmpccxadd64:
2882 case Intrinsic::x86_atomic_bts:
2883 case Intrinsic::x86_atomic_btc:
2884 case Intrinsic::x86_atomic_btr: {
2886 Info.ptrVal = I.getArgOperand(0);
2887 unsigned Size = I.getType()->getScalarSizeInBits();
2888 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
2889 Info.align = Align(Size);
2892 return true;
2893 }
2894 case Intrinsic::x86_atomic_bts_rm:
2895 case Intrinsic::x86_atomic_btc_rm:
2896 case Intrinsic::x86_atomic_btr_rm: {
2898 Info.ptrVal = I.getArgOperand(0);
2899 unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();
2900 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
2901 Info.align = Align(Size);
2904 return true;
2905 }
2906 case Intrinsic::x86_aadd32:
2907 case Intrinsic::x86_aadd64:
2908 case Intrinsic::x86_aand32:
2909 case Intrinsic::x86_aand64:
2910 case Intrinsic::x86_aor32:
2911 case Intrinsic::x86_aor64:
2912 case Intrinsic::x86_axor32:
2913 case Intrinsic::x86_axor64:
2914 case Intrinsic::x86_atomic_add_cc:
2915 case Intrinsic::x86_atomic_sub_cc:
2916 case Intrinsic::x86_atomic_or_cc:
2917 case Intrinsic::x86_atomic_and_cc:
2918 case Intrinsic::x86_atomic_xor_cc: {
2920 Info.ptrVal = I.getArgOperand(0);
2921 unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();
2922 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
2923 Info.align = Align(Size);
2926 return true;
2927 }
2928 }
2929 return false;
2930 }
2931
2932 switch (IntrData->Type) {
2935 case TRUNCATE_TO_MEM_VI32: {
2937 Info.ptrVal = I.getArgOperand(0);
2938 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
2940 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
2941 ScalarVT = MVT::i8;
2942 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
2943 ScalarVT = MVT::i16;
2944 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
2945 ScalarVT = MVT::i32;
2946
2947 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
2948 Info.align = Align(1);
2950 break;
2951 }
2952 case GATHER:
2953 case GATHER_AVX2: {
2955 Info.ptrVal = nullptr;
2956 MVT DataVT = MVT::getVT(I.getType());
2957 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
2958 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
2959 IndexVT.getVectorNumElements());
2960 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
2961 Info.align = Align(1);
2963 break;
2964 }
2965 case SCATTER: {
2967 Info.ptrVal = nullptr;
2968 MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
2969 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
2970 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
2971 IndexVT.getVectorNumElements());
2972 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
2973 Info.align = Align(1);
2975 break;
2976 }
2977 default:
2978 return false;
2979 }
2980
2981 return true;
2982}
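// Illustrative example (assuming the AVX2 gather entries in the intrinsic
// table): for a call to
//   @llvm.x86.avx2.gather.d.d(<4 x i32> %src, ptr %base, <4 x i32> %idx, ...)
// the GATHER_AVX2 case above records memVT = v4i32 (the minimum of the data
// and index element counts), no single base pointer, and 1-byte alignment.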
2983
2984/// Returns true if the target can instruction select the
2985/// specified FP immediate natively. If false, the legalizer will
2986/// materialize the FP immediate as a load from a constant pool.
2988 bool ForCodeSize) const {
2989 for (const APFloat &FPImm : LegalFPImmediates)
2990 if (Imm.bitwiseIsEqual(FPImm))
2991 return true;
2992 return false;
2993}
2994
2996 ISD::LoadExtType ExtTy,
2997 EVT NewVT) const {
2998 assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow");
2999
3000 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
3001 // relocations target a movq or addq instruction: don't let the load shrink.
3002 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
3003 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
3004 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
3005 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
3006
3007 // If this is (1) an AVX vector load with (2) multiple uses and (3) each use is
3008 // an extract that feeds directly into a store, then the extract + store
3009 // can be store-folded. Therefore, it's probably not worth splitting the load.
3010 EVT VT = Load->getValueType(0);
3011 if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
3012 for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
3013 // Skip uses of the chain value. Result 0 of the node is the load value.
3014 if (UI.getUse().getResNo() != 0)
3015 continue;
3016
3017 // If this use is not an extract + store, it's probably worth splitting.
3018 if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
3019 UI->use_begin()->getOpcode() != ISD::STORE)
3020 return true;
3021 }
3022 // All non-chain uses are extract + store.
3023 return false;
3024 }
3025
3026 return true;
3027}
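// Illustrative case for the use loop above (assuming AVX): a 256-bit load
// whose only non-chain uses are
//   store (extract_subvector %ld, 0)  and  store (extract_subvector %ld, 4)
// keeps the wide load, because each extract can be folded into its store
// (e.g. vextractf128 to memory), so narrowing is reported as not worthwhile.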
3028
3029/// Returns true if it is beneficial to convert a load of a constant
3030/// to just the constant itself.
3032 Type *Ty) const {
3033 assert(Ty->isIntegerTy());
3034
3035 unsigned BitSize = Ty->getPrimitiveSizeInBits();
3036 if (BitSize == 0 || BitSize > 64)
3037 return false;
3038 return true;
3039}
3040
3042 // If we are using XMM registers in the ABI and the condition of the select is
3043 // a floating-point compare and we have blendv or conditional move, then it is
3044 // cheaper to select instead of doing a cross-register move and creating a
3045 // load that depends on the compare result.
3046 bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128;
3047 return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
3048}
3049
3051 // TODO: It might be a win to ease or lift this restriction, but the generic
3052 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
3053 if (VT.isVector() && Subtarget.hasAVX512())
3054 return false;
3055
3056 return true;
3057}
3058
3060 SDValue C) const {
3061 // TODO: We handle scalars using custom code, but generic combining could make
3062 // that unnecessary.
3063 APInt MulC;
3064 if (!ISD::isConstantSplatVector(C.getNode(), MulC))
3065 return false;
3066
3067 // Find the type this will be legalized to. Otherwise we might prematurely
3068 // convert this to shl+add/sub and then still have to type legalize those ops.
3069 // Another choice would be to defer the decision for illegal types until
3070 // after type legalization. But constant splat vectors of i64 can't make it
3071 // through type legalization on 32-bit targets so we would need to special
3072 // case vXi64.
3073 while (getTypeAction(Context, VT) != TypeLegal)
3074 VT = getTypeToTransformTo(Context, VT);
3075
3076 // If vector multiply is legal, assume that's faster than shl + add/sub.
3077 // Although multiply is a complex op with higher latency and lower throughput
3078 // in most implementations, sub-vXi32 vector multiplies are always fast,
3079 // vXi32 is fine as long as the target's PMULLD is not slow, and anything
3080 // larger (vXi64) is always going to be slow.
3081 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3082 if (isOperationLegal(ISD::MUL, VT) && EltSizeInBits <= 32 &&
3083 (EltSizeInBits != 32 || !Subtarget.isPMULLDSlow()))
3084 return false;
3085
3086 // shl+add, shl+sub, shl+add+neg
3087 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
3088 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
3089}
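// Illustrative decompositions matched by the final check above:
//   x * 9  -> (x << 3) + x      // (MulC - 1) is a power of 2
//   x * 7  -> (x << 3) - x      // (MulC + 1) is a power of 2
//   x * -7 -> x - (x << 3)      // (1 - MulC) is a power of 2
//   x * -9 -> -((x << 3) + x)   // -(MulC + 1) is a power of 2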
3090
3092 unsigned Index) const {
3094 return false;
3095
3096 // Mask vectors support all subregister combinations and operations that
3097 // extract half of a vector.
3098 if (ResVT.getVectorElementType() == MVT::i1)
3099 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
3100 (Index == ResVT.getVectorNumElements()));
3101
3102 return (Index % ResVT.getVectorNumElements()) == 0;
3103}
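// Illustrative example: extracting elements [4,8) of a v8i32 as a v4i32
// (Index == 4) is cheap, since it is just the upper 128-bit half, whereas an
// extract starting at Index == 2 straddles both halves and is not reported
// as cheap.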
3104
3106 unsigned Opc = VecOp.getOpcode();
3107
3108 // Assume target opcodes can't be scalarized.
3109 // TODO - do we have any exceptions?
3110 if (Opc >= ISD::BUILTIN_OP_END)
3111 return false;
3112
3113 // If the vector op is not supported, try to convert to scalar.
3114 EVT VecVT = VecOp.getValueType();
3115 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
3116 return true;
3117
3118 // If the vector op is supported, but the scalar op is not, the transform may
3119 // not be worthwhile.
3120 EVT ScalarVT = VecVT.getScalarType();
3121 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
3122}
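// Illustrative sketch: DAGCombine consults this hook before rewriting
//   extractelt (add <4 x i32> %a, %b), 0
// as
//   add (extractelt %a, 0), (extractelt %b, 0)
// The rewrite is declined only when the vector ADD is supported but the
// scalar ADD is not, per the legality checks above.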
3123
3125 bool) const {
3126 // TODO: Allow vectors?
3127 if (VT.isVector())
3128 return false;
3129 return VT.isSimple() || !isOperationExpand(Opcode, VT);
3130}
3131
3133 // Speculate cttz only if we can directly use TZCNT or can promote to i32.
3134 return Subtarget.hasBMI() ||
3135 (!Ty->isVectorTy() && Ty->getScalarSizeInBits() < 32);
3136}
3137
3139 // Speculate ctlz only if we can directly use LZCNT.
3140 return Subtarget.hasLZCNT();
3141}
3142
3144 // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
3145 // expensive than a straight movsd. On the other hand, it's important to
3146 // shrink long double fp constants since fldt is very slow.
3147 return !Subtarget.hasSSE2() || VT == MVT::f80;
3148}
3149
3151 return (VT == MVT::f64 && Subtarget.hasSSE2()) ||
3152 (VT == MVT::f32 && Subtarget.hasSSE1()) || VT == MVT::f16;
3153}
3154
3156 const SelectionDAG &DAG,
3157 const MachineMemOperand &MMO) const {
3158 if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
3159 BitcastVT.getVectorElementType() == MVT::i1)
3160 return false;
3161
3162 if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
3163 return false;
3164
3165 // If both types are legal vectors, it's always ok to convert them.
3166 if (LoadVT.isVector() && BitcastVT.isVector() &&
3167 isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
3168 return true;
3169
3170 return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
3171}
3172
3174 const MachineFunction &MF) const {
3175 // Do not merge to float value size (128 bits) if the NoImplicitFloat
3176 // attribute is set.
3177 bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
3178
3179 if (NoFloat) {
3180 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
3181 return (MemVT.getSizeInBits() <= MaxIntSize);
3182 }
3183 // Make sure we don't merge to a size greater than our preferred vector
3184 // width.
3185 if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth())
3186 return false;