1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelLowering.h"
16#include "X86.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
22#include "X86TargetMachine.h"
23#include "X86TargetObjectFile.h"
25#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/Statistic.h"
43#include "llvm/IR/CallingConv.h"
44#include "llvm/IR/Constants.h"
47#include "llvm/IR/Function.h"
48#include "llvm/IR/GlobalAlias.h"
50#include "llvm/IR/IRBuilder.h"
52#include "llvm/IR/Intrinsics.h"
54#include "llvm/MC/MCAsmInfo.h"
55#include "llvm/MC/MCContext.h"
56#include "llvm/MC/MCExpr.h"
57#include "llvm/MC/MCSymbol.h"
59#include "llvm/Support/Debug.h"
64#include <algorithm>
65#include <bitset>
66#include <cctype>
67#include <numeric>
68using namespace llvm;
69
70#define DEBUG_TYPE "x86-isel"
71
73 "x86-experimental-pref-innermost-loop-alignment", cl::init(4),
75 "Sets the preferable loop alignment for experiments (as log2 bytes) "
76 "for innermost loops only. If specified, this option overrides "
77 "alignment set by x86-experimental-pref-loop-alignment."),
79
81 "mul-constant-optimization", cl::init(true),
82 cl::desc("Replace 'mul x, Const' with more effective instructions like "
83 "SHIFT, LEA, etc."),
85
86X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
87 const X86Subtarget &STI)
88 : TargetLowering(TM), Subtarget(STI) {
89 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
90 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
91
92 // Set up the TargetLowering object.
93
94 // X86 is weird. It always uses i8 for shift amounts and setcc results.
95 setBooleanContents(ZeroOrOneBooleanContent);
96 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
97 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
98
99 // For 64-bit, since we have so many registers, use the ILP scheduler.
100 // For 32-bit, use the register pressure specific scheduling.
101 // For Atom, always use ILP scheduling.
102 if (Subtarget.isAtom())
103 setSchedulingPreference(Sched::ILP);
104 else if (Subtarget.is64Bit())
105 setSchedulingPreference(Sched::ILP);
106 else
107 setSchedulingPreference(Sched::RegPressure);
108 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
109 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
110
111 // Bypass expensive divides and use cheaper ones.
112 if (TM.getOptLevel() >= CodeGenOptLevel::Default) {
113 if (Subtarget.hasSlowDivide32())
114 addBypassSlowDiv(32, 8);
115 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
116 addBypassSlowDiv(64, 32);
117 }
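 // For example, with the 64-bit bypass above, a division whose operands happen
 // to fit in 32 bits at run time is routed to the much cheaper 32-bit divider,
 // roughly:
 //   %q = udiv i64 %a, %b
 //   ; --> if (((%a | %b) >> 32) == 0) use a 32-bit DIV, else a 64-bit DIV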
118
119 // Setup Windows compiler runtime calls.
120 if (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()) {
121 static const struct {
122 const RTLIB::Libcall Op;
123 const char * const Name;
124 const CallingConv::ID CC;
125 } LibraryCalls[] = {
126 { RTLIB::SDIV_I64, "_alldiv", CallingConv::X86_StdCall },
127 { RTLIB::UDIV_I64, "_aulldiv", CallingConv::X86_StdCall },
128 { RTLIB::SREM_I64, "_allrem", CallingConv::X86_StdCall },
129 { RTLIB::UREM_I64, "_aullrem", CallingConv::X86_StdCall },
130 { RTLIB::MUL_I64, "_allmul", CallingConv::X86_StdCall },
131 };
132
133 for (const auto &LC : LibraryCalls) {
134 setLibcallName(LC.Op, LC.Name);
135 setLibcallCallingConv(LC.Op, LC.CC);
136 }
137 }
138
139 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
140 // MSVCRT doesn't have powi; fall back to pow
141 setLibcallName(RTLIB::POWI_F32, nullptr);
142 setLibcallName(RTLIB::POWI_F64, nullptr);
143 }
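 // E.g. powi(x, n) on these targets is then expanded by the legalizer roughly
 // as pow(x, (float)n) instead of calling the missing __powisf2.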
144
145 if (Subtarget.canUseCMPXCHG16B())
146 setMaxAtomicSizeInBitsSupported(128);
147 else if (Subtarget.canUseCMPXCHG8B())
148 setMaxAtomicSizeInBitsSupported(64);
149 else
150 setMaxAtomicSizeInBitsSupported(32);
151
152 setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64);
153
155
156 // Set up the register classes.
157 addRegisterClass(MVT::i8, &X86::GR8RegClass);
158 addRegisterClass(MVT::i16, &X86::GR16RegClass);
159 addRegisterClass(MVT::i32, &X86::GR32RegClass);
160 if (Subtarget.is64Bit())
161 addRegisterClass(MVT::i64, &X86::GR64RegClass);
162
163 for (MVT VT : MVT::integer_valuetypes())
165
166 // We don't accept any truncstore of integer registers.
167 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
168 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
169 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
170 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
171 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
172 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
173
174 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
175
176 // SETOEQ and SETUNE require checking two conditions.
177 for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
178 setCondCodeAction(ISD::SETOEQ, VT, Expand);
179 setCondCodeAction(ISD::SETUNE, VT, Expand);
180 }
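 // For example, after a UCOMISS an ordered-equal result needs both ZF = 1 and
 // PF = 0 (SETE plus SETNP), because an unordered compare also sets ZF; no
 // single condition code expresses that, hence the Expand above.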
181
182 // Integer absolute.
183 if (Subtarget.canUseCMOV()) {
184 setOperationAction(ISD::ABS , MVT::i16 , Custom);
185 setOperationAction(ISD::ABS , MVT::i32 , Custom);
186 if (Subtarget.is64Bit())
187 setOperationAction(ISD::ABS , MVT::i64 , Custom);
188 }
189
190 // Absolute difference.
191 for (auto Op : {ISD::ABDS, ISD::ABDU}) {
192 setOperationAction(Op , MVT::i8 , Custom);
193 setOperationAction(Op , MVT::i16 , Custom);
194 setOperationAction(Op , MVT::i32 , Custom);
195 if (Subtarget.is64Bit())
196 setOperationAction(Op , MVT::i64 , Custom);
197 }
198
199 // Signed saturation subtraction.
203 if (Subtarget.is64Bit())
205
206 // Funnel shifts.
207 for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
208 // For slow shld targets we only lower for code size.
209 LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
210
211 setOperationAction(ShiftOp , MVT::i8 , Custom);
212 setOperationAction(ShiftOp , MVT::i16 , Custom);
213 setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction);
214 if (Subtarget.is64Bit())
215 setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction);
216 }
217
218 if (!Subtarget.useSoftFloat()) {
219 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
220 // operation.
225 // We have an algorithm for SSE2, and we turn this into a 64-bit
226 // FILD or VCVTUSI2SS/SD for other targets.
229 // We have an algorithm for SSE2->double, and we turn this into a
230 // 64-bit FILD followed by conditional FADD for other targets.
233
234 // Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
235 // this operation.
238 // SSE has no i16 to fp conversion, only i32. We promote in the handler
239 // to allow f80 to use i16 and f64 to use i16 with sse1 only
242 // f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
245 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
246 // are Legal, f80 is custom lowered.
249
250 // Promote i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
251 // this operation.
253 // FIXME: This doesn't generate invalid exception when it should. PR44019.
259 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
260 // are Legal, f80 is custom lowered.
263
264 // Handle FP_TO_UINT by promoting the destination to a larger signed
265 // conversion.
267 // FIXME: This doesn't generate invalid exception when it should. PR44019.
270 // FIXME: This doesn't generate invalid exception when it should. PR44019.
276
281
282 if (!Subtarget.is64Bit()) {
285 }
286 }
287
288 if (Subtarget.hasSSE2()) {
289 // Custom lowering for saturating float to int conversions.
290 // We handle promotion to larger result types manually.
291 for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
294 }
295 if (Subtarget.is64Bit()) {
298 }
299 }
300
301 // Handle address space casts between mixed sized pointers.
304
305 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
306 if (!Subtarget.hasSSE2()) {
309 if (Subtarget.is64Bit()) {
311 // Without SSE, i64->f64 goes through memory.
313 }
314 } else if (!Subtarget.is64Bit())
316
317 // Scalar integer divide and remainder are lowered to use operations that
318 // produce two results, to match the available instructions. This exposes
319 // the two-result form to trivial CSE, which is able to combine x/y and x%y
320 // into a single instruction.
321 //
322 // Scalar integer multiply-high is also lowered to use two-result
323 // operations, to match the available instructions. However, plain multiply
324 // (low) operations are left as Legal, as there are single-result
325 // instructions for this in x86. Using the two-result multiply instructions
326 // when both high and low results are needed must be arranged by dagcombine.
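 //
 // For instance, both results of
 //   %q = sdiv i32 %x, %y
 //   %r = srem i32 %x, %y
 // can be served by a single IDIV, which leaves the quotient in EAX and the
 // remainder in EDX.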
327 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
334 }
335
336 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
338 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
339 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
342 }
343 if (Subtarget.is64Bit())
348
349 setOperationAction(ISD::FREM , MVT::f32 , Expand);
350 setOperationAction(ISD::FREM , MVT::f64 , Expand);
351 setOperationAction(ISD::FREM , MVT::f80 , Expand);
352 setOperationAction(ISD::FREM , MVT::f128 , Expand);
353
354 if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
360 }
361
362 // Promote the i8 variants and force them on up to i32 which has a shorter
363 // encoding.
364 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
366 // Promote the i16 variant too: tzcntw has a false dependency on Intel CPUs.
367 // For BSF, we emit a REP prefix to encode it as TZCNT for modern CPUs, so it
368 // makes sense to promote i16 as well.
369 setOperationPromotedToType(ISD::CTTZ , MVT::i16 , MVT::i32);
371
372 if (!Subtarget.hasBMI()) {
373 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
375 if (Subtarget.is64Bit()) {
376 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
378 }
379 }
380
381 if (Subtarget.hasLZCNT()) {
382 // When promoting the i8 variants, force them to i32 for a shorter
383 // encoding.
384 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
386 } else {
387 for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
388 if (VT == MVT::i64 && !Subtarget.is64Bit())
389 continue;
392 }
393 }
394
397 // Special handling for half-precision floating point conversions.
398 // If we don't have F16C support, then lower half float conversions
399 // into library calls.
401 Op, MVT::f32,
402 (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand);
403 // There's never any support for operations beyond MVT::f32.
404 setOperationAction(Op, MVT::f64, Expand);
405 setOperationAction(Op, MVT::f80, Expand);
406 setOperationAction(Op, MVT::f128, Expand);
407 }
408
409 for (MVT VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
410 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
411 setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand);
412 setTruncStoreAction(VT, MVT::f16, Expand);
413 setTruncStoreAction(VT, MVT::bf16, Expand);
414
417 }
418
422 if (Subtarget.is64Bit())
424 if (Subtarget.hasPOPCNT()) {
425 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
426 // popcntw is longer to encode than popcntl and also has a false dependency
427 // on the dest that popcntl hasn't had since Cannon Lake.
428 setOperationPromotedToType(ISD::CTPOP, MVT::i16, MVT::i32);
429 } else {
434 }
435
437
438 if (!Subtarget.hasMOVBE())
440
441 // X86 wants to expand cmov itself.
442 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
447 }
448 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
449 if (VT == MVT::i64 && !Subtarget.is64Bit())
450 continue;
453 }
454
455 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
458
460 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
461 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
465 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
466 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
467
468 // Darwin ABI issue.
469 for (auto VT : { MVT::i32, MVT::i64 }) {
470 if (VT == MVT::i64 && !Subtarget.is64Bit())
471 continue;
478 }
479
480 // 64-bit shl, sra, srl (iff 32-bit x86)
481 for (auto VT : { MVT::i32, MVT::i64 }) {
482 if (VT == MVT::i64 && !Subtarget.is64Bit())
483 continue;
487 }
488
489 if (Subtarget.hasSSEPrefetch() || Subtarget.hasThreeDNow())
491
493
494 // Expand certain atomics
495 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
503 }
504
505 if (!Subtarget.is64Bit())
507
508 if (Subtarget.canUseCMPXCHG16B())
510
511 // FIXME - use subtarget debug flags
512 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
513 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
514 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
516 }
517
520
523
524 setOperationAction(ISD::TRAP, MVT::Other, Legal);
526 if (Subtarget.isTargetPS())
528 else
530
531 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
533 setOperationAction(ISD::VAEND , MVT::Other, Expand);
534 bool Is64Bit = Subtarget.is64Bit();
535 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
536 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
537
540
542
543 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
546
548
549 auto setF16Action = [&] (MVT VT, LegalizeAction Action) {
550 setOperationAction(ISD::FABS, VT, Action);
551 setOperationAction(ISD::FNEG, VT, Action);
553 setOperationAction(ISD::FREM, VT, Action);
554 setOperationAction(ISD::FMA, VT, Action);
555 setOperationAction(ISD::FMINNUM, VT, Action);
556 setOperationAction(ISD::FMAXNUM, VT, Action);
559 setOperationAction(ISD::FSIN, VT, Action);
560 setOperationAction(ISD::FCOS, VT, Action);
561 setOperationAction(ISD::FSINCOS, VT, Action);
562 setOperationAction(ISD::FSQRT, VT, Action);
563 setOperationAction(ISD::FPOW, VT, Action);
564 setOperationAction(ISD::FLOG, VT, Action);
565 setOperationAction(ISD::FLOG2, VT, Action);
566 setOperationAction(ISD::FLOG10, VT, Action);
567 setOperationAction(ISD::FEXP, VT, Action);
568 setOperationAction(ISD::FEXP2, VT, Action);
569 setOperationAction(ISD::FEXP10, VT, Action);
570 setOperationAction(ISD::FCEIL, VT, Action);
571 setOperationAction(ISD::FFLOOR, VT, Action);
573 setOperationAction(ISD::FRINT, VT, Action);
574 setOperationAction(ISD::BR_CC, VT, Action);
575 setOperationAction(ISD::SETCC, VT, Action);
578 setOperationAction(ISD::FROUND, VT, Action);
580 setOperationAction(ISD::FTRUNC, VT, Action);
581 setOperationAction(ISD::FLDEXP, VT, Action);
582 };
583
584 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
585 // f16, f32 and f64 use SSE.
586 // Set up the FP register classes.
587 addRegisterClass(MVT::f16, Subtarget.hasAVX512() ? &X86::FR16XRegClass
588 : &X86::FR16RegClass);
589 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
590 : &X86::FR32RegClass);
591 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
592 : &X86::FR64RegClass);
593
594 // Disable f32->f64 extload as we can only generate this in one instruction
595 // under optsize. So it's easier to pattern match (fpext (load)) for that
596 // case instead of needing to emit 2 instructions for extload in the
597 // non-optsize case.
598 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
599
600 for (auto VT : { MVT::f32, MVT::f64 }) {
601 // Use ANDPD to simulate FABS.
603
604 // Use XORP to simulate FNEG.
606
607 // Use ANDPD and ORPD to simulate FCOPYSIGN.
609
610 // These might be better off as horizontal vector ops.
613
614 // We don't support sin/cos/fmod
618 }
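 // E.g. fabs(x) on f32 is emitted as an ANDPS with a constant-pool mask that
 // clears the sign bit (0x7fffffff), and -x as an XORPS with 0x80000000, since
 // SSE has no dedicated scalar abs/neg instructions.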
619
620 // Half type will be promoted by default.
621 setF16Action(MVT::f16, Promote);
629
659
660 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
661 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
662
663 // Lower this to MOVMSK plus an AND.
666
667 } else if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1() &&
668 (UseX87 || Is64Bit)) {
669 // Use SSE for f32, x87 for f64.
670 // Set up the FP register classes.
671 addRegisterClass(MVT::f32, &X86::FR32RegClass);
672 if (UseX87)
673 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
674
675 // Use ANDPS to simulate FABS.
677
678 // Use XORP to simulate FNEG.
680
681 if (UseX87)
683
684 // Use ANDPS and ORPS to simulate FCOPYSIGN.
685 if (UseX87)
688
689 // We don't support sin/cos/fmod
693
694 if (UseX87) {
695 // Always expand sin/cos functions even though x87 has an instruction.
699 }
700 } else if (UseX87) {
701 // f32 and f64 in x87.
702 // Set up the FP register classes.
703 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
704 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
705
706 for (auto VT : { MVT::f32, MVT::f64 }) {
709
710 // Always expand sin/cos functions even though x87 has an instruction.
714 }
715 }
716
717 // Expand FP32 immediates into loads from the stack, save special cases.
718 if (isTypeLegal(MVT::f32)) {
719 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
720 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
721 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
722 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
723 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
724 } else // SSE immediates.
725 addLegalFPImmediate(APFloat(+0.0f)); // xorps
726 }
727 // Expand FP64 immediates into loads from the stack, save special cases.
728 if (isTypeLegal(MVT::f64)) {
729 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
730 addLegalFPImmediate(APFloat(+0.0)); // FLD0
731 addLegalFPImmediate(APFloat(+1.0)); // FLD1
732 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
733 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
734 } else // SSE immediates.
735 addLegalFPImmediate(APFloat(+0.0)); // xorpd
736 }
737 // Support fp16 0 immediate.
738 if (isTypeLegal(MVT::f16))
739 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEhalf()));
740
741 // Handle constrained floating-point operations of scalar.
754
755 // We don't support FMA.
758
759 // f80 always uses X87.
760 if (UseX87) {
761 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
764 {
766 addLegalFPImmediate(TmpFlt); // FLD0
767 TmpFlt.changeSign();
768 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
769
770 bool ignored;
771 APFloat TmpFlt2(+1.0);
773 &ignored);
774 addLegalFPImmediate(TmpFlt2); // FLD1
775 TmpFlt2.changeSign();
776 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
777 }
778
779 // Always expand sin/cos functions even though x87 has an instruction.
783
795
796 // Handle constrained floating-point operations of scalar.
802 if (isTypeLegal(MVT::f16)) {
805 } else {
807 }
808 // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
809 // as Custom.
811 }
812
813 // f128 uses xmm registers, but most operations require libcalls.
814 if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
815 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
816 : &X86::VR128RegClass);
817
818 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
819
830
834
840 // No STRICT_FSINCOS
843
846 // We need to custom handle any FP_ROUND with an f128 input, but
847 // LegalizeDAG uses the result type to know when to run a custom handler.
848 // So we have to list all legal floating point result types here.
849 if (isTypeLegal(MVT::f32)) {
852 }
853 if (isTypeLegal(MVT::f64)) {
856 }
857 if (isTypeLegal(MVT::f80)) {
860 }
861
863
864 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
865 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
866 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
867 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
868 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
869 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
870 }
871
872 // Always use a library call for pow.
873 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
874 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
875 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
876 setOperationAction(ISD::FPOW , MVT::f128 , Expand);
877
886
887 // Some FP actions are always expanded for vector types.
888 for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16,
889 MVT::v4f32, MVT::v8f32, MVT::v16f32,
890 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
903 }
904
905 // First set operation action for all vector types to either promote
906 // (for widening) or expand (for scalarization). Then we will selectively
907 // turn on ones that can be effectively codegen'd.
947 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
948 setTruncStoreAction(InnerVT, VT, Expand);
949
952
953 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
954 // types, we have to deal with them whether we ask for Expansion or not.
955 // Setting Expand causes its own optimisation problems though, so leave
956 // them legal.
957 if (VT.getVectorElementType() == MVT::i1)
958 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
959
960 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
961 // split/scalarized right now.
962 if (VT.getVectorElementType() == MVT::f16 ||
963 VT.getVectorElementType() == MVT::bf16)
964 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
965 }
966 }
967
968 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
969 // with -msoft-float, disable use of MMX as well.
970 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
971 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
972 // No operations on x86mmx supported, everything uses intrinsics.
973 }
974
975 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
976 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
977 : &X86::VR128RegClass);
978
981
982 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
983 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
990
991 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
993
999 }
1000
1001 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
1002 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
1003 : &X86::VR128RegClass);
1004
1005 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
1006 // registers cannot be used even for integer operations.
1007 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
1008 : &X86::VR128RegClass);
1009 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
1010 : &X86::VR128RegClass);
1011 addRegisterClass(MVT::v8f16, Subtarget.hasVLX() ? &X86::VR128XRegClass
1012 : &X86::VR128RegClass);
1013 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
1014 : &X86::VR128RegClass);
1015 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
1016 : &X86::VR128RegClass);
1017
1018 for (auto VT : { MVT::f64, MVT::v4f32, MVT::v2f64 }) {
1021 }
1022
1023 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
1024 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
1029 }
1030
1031 setOperationAction(ISD::MUL, MVT::v2i8, Custom);
1032 setOperationAction(ISD::MUL, MVT::v4i8, Custom);
1033 setOperationAction(ISD::MUL, MVT::v8i8, Custom);
1034
1035 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
1036 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
1037 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1038 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
1039 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
1040 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
1041 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
1042 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
1043 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
1044 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
1047
1048 setOperationAction(ISD::SMULO, MVT::v16i8, Custom);
1049 setOperationAction(ISD::UMULO, MVT::v16i8, Custom);
1050 setOperationAction(ISD::UMULO, MVT::v2i32, Custom);
1051
1052 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
1053 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
1055
1056 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1057 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
1058 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
1059 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
1060 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
1061 }
1062
1063 setOperationAction(ISD::ABDU, MVT::v16i8, Custom);
1064 setOperationAction(ISD::ABDS, MVT::v16i8, Custom);
1065 setOperationAction(ISD::ABDU, MVT::v8i16, Custom);
1066 setOperationAction(ISD::ABDS, MVT::v8i16, Custom);
1067 setOperationAction(ISD::ABDU, MVT::v4i32, Custom);
1068 setOperationAction(ISD::ABDS, MVT::v4i32, Custom);
1069
1080
1085
1086 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1090
1091 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1092 // setcc all the way to isel and prefer SETGT in some isel patterns.
1095 }
1096
1097 setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
1098 setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
1103
1104 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
1110 }
1111
1112 for (auto VT : { MVT::v8f16, MVT::v2f64, MVT::v2i64 }) {
1116
1117 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
1118 continue;
1119
1122 }
1123 setF16Action(MVT::v8f16, Expand);
1124 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
1125 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
1126 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
1127 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
1128 setOperationAction(ISD::FNEG, MVT::v8f16, Custom);
1129 setOperationAction(ISD::FABS, MVT::v8f16, Custom);
1131
1132 // Custom lower v2i64 and v2f64 selects.
1139
1146
1147 // Custom legalize these to avoid over promotion or custom promotion.
1148 for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
1153 }
1154
1159
1162
1165
1166 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
1171
1176
1177 // We want to legalize this to an f64 load rather than an i64 load on
1178 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
1179 // store.
1180 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
1181 setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
1182 setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
1183 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
1184 setOperationAction(ISD::STORE, MVT::v4i16, Custom);
1186
1187 // Add 32-bit vector stores to help vectorization opportunities.
1188 setOperationAction(ISD::STORE, MVT::v2i16, Custom);
1190
1194 if (!Subtarget.hasAVX512())
1196
1200
1202
1219
1220 // In the customized shift lowering, the legal v4i32/v2i64 cases
1221 // in AVX2 will be recognized.
1222 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1226 if (VT == MVT::v2i64) continue;
1231 }
1232
1238 }
1239
1240 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1241 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1242 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1243 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1245 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1246 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1247 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1248 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1249
1250 // These might be better off as horizontal vector ops.
1255 }
1256
1257 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1258 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1261 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
1265 setOperationAction(ISD::FRINT, RoundedTy, Legal);
1271
1273 }
1274
1275 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
1276 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
1277 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
1278 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
1279 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
1280 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
1281 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
1282 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
1283
1284 for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
1287 }
1288
1292
1293 // FIXME: Do we need to handle scalar-to-vector here?
1294 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1295 setOperationAction(ISD::SMULO, MVT::v2i32, Custom);
1296
1297 // We directly match byte blends in the backend as they match the VSELECT
1298 // condition form.
1300
1301 // SSE41 brings specific instructions for doing vector sign extend even in
1302 // cases where we don't have SRA.
1303 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1306 }
1307
1308 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1309 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1310 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
1311 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
1312 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
1313 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1314 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1315 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1316 }
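 // E.g. a sign-extending load such as
 //   %v = load <4 x i8>, ptr %p
 //   %w = sext <4 x i8> %v to <4 x i32>
 // can be selected to a single PMOVSXBD with a memory operand.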
1317
1318 if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
1319 // We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can
1320 // do the pre and post work in the vector domain.
1323 // We need to mark SINT_TO_FP as Custom even though we want to expand it
1324 // so that DAG combine doesn't try to turn it into uint_to_fp.
1327 }
1328 }
1329
1330 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) {
1332 }
1333
1334 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1335 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1336 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1339 }
1340
1341 // XOP can efficiently perform BITREVERSE with VPPERM.
1342 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1344
1345 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1346 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1348 }
1349
1350 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1351 bool HasInt256 = Subtarget.hasInt256();
1352
1353 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1354 : &X86::VR256RegClass);
1355 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1356 : &X86::VR256RegClass);
1357 addRegisterClass(MVT::v16f16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1358 : &X86::VR256RegClass);
1359 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1360 : &X86::VR256RegClass);
1361 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1362 : &X86::VR256RegClass);
1363 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1364 : &X86::VR256RegClass);
1365 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1366 : &X86::VR256RegClass);
1367
1368 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1381
1383
1387
1390 }
1391
1392 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1393 // even though v8i16 is a legal type.
1394 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1395 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1396 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1397 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1401
1408
1420
1421 if (!Subtarget.hasAVX512())
1423
1424 // In the customized shift lowering, the legal v8i32/v4i64 cases
1425 // in AVX2 will be recognized.
1426 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1432 if (VT == MVT::v4i64) continue;
1437 }
1438
1439 // These types need custom splitting if their input is a 128-bit vector.
1444
1448 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1449 setOperationAction(ISD::SELECT, MVT::v16f16, Custom);
1452
1453 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1457 }
1458
1463
1465
1466 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1470
1471 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1472 // setcc all the way to isel and prefer SETGT in some isel patterns.
1475 }
1476
1477 setOperationAction(ISD::SETCC, MVT::v4f64, Custom);
1478 setOperationAction(ISD::SETCC, MVT::v8f32, Custom);
1483
1484 if (Subtarget.hasAnyFMA()) {
1485 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1486 MVT::v2f64, MVT::v4f64 }) {
1489 }
1490 }
1491
1492 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1493 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1494 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1495 }
1496
1497 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1498 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1499 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1500 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1501
1502 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1503 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1504 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1505 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1506 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1507 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1508 setOperationAction(ISD::AVGCEILU, MVT::v16i16, HasInt256 ? Legal : Custom);
1509 setOperationAction(ISD::AVGCEILU, MVT::v32i8, HasInt256 ? Legal : Custom);
1510
1511 setOperationAction(ISD::SMULO, MVT::v32i8, Custom);
1512 setOperationAction(ISD::UMULO, MVT::v32i8, Custom);
1513
1514 setOperationAction(ISD::ABS, MVT::v4i64, Custom);
1515 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1516 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1517 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1518 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1519
1520 setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1521 setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1522 setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1523 setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1524 setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1525 setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1526 setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1527 setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1532
1533 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1534 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1535 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1536 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1537 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1538 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1539 }
1540
1541 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1544 }
1545
1546 if (HasInt256) {
1547 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1548 // when we have a 256-bit-wide blend with immediate.
1551
1552 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1553 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1554 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1555 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1556 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1557 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1558 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1559 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1560 }
1561 }
1562
1563 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1564 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1565 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1567 }
1568
1569 // Extract subvector is special because the value type
1570 // (result) is 128-bit but the source is 256-bit wide.
1571 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1572 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
1574 }
1575
1576 // Custom lower several nodes for 256-bit types.
1577 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1578 MVT::v16f16, MVT::v8f32, MVT::v4f64 }) {
1588 }
1589 setF16Action(MVT::v16f16, Expand);
1590 setOperationAction(ISD::FNEG, MVT::v16f16, Custom);
1591 setOperationAction(ISD::FABS, MVT::v16f16, Custom);
1593 setOperationAction(ISD::FADD, MVT::v16f16, Expand);
1594 setOperationAction(ISD::FSUB, MVT::v16f16, Expand);
1595 setOperationAction(ISD::FMUL, MVT::v16f16, Expand);
1596 setOperationAction(ISD::FDIV, MVT::v16f16, Expand);
1597
1598 if (HasInt256) {
1600
1601 // Custom legalize 2x32 to get a little better code.
1604
1605 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1606 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1608 }
1609 }
1610
1611 if (!Subtarget.useSoftFloat() && !Subtarget.hasFP16() &&
1612 Subtarget.hasF16C()) {
1613 for (MVT VT : { MVT::f16, MVT::v2f16, MVT::v4f16, MVT::v8f16 }) {
1616 }
1617 for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32, MVT::v8f32 }) {
1620 }
1621 for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) {
1622 setOperationPromotedToType(Opc, MVT::v8f16, MVT::v8f32);
1623 setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32);
1624 }
1625 }
1626
1627 // This block controls legalization of the mask vector sizes that are
1628 // available with AVX512. 512-bit vectors are in a separate block controlled
1629 // by useAVX512Regs.
1630 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1631 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1632 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1633 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1634 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1635 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1636
1640
1641 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1642 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1643 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1644 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1645 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1646 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1647 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1648 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1653
1654 // There is no byte sized k-register load or store without AVX512DQ.
1655 if (!Subtarget.hasDQI()) {
1656 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1657 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1658 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1659 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1660
1665 }
1666
1667 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1668 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1672 }
1673
1674 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 })
1676
1677 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1681
1688 }
1689
1690 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1692 }
1693
1694 // This block controls legalization for 512-bit operations with 8/16/32/64 bit
1695 // elements. 512-bit operations can be disabled based on prefer-vector-width and
1696 // required-vector-width function attributes.
1697 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1698 bool HasBWI = Subtarget.hasBWI();
1699
1700 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1701 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1702 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1703 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1704 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1705 addRegisterClass(MVT::v32f16, &X86::VR512RegClass);
1706 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1707
1708 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1709 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1710 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1711 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1712 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1713 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1714 if (HasBWI)
1715 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1716 }
1717
1718 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1726 }
1727
1728 for (MVT VT : { MVT::v16i1, MVT::v16i8 }) {
1733 }
1734
1735 for (MVT VT : { MVT::v16i16, MVT::v16i32 }) {
1740 }
1741
1748
1760
1761 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1762 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1763 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1764 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1765 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1766 if (HasBWI)
1767 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1768
1769 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1770 // to 512-bit rather than use the AVX2 instructions so that we can use
1771 // k-masks.
1772 if (!Subtarget.hasVLX()) {
1773 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1774 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1777 }
1778 }
1779
1781 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal);
1782 setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom);
1792
1793 if (HasBWI) {
1794 // Extends from v64i1 masks to 512-bit vectors.
1798 }
1799
1800 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1813
1815 }
1816
1817 for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
1820 }
1821
1822 setOperationAction(ISD::ADD, MVT::v32i16, HasBWI ? Legal : Custom);
1823 setOperationAction(ISD::SUB, MVT::v32i16, HasBWI ? Legal : Custom);
1824 setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom);
1825 setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom);
1826
1827 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1828 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1829 setOperationAction(ISD::MUL, MVT::v32i16, HasBWI ? Legal : Custom);
1830 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1831
1832 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1833 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1834 setOperationAction(ISD::MULHS, MVT::v32i16, HasBWI ? Legal : Custom);
1835 setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom);
1836 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1837 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1838 setOperationAction(ISD::AVGCEILU, MVT::v32i16, HasBWI ? Legal : Custom);
1839 setOperationAction(ISD::AVGCEILU, MVT::v64i8, HasBWI ? Legal : Custom);
1840
1841 setOperationAction(ISD::SMULO, MVT::v64i8, Custom);
1842 setOperationAction(ISD::UMULO, MVT::v64i8, Custom);
1843
1845
1846 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1855
1856 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1857 // setcc all the way to isel and prefer SETGT in some isel patterns.
1860 }
1861
1862 setOperationAction(ISD::SETCC, MVT::v8f64, Custom);
1863 setOperationAction(ISD::SETCC, MVT::v16f32, Custom);
1868
1869 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1876 }
1877
1878 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1879 setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
1880 setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
1882 setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom);
1883 setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom);
1884 setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom);
1885 setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom);
1890 }
1891
1892 setOperationAction(ISD::FSHL, MVT::v64i8, Custom);
1893 setOperationAction(ISD::FSHR, MVT::v64i8, Custom);
1894 setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
1895 setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
1896 setOperationAction(ISD::FSHL, MVT::v16i32, Custom);
1897 setOperationAction(ISD::FSHR, MVT::v16i32, Custom);
1898
1899 if (Subtarget.hasDQI()) {
1903 setOperationAction(Opc, MVT::v8i64, Custom);
1904 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1905 }
1906
1907 if (Subtarget.hasCDI()) {
1908 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1909 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1911 }
1912 } // Subtarget.hasCDI()
1913
1914 if (Subtarget.hasVPOPCNTDQ()) {
1915 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1917 }
1918
1919 // Extract subvector is special because the value type
1920 // (result) is 256-bit but the source is 512-bit wide.
1921 // 128-bit was made Legal under AVX1.
1922 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1923 MVT::v16f16, MVT::v8f32, MVT::v4f64 })
1925
1926 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
1927 MVT::v32f16, MVT::v16f32, MVT::v8f64 }) {
1937 }
1938 setF16Action(MVT::v32f16, Expand);
1943 for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) {
1944 setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32);
1945 setOperationPromotedToType(Opc, MVT::v32f16, MVT::v32f32);
1946 }
1947
1948 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1953 }
1954 if (HasBWI) {
1955 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1958 }
1959 } else {
1960 setOperationAction(ISD::STORE, MVT::v32i16, Custom);
1961 setOperationAction(ISD::STORE, MVT::v64i8, Custom);
1962 }
1963
1964 if (Subtarget.hasVBMI2()) {
1965 for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
1968 }
1969
1970 setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
1971 setOperationAction(ISD::ROTR, MVT::v32i16, Custom);
1972 }
1973 }// useAVX512Regs
1974
1975 if (!Subtarget.useSoftFloat() && Subtarget.hasVBMI2()) {
1976 for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v16i16, MVT::v8i32,
1977 MVT::v4i64}) {
1980 }
1981 }
1982
1983 // This block controls legalization for operations that don't have
1984 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1985 // narrower widths.
1986 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1987 // These operations are handled on non-VLX by artificially widening in
1988 // isel patterns.
1989
1993
1994 if (Subtarget.hasDQI()) {
1995 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1996 // v2f32 UINT_TO_FP is already custom under SSE2.
1999 "Unexpected operation action!");
2000 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
2005 }
2006
2007 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
2013 }
2014
2015 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
2018 }
2019
2020 // Custom legalize 2x32 to get a little better code.
2023
2024 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
2025 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
2027
2028 if (Subtarget.hasDQI()) {
2032 setOperationAction(Opc, MVT::v2i64, Custom);
2033 setOperationAction(Opc, MVT::v4i64, Custom);
2034 }
2035 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
2036 setOperationAction(ISD::MUL, MVT::v4i64, Legal);
2037 }
2038
2039 if (Subtarget.hasCDI()) {
2040 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
2042 }
2043 } // Subtarget.hasCDI()
2044
2045 if (Subtarget.hasVPOPCNTDQ()) {
2046 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
2048 }
2049 setOperationAction(ISD::FNEG, MVT::v32f16, Custom);
2050 setOperationAction(ISD::FABS, MVT::v32f16, Custom);
2052 }
2053
2054 // This block controls legalization of v32i1/v64i1, which are available with
2055 // AVX512BW.
2056 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
2057 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
2058 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
2059
2060 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
2071 }
2072
2073 for (auto VT : { MVT::v16i1, MVT::v32i1 })
2075
2076 // Extends from v32i1 masks to 256-bit vectors.
2080
2081 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
2082 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
2083 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
2084 }
2085
2086 // These operations are handled on non-VLX by artificially widening in
2087 // isel patterns.
2088 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
2089
2090 if (Subtarget.hasBITALG()) {
2091 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
2093 }
2094 }
2095
2096 if (!Subtarget.useSoftFloat() && Subtarget.hasFP16()) {
2097 auto setGroup = [&] (MVT VT) {
2108
2121
2123
2126
2132
2138
2142 };
2143
2144 // AVX512_FP16 scalar operations
2145 setGroup(MVT::f16);
2159
2162
2163 if (Subtarget.useAVX512Regs()) {
2164 setGroup(MVT::v32f16);
2170 setOperationAction(ISD::FP_ROUND, MVT::v16f16, Legal);
2177
2182 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i8, MVT::v32i16);
2184 MVT::v32i16);
2185 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i8, MVT::v32i16);
2187 MVT::v32i16);
2188 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i1, MVT::v32i16);
2190 MVT::v32i16);
2191 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i1, MVT::v32i16);
2193 MVT::v32i16);
2194
2198
2199 setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Legal);
2200 setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Legal);
2201 }
2202
2203 if (Subtarget.hasVLX()) {
2204 setGroup(MVT::v8f16);
2205 setGroup(MVT::v16f16);
2206
2217
2228
2229 // INSERT_VECTOR_ELT v8f16 extended to VECTOR_SHUFFLE
2232
2236
2237 setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Legal);
2238 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Legal);
2239 setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Legal);
2240 setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Legal);
2241
2242 // Need to custom widen these to prevent scalarization.
2243 setOperationAction(ISD::LOAD, MVT::v4f16, Custom);
2244 setOperationAction(ISD::STORE, MVT::v4f16, Custom);
2245 }
2246 }
2247
2248 if (!Subtarget.useSoftFloat() &&
2249 (Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16())) {
2250 addRegisterClass(MVT::v8bf16, Subtarget.hasAVX512() ? &X86::VR128XRegClass
2251 : &X86::VR128RegClass);
2252 addRegisterClass(MVT::v16bf16, Subtarget.hasAVX512() ? &X86::VR256XRegClass
2253 : &X86::VR256RegClass);
2254 // We set the type action of bf16 to TypeSoftPromoteHalf, but we don't
2255 // provide the method to promote BUILD_VECTOR and INSERT_VECTOR_ELT.
2256 // Set the operation action Custom to do the customization later.
2259 for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
2260 setF16Action(VT, Expand);
2269 }
2271 addLegalFPImmediate(APFloat::getZero(APFloat::BFloat()));
2272 }
2273
2274 if (!Subtarget.useSoftFloat() && Subtarget.hasBF16()) {
2275 addRegisterClass(MVT::v32bf16, &X86::VR512RegClass);
2276 setF16Action(MVT::v32bf16, Expand);
2277 setOperationAction(ISD::FADD, MVT::v32bf16, Expand);
2278 setOperationAction(ISD::FSUB, MVT::v32bf16, Expand);
2279 setOperationAction(ISD::FMUL, MVT::v32bf16, Expand);
2280 setOperationAction(ISD::FDIV, MVT::v32bf16, Expand);
2282 setOperationAction(ISD::FP_ROUND, MVT::v16bf16, Custom);
2286 }
2287
2288 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
2289 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
2290 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
2291 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
2292 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
2293 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
2294
2295 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
2296 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
2297 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
2298 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
2299 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
2300
2301 if (Subtarget.hasBWI()) {
2302 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
2303 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
2304 }
2305
2306 if (Subtarget.hasFP16()) {
2307 // vcvttph2[u]dq v4f16 -> v4i32/64, v2f16 -> v2i32/64
2316 // vcvt[u]dq2ph v4i32/64 -> v4f16, v2i32/64 -> v2f16
2325 // vcvtps2phx v4f32 -> v4f16, v2f32 -> v2f16
2330 // vcvtph2psx v4f16 -> v4f32, v2f16 -> v2f32
2335 }
2336 }
2337
2338 if (!Subtarget.useSoftFloat() && Subtarget.hasAMXTILE()) {
2339 addRegisterClass(MVT::x86amx, &X86::TILERegClass);
2340 }
2341
2342 // We want to custom lower some of our intrinsics.
2346 if (!Subtarget.is64Bit()) {
2348 }
2349
2350 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
2351 // handle type legalization for these operations here.
2352 //
2353 // FIXME: We really should do custom legalization for addition and
2354 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
2355 // than generic legalization for 64-bit multiplication-with-overflow, though.
2356 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
2357 if (VT == MVT::i64 && !Subtarget.is64Bit())
2358 continue;
2359 // Add/Sub/Mul with overflow operations are custom lowered.
2366
2367 // Support carry in as value rather than glue.
2373 }
2374
2375 if (!Subtarget.is64Bit()) {
2376 // These libcalls are not available in 32-bit.
2377 setLibcallName(RTLIB::SHL_I128, nullptr);
2378 setLibcallName(RTLIB::SRL_I128, nullptr);
2379 setLibcallName(RTLIB::SRA_I128, nullptr);
2380 setLibcallName(RTLIB::MUL_I128, nullptr);
2381 // The MULO libcall is not part of libgcc, only compiler-rt.
2382 setLibcallName(RTLIB::MULO_I64, nullptr);
2383 }
2384 // The MULO libcall is not part of libgcc, only compiler-rt.
2385 setLibcallName(RTLIB::MULO_I128, nullptr);
2386
2387 // Combine sin / cos into _sincos_stret if it is available.
2388 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
2389 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
2392 }
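 // E.g. a sinf(x) call followed by cosf(x) on the same argument can then be
 // merged into one __sincosf_stret call that returns both results.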
2393
2394 if (Subtarget.isTargetWin64()) {
2395 setOperationAction(ISD::SDIV, MVT::i128, Custom);
2396 setOperationAction(ISD::UDIV, MVT::i128, Custom);
2397 setOperationAction(ISD::SREM, MVT::i128, Custom);
2398 setOperationAction(ISD::UREM, MVT::i128, Custom);
2407 }
2408
2409 // On 32-bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
2410 // is. We should promote the value to 64 bits to solve this.
2411 // This is what the CRT headers do - `fmodf` is an inline header
2412 // function casting to f64 and calling `fmod`.
2413 if (Subtarget.is32Bit() &&
2414 (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
2415 for (ISD::NodeType Op :
2425 if (isOperationExpand(Op, MVT::f32))
2426 setOperationAction(Op, MVT::f32, Promote);
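 // i.e. fmodf(x, y) is effectively lowered as (float)fmod((double)x, (double)y),
 // mirroring the inline definition in the CRT headers.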
2427
2428 // We have target-specific dag combine patterns for the following nodes:
2439 ISD::SHL,
2440 ISD::SRA,
2441 ISD::SRL,
2442 ISD::OR,
2443 ISD::AND,
2445 ISD::ADD,
2446 ISD::FADD,
2447 ISD::FSUB,
2448 ISD::FNEG,
2449 ISD::FMA,
2453 ISD::SUB,
2454 ISD::LOAD,
2455 ISD::MLOAD,
2456 ISD::STORE,
2470 ISD::SETCC,
2471 ISD::MUL,
2472 ISD::XOR,
2480
2482
2483 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
2485 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
2487 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
2489
2490 // TODO: These control memcmp expansion in CGP and could be raised higher, but
2491 // that needs to be benchmarked and balanced with the potential use of vector
2492 // load/store types (PR33329, PR33914).
2495
2496 // Default loop alignment, which can be overridden by -align-loops.
2498
2499 // An out-of-order CPU can speculatively execute past a predictable branch,
2500 // but a conditional move could be stalled by an expensive earlier operation.
2501 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
2502 EnableExtLdPromotion = true;
2504
2506
2507 // Default to having -disable-strictnode-mutation on
2508 IsStrictFPEnabled = true;
2509}
2510
2511// This has so far only been implemented for 64-bit MachO.
2512bool X86TargetLowering::useLoadStackGuardNode() const {
2513 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
2514}
2515
2516bool X86TargetLowering::useStackGuardXorFP() const {
2517 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
2518 return Subtarget.getTargetTriple().isOSMSVCRT() && !Subtarget.isTargetMachO();
2519}
2520
2521SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
2522 const SDLoc &DL) const {
2523 EVT PtrTy = getPointerTy(DAG.getDataLayout());
2524 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
2525 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
2526 return SDValue(Node, 0);
2527}
2528
2529TargetLoweringBase::LegalizeTypeAction
2530X86TargetLowering::getPreferredVectorAction(MVT VT) const {
2531 if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
2532 !Subtarget.hasBWI())
2533 return TypeSplitVector;
2534
2535 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
2536 !Subtarget.hasF16C() && VT.getVectorElementType() == MVT::f16)
2537 return TypeSplitVector;
2538
2539 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
2540 VT.getVectorElementType() != MVT::i1)
2541 return TypeWidenVector;
2542
2542
2543 return TargetLoweringBase::getPreferredVectorAction(VT);
2544}
2545
2546FastISel *
2547X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
2548 const TargetLibraryInfo *libInfo) const {
2549 return X86::createFastISel(funcInfo, libInfo);
2550}
2551
2552//===----------------------------------------------------------------------===//
2553// Other Lowering Hooks
2554//===----------------------------------------------------------------------===//
2555
2556bool X86::mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
2557 bool AssumeSingleUse) {
2558 if (!AssumeSingleUse && !Op.hasOneUse())
2559 return false;
2560 if (!ISD::isNormalLoad(Op.getNode()))
2561 return false;
2562
2563 // If this is an unaligned vector, make sure the target supports folding it.
2564 auto *Ld = cast<LoadSDNode>(Op.getNode());
2565 if (!Subtarget.hasAVX() && !Subtarget.hasSSEUnalignedMem() &&
2566 Ld->getValueSizeInBits(0) == 128 && Ld->getAlign() < Align(16))
2567 return false;
2568
2569 // TODO: If this is a non-temporal load and the target has an instruction
2570 // for it, it should not be folded. See "useNonTemporalLoad()".
2571
2572 return true;
2573}
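// For illustration: on a pre-AVX target without fast unaligned SSE memory
// accesses, a 128-bit load with only 8-byte alignment is not reported as
// foldable here, since legacy SSE instructions require 16-byte aligned memory
// operands; with AVX the unaligned memory-operand form (e.g. vaddps) is fine.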
2574
2575bool X86::mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
2576 const X86Subtarget &Subtarget,
2577 bool AssumeSingleUse) {
2578 assert(Subtarget.hasAVX() && "Expected AVX for broadcast from memory");
2579 if (!X86::mayFoldLoad(Op, Subtarget, AssumeSingleUse))
2580 return false;
2581
2582 // We cannot replace a wide volatile load with a broadcast-from-memory,
2583 // because that would narrow the load, which isn't legal for volatiles.
2584 auto *Ld = cast<LoadSDNode>(Op.getNode());
2585 return !Ld->isVolatile() ||
2586 Ld->getValueSizeInBits(0) == EltVT.getScalarSizeInBits();
2587}
2588
2589bool X86::mayFoldIntoStore(SDValue Op) {
2590 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
2591}
2592
2593bool X86::mayFoldIntoZeroExtend(SDValue Op) {
2594 if (Op.hasOneUse()) {
2595 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
2596 return (ISD::ZERO_EXTEND == Opcode);
2597 }
2598 return false;
2599}
2600
2601static bool isTargetShuffle(unsigned Opcode) {
2602 switch(Opcode) {
2603 default: return false;
2604 case X86ISD::BLENDI:
2605 case X86ISD::PSHUFB:
2606 case X86ISD::PSHUFD:
2607 case X86ISD::PSHUFHW:
2608 case X86ISD::PSHUFLW:
2609 case X86ISD::SHUFP:
2610 case X86ISD::INSERTPS:
2611 case X86ISD::EXTRQI:
2612 case X86ISD::INSERTQI:
2613 case X86ISD::VALIGN:
2614 case X86ISD::PALIGNR:
2615 case X86ISD::VSHLDQ:
2616 case X86ISD::VSRLDQ:
2617 case X86ISD::MOVLHPS:
2618 case X86ISD::MOVHLPS:
2619 case X86ISD::MOVSHDUP:
2620 case X86ISD::MOVSLDUP:
2621 case X86ISD::MOVDDUP:
2622 case X86ISD::MOVSS:
2623 case X86ISD::MOVSD:
2624 case X86ISD::MOVSH:
2625 case X86ISD::UNPCKL:
2626 case X86ISD::UNPCKH:
2627 case X86ISD::VBROADCAST:
2628 case X86ISD::VPERMILPI:
2629 case X86ISD::VPERMILPV:
2630 case X86ISD::VPERM2X128:
2631 case X86ISD::SHUF128:
2632 case X86ISD::VPERMIL2:
2633 case X86ISD::VPERMI:
2634 case X86ISD::VPPERM:
2635 case X86ISD::VPERMV:
2636 case X86ISD::VPERMV3:
2637 case X86ISD::VZEXT_MOVL:
2638 return true;
2639 }
2640}
2641
2642static bool isTargetShuffleVariableMask(unsigned Opcode) {
2643 switch (Opcode) {
2644 default: return false;
2645 // Target Shuffles.
2646 case X86ISD::PSHUFB:
2647 case X86ISD::VPERMILPV:
2648 case X86ISD::VPERMIL2:
2649 case X86ISD::VPPERM:
2650 case X86ISD::VPERMV:
2651 case X86ISD::VPERMV3:
2652 return true;
2653 // 'Faux' Target Shuffles.
2654 case ISD::OR:
2655 case ISD::AND:
2656 case X86ISD::ANDNP:
2657 return true;
2658 }
2659}
2660
2661SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
2662 MachineFunction &MF = DAG.getMachineFunction();
2663 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2664 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2665 int ReturnAddrIndex = FuncInfo->getRAIndex();
2666
2667 if (ReturnAddrIndex == 0) {
2668 // Set up a frame object for the return address.
2669 unsigned SlotSize = RegInfo->getSlotSize();
2670 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
2671 -(int64_t)SlotSize,
2672 false);
2673 FuncInfo->setRAIndex(ReturnAddrIndex);
2674 }
2675
2676 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
2677}
2678
2679bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model CM,
2680 bool HasSymbolicDisplacement) {
2681 // The offset should fit into a 32-bit immediate field.
2682 if (!isInt<32>(Offset))
2683 return false;
2684
2685 // If we don't have a symbolic displacement - we don't have any extra
2686 // restrictions.
2687 if (!HasSymbolicDisplacement)
2688 return true;
2689
2690 // We can fold large offsets in the large code model because we always use
2691 // 64-bit offsets.
2692 if (CM == CodeModel::Large)
2693 return true;
2694
2695 // For the kernel code model we know that all objects reside in the negative
2696 // half of the 32-bit address space. We must not accept negative offsets, since
2697 // they may fall just outside that range, but we may accept pretty large positive ones.
2698 if (CM == CodeModel::Kernel)
2699 return Offset >= 0;
2700
2701 // For other non-large code models we assume that the last small object ends
2702 // at least 16MB before the end of the 31-bit boundary. We may also accept
2703 // pretty large negative constants, knowing that all objects are in the positive
2704 // half of the address space.
2705 return Offset < 16 * 1024 * 1024;
2706}
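// For illustration: with the kernel code model an access like GV+16 is
// considered foldable while GV-16 is not, whereas with the small code model
// both are accepted as long as the offset stays below 16MB.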
2707
2708/// Return true if the condition is a signed comparison operation.
2709static bool isX86CCSigned(unsigned X86CC) {
2710 switch (X86CC) {
2711 default:
2712 llvm_unreachable("Invalid integer condition!");
2713 case X86::COND_E:
2714 case X86::COND_NE:
2715 case X86::COND_B:
2716 case X86::COND_A:
2717 case X86::COND_BE:
2718 case X86::COND_AE:
2719 return false;
2720 case X86::COND_G:
2721 case X86::COND_GE:
2722 case X86::COND_L:
2723 case X86::COND_LE:
2724 return true;
2725 }
2726}
2727
2728static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
2729 switch (SetCCOpcode) {
2730 // clang-format off
2731 default: llvm_unreachable("Invalid integer condition!");
2732 case ISD::SETEQ: return X86::COND_E;
2733 case ISD::SETGT: return X86::COND_G;
2734 case ISD::SETGE: return X86::COND_GE;
2735 case ISD::SETLT: return X86::COND_L;
2736 case ISD::SETLE: return X86::COND_LE;
2737 case ISD::SETNE: return X86::COND_NE;
2738 case ISD::SETULT: return X86::COND_B;
2739 case ISD::SETUGT: return X86::COND_A;
2740 case ISD::SETULE: return X86::COND_BE;
2741 case ISD::SETUGE: return X86::COND_AE;
2742 // clang-format on
2743 }
2744}
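// For illustration: an unsigned (setcc ult %x, %y) maps to COND_B and is
// emitted as a cmp followed by jb/setb/cmovb, while the signed (setcc slt)
// form maps to COND_L (jl/setl/cmovl).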
2745
2746/// Do a one-to-one translation of an ISD::CondCode to the X86-specific
2747/// condition code, returning the condition code and the LHS/RHS of the
2748/// comparison to make.
2749static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
2750 bool isFP, SDValue &LHS, SDValue &RHS,
2751 SelectionDAG &DAG) {
2752 if (!isFP) {
2753 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2754 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnes()) {
2755 // X > -1 -> X == 0, jump !sign.
2756 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2757 return X86::COND_NS;
2758 }
2759 if (SetCCOpcode == ISD::SETLT && RHSC->isZero()) {
2760 // X < 0 -> X == 0, jump on sign.
2761 return X86::COND_S;
2762 }
2763 if (SetCCOpcode == ISD::SETGE && RHSC->isZero()) {
2764 // X >= 0 -> X == 0, jump on !sign.
2765 return X86::COND_NS;
2766 }
2767 if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
2768 // X < 1 -> X <= 0
2769 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2770 return X86::COND_LE;
2771 }
2772 }
2773
2774 return TranslateIntegerX86CC(SetCCOpcode);
2775 }
2776
2777 // First determine if it is required or is profitable to flip the operands.
2778
2779 // If LHS is a foldable load, but RHS is not, flip the condition.
2780 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
2781 !ISD::isNON_EXTLoad(RHS.getNode())) {
2782 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
2783 std::swap(LHS, RHS);
2784 }
2785
2786 switch (SetCCOpcode) {
2787 default: break;
2788 case ISD::SETOLT:
2789 case ISD::SETOLE:
2790 case ISD::SETUGT:
2791 case ISD::SETUGE:
2792 std::swap(LHS, RHS);
2793 break;
2794 }
2795
2796 // On a floating point condition, the flags are set as follows:
2797 // ZF PF CF op
2798 // 0 | 0 | 0 | X > Y
2799 // 0 | 0 | 1 | X < Y
2800 // 1 | 0 | 0 | X == Y
2801 // 1 | 1 | 1 | unordered
2802 switch (SetCCOpcode) {
2803 // clang-format off
2804 default: llvm_unreachable("Condcode should be pre-legalized away");
2805 case ISD::SETUEQ:
2806 case ISD::SETEQ: return X86::COND_E;
2807 case ISD::SETOLT: // flipped
2808 case ISD::SETOGT:
2809 case ISD::SETGT: return X86::COND_A;
2810 case ISD::SETOLE: // flipped
2811 case ISD::SETOGE:
2812 case ISD::SETGE: return X86::COND_AE;
2813 case ISD::SETUGT: // flipped
2814 case ISD::SETULT:
2815 case ISD::SETLT: return X86::COND_B;
2816 case ISD::SETUGE: // flipped
2817 case ISD::SETULE:
2818 case ISD::SETLE: return X86::COND_BE;
2819 case ISD::SETONE:
2820 case ISD::SETNE: return X86::COND_NE;
2821 case ISD::SETUO: return X86::COND_P;
2822 case ISD::SETO: return X86::COND_NP;
2823 case ISD::SETOEQ:
2824 case ISD::SETUNE: return X86::COND_INVALID;
2825 // clang-format on
2826 }
2827}
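// For illustration: (setcc olt %x, %y) first swaps the operands (SETOLT is in
// the "flipped" group above) and then maps to COND_A, i.e. a compare of
// (%y, %x) followed by a check for "above" (CF == 0 and ZF == 0), which is
// also false for unordered inputs, as the 'olt' predicate requires.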
2828
2829/// Is there a floating point cmov for the specific X86 condition code?
2830/// Current x86 ISA includes the following FP cmov instructions:
2831/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
2832static bool hasFPCMov(unsigned X86CC) {
2833 switch (X86CC) {
2834 default:
2835 return false;
2836 case X86::COND_B:
2837 case X86::COND_BE:
2838 case X86::COND_E:
2839 case X86::COND_P:
2840 case X86::COND_A:
2841 case X86::COND_AE:
2842 case X86::COND_NE:
2843 case X86::COND_NP:
2844 return true;
2845 }
2846}
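// For illustration: a floating-point select on COND_B can be emitted directly
// as fcmovb, while COND_G (signed greater) has no fcmov form and has to be
// handled differently, e.g. by rewriting the condition or branching.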
2847
2848static bool useVPTERNLOG(const X86Subtarget &Subtarget, MVT VT) {
2849 return Subtarget.hasVLX() || Subtarget.canExtendTo512DQ() ||
2850 VT.is512BitVector();
2851}
2852
2853bool X86TargetLowering::getTgtMemIntrinsicInfo(IntrinsicInfo &Info,
2854 const CallInst &I,
2855 MachineFunction &MF,
2856 unsigned Intrinsic) const {
2858 Info.offset = 0;
2859
2860 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
2861 if (!IntrData) {
2862 switch (Intrinsic) {
2863 case Intrinsic::x86_aesenc128kl:
2864 case Intrinsic::x86_aesdec128kl:
2866 Info.ptrVal = I.getArgOperand(1);
2867 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
2868 Info.align = Align(1);
2870 return true;
2871 case Intrinsic::x86_aesenc256kl:
2872 case Intrinsic::x86_aesdec256kl:
2874 Info.ptrVal = I.getArgOperand(1);
2875 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
2876 Info.align = Align(1);
2878 return true;
2879 case Intrinsic::x86_aesencwide128kl:
2880 case Intrinsic::x86_aesdecwide128kl:
2882 Info.ptrVal = I.getArgOperand(0);
2883 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
2884 Info.align = Align(1);
2886 return true;
2887 case Intrinsic::x86_aesencwide256kl:
2888 case Intrinsic::x86_aesdecwide256kl:
2890 Info.ptrVal = I.getArgOperand(0);
2891 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
2892 Info.align = Align(1);
2894 return true;
2895 case Intrinsic::x86_cmpccxadd32:
2896 case Intrinsic::x86_cmpccxadd64:
2897 case Intrinsic::x86_atomic_bts:
2898 case Intrinsic::x86_atomic_btc:
2899 case Intrinsic::x86_atomic_btr: {
2901 Info.ptrVal = I.getArgOperand(0);
2902 unsigned Size = I.getType()->getScalarSizeInBits();
2903 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
2904 Info.align = Align(Size);
2907 return true;
2908 }
2909 case Intrinsic::x86_atomic_bts_rm:
2910 case Intrinsic::x86_atomic_btc_rm:
2911 case Intrinsic::x86_atomic_btr_rm: {
2913 Info.ptrVal = I.getArgOperand(0);
2914 unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();
2915 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
2916 Info.align = Align(Size);
2919 return true;
2920 }
2921 case Intrinsic::x86_aadd32:
2922 case Intrinsic::x86_aadd64:
2923 case Intrinsic::x86_aand32:
2924 case Intrinsic::x86_aand64:
2925 case Intrinsic::x86_aor32:
2926 case Intrinsic::x86_aor64:
2927 case Intrinsic::x86_axor32:
2928 case Intrinsic::x86_axor64:
2929 case Intrinsic::x86_atomic_add_cc:
2930 case Intrinsic::x86_atomic_sub_cc:
2931 case Intrinsic::x86_atomic_or_cc:
2932 case Intrinsic::x86_atomic_and_cc:
2933 case Intrinsic::x86_atomic_xor_cc: {
2935 Info.ptrVal = I.getArgOperand(0);
2936 unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();
2937 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
2938 Info.align = Align(Size);
2941 return true;
2942 }
2943 }
2944 return false;
2945 }
2946
2947 switch (IntrData->Type) {
2950 case TRUNCATE_TO_MEM_VI32: {
2952 Info.ptrVal = I.getArgOperand(0);
2953 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
2955 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
2956 ScalarVT = MVT::i8;
2957 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
2958 ScalarVT = MVT::i16;
2959 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
2960 ScalarVT = MVT::i32;
2961
2962 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
2963 Info.align = Align(1);
2965 break;
2966 }
2967 case GATHER:
2968 case GATHER_AVX2: {
2970 Info.ptrVal = nullptr;
2971 MVT DataVT = MVT::getVT(I.getType());
2972 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
2973 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
2974 IndexVT.getVectorNumElements());
2975 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
2976 Info.align = Align(1);
2978 break;
2979 }
2980 case SCATTER: {
2982 Info.ptrVal = nullptr;
2983 MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
2984 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
2985 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
2986 IndexVT.getVectorNumElements());
2987 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
2988 Info.align = Align(1);
2990 break;
2991 }
2992 default:
2993 return false;
2994 }
2995
2996 return true;
2997}
2998
2999/// Returns true if the target can instruction select the
3000/// specified FP immediate natively. If false, the legalizer will
3001/// materialize the FP immediate as a load from a constant pool.
3002bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
3003 bool ForCodeSize) const {
3004 for (const APFloat &FPImm : LegalFPImmediates)
3005 if (Imm.bitwiseIsEqual(FPImm))
3006 return true;
3007 return false;
3008}
3009
3010bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
3011 ISD::LoadExtType ExtTy,
3012 EVT NewVT) const {
3013 assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow");
3014
3015 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
3016 // relocations target a movq or addq instruction: don't let the load shrink.
3017 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
3018 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
3019 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
3020 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
3021
3022 // If this is (1) an AVX vector load with (2) multiple uses and (3) all of
3023 // those uses are extracted directly into a store, then the extract + store
3024 // can be store-folded. Therefore, it's probably not worth splitting the load.
3025 EVT VT = Load->getValueType(0);
3026 if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
3027 for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
3028 // Skip uses of the chain value. Result 0 of the node is the load value.
3029 if (UI.getUse().getResNo() != 0)
3030 continue;
3031
3032 // If this use is not an extract + store, it's probably worth splitting.
3033 if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
3034 UI->use_begin()->getOpcode() != ISD::STORE)
3035 return true;
3036 }
3037 // All non-chain uses are extract + store.
3038 return false;
3039 }
3040
3041 return true;
3042}
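// For illustration: a 256-bit load whose non-chain uses are all
// extract_subvector nodes feeding stores is kept whole, because each extract
// can be folded into a store (e.g. vextractf128 with a memory destination),
// whereas splitting the load would just add instructions.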
3043
3044/// Returns true if it is beneficial to convert a load of a constant
3045/// to just the constant itself.
3046bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
3047 Type *Ty) const {
3048 assert(Ty->isIntegerTy());
3049
3050 unsigned BitSize = Ty->getPrimitiveSizeInBits();
3051 if (BitSize == 0 || BitSize > 64)
3052 return false;
3053 return true;
3054}
3055
3056bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
3057 // If we are using XMM registers in the ABI and the condition of the select is
3058 // a floating-point compare and we have blendv or conditional move, then it is
3059 // cheaper to select instead of doing a cross-register move and creating a
3060 // load that depends on the compare result.
3061 bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128;
3062 return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
3063}
3064
3065bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
3066 // TODO: It might be a win to ease or lift this restriction, but the generic
3067 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
3068 if (VT.isVector() && Subtarget.hasAVX512())
3069 return false;
3070
3071 return true;
3072}
3073
3074bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
3075 SDValue C) const {
3076 // TODO: We handle scalars using custom code, but generic combining could make
3077 // that unnecessary.
3078 APInt MulC;
3079 if (!ISD::isConstantSplatVector(C.getNode(), MulC))
3080 return false;
3081
3082 // Find the type this will be legalized to. Otherwise we might prematurely
3083 // convert this to shl+add/sub and then still have to type legalize those ops.
3084 // Another choice would be to defer the decision for illegal types until
3085 // after type legalization. But constant splat vectors of i64 can't make it
3086 // through type legalization on 32-bit targets so we would need to special
3087 // case vXi64.
3088 while (getTypeAction(Context, VT) != TypeLegal)
3089 VT = getTypeToTransformTo(Context, VT);
3090
3091 // If vector multiply is legal, assume that's faster than shl + add/sub.
3092 // Multiply is a complex op with higher latency and lower throughput in
3093 // most implementations; sub-vXi32 vector multiplies are always fast,
3094 // vXi32 mustn't have a SlowMULLD implementation, and anything larger (vXi64)
3095 // is always going to be slow.
3096 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3097 if (isOperationLegal(ISD::MUL, VT) && EltSizeInBits <= 32 &&
3098 (EltSizeInBits != 32 || !Subtarget.isPMULLDSlow()))
3099 return false;
3100
3101 // shl+add, shl+sub, shl+add+neg
3102 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
3103 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
3104}
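// For illustration, with a splat constant this hook allows rewrites such as
//   mul x, 9  -> (x << 3) + x      // MulC - 1 is a power of two
//   mul x, 7  -> (x << 3) - x      // MulC + 1 is a power of two
//   mul x, -7 -> x - (x << 3)      // 1 - MulC is a power of two
// when the corresponding vector multiply would be slower than shift+add/sub.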
3105
3106bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
3107 unsigned Index) const {
3108 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
3109 return false;
3110
3111 // Mask vectors support all subregister combinations and operations that
3112 // extract half of a vector.
3113 if (ResVT.getVectorElementType() == MVT::i1)
3114 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
3115 (Index == ResVT.getVectorNumElements()));
3116
3117 return (Index % ResVT.getVectorNumElements()) == 0;
3118}
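// For illustration: extracting a v4i32 subvector from a v8i32 source at
// index 4 is reported as cheap (the index is a multiple of the result's
// element count, so it maps to the upper half / a plain subregister), while
// an extract at index 2 would straddle the halves and is not.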
3119
3120bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
3121 unsigned Opc = VecOp.getOpcode();
3122
3123 // Assume target opcodes can't be scalarized.
3124 // TODO - do we have any exceptions?
3125 if (Opc >= ISD::BUILTIN_OP_END)
3126 return false;
3127
3128 // If the vector op is not supported, try to convert to scalar.
3129 EVT VecVT = VecOp.getValueType();
3130 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
3131 return true;
3132
3133 // If the vector op is supported, but the scalar op is not, the transform may
3134 // not be worthwhile.
3135 EVT ScalarVT = VecVT.getScalarType();
3136 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
3137}
3138
3139bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
3140 bool) const {
3141 // TODO: Allow vectors?
3142 if (VT.isVector())
3143 return false;
3144 return VT.isSimple() || !isOperationExpand(Opcode, VT);
3145}
3146
3147bool X86TargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
3148 // Speculate cttz only if we can directly use TZCNT or can promote to i32.
3149 return Subtarget.hasBMI() ||
3150 (!Ty->isVectorTy() && Ty->getScalarSizeInBits() < 32);
3151}
3152
3153bool X86TargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
3154 // Speculate ctlz only if we can directly use LZCNT.
3155 return Subtarget.hasLZCNT();
3156}
3157
3158bool X86TargetLowering::ShouldShrinkFPConstant(EVT VT) const {
3159 // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
3160 // expensive than a straight movsd. On the other hand, it's important to
3161 // shrink long double fp constants since fldt is very slow.
3162 return !Subtarget.hasSSE2() || VT == MVT::f80;
3163}
3164
3165bool X86TargetLowering::isScalarFPTypeInSSEReg(EVT VT) const {
3166 return (VT == MVT::f64 && Subtarget.hasSSE2()) ||
3167 (VT == MVT::f32 && Subtarget.hasSSE1()) || VT == MVT::f16;
3168}