#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64-lower"
STATISTIC(NumShiftInserts, "Number of vector shift inserts");
STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
    cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
    cl::desc("Enable AArch64 logical imm instruction "
    cl::desc("Combine extends of AArch64 masked "
             "gather intrinsics"),
    cl::desc("Combine ext and trunc to TBL"),
    AArch64::X3, AArch64::X4, AArch64::X5,
    AArch64::X6, AArch64::X7};
    AArch64::Q3, AArch64::Q4, AArch64::Q5,
    AArch64::Q6, AArch64::Q7};
    return MVT::nxv8bf16;
  switch (EC.getKnownMinValue()) {
         "Expected scalable predicate vector type!");
         "Expected legal vector type!");
  switch (Op.getOpcode()) {
    switch (Op.getConstantOperandVal(0)) {
    case Intrinsic::aarch64_sve_ptrue:
    case Intrinsic::aarch64_sve_pnext:
    case Intrinsic::aarch64_sve_cmpeq:
    case Intrinsic::aarch64_sve_cmpne:
    case Intrinsic::aarch64_sve_cmpge:
    case Intrinsic::aarch64_sve_cmpgt:
    case Intrinsic::aarch64_sve_cmphs:
    case Intrinsic::aarch64_sve_cmphi:
    case Intrinsic::aarch64_sve_cmpeq_wide:
    case Intrinsic::aarch64_sve_cmpne_wide:
    case Intrinsic::aarch64_sve_cmpge_wide:
    case Intrinsic::aarch64_sve_cmpgt_wide:
    case Intrinsic::aarch64_sve_cmplt_wide:
    case Intrinsic::aarch64_sve_cmple_wide:
    case Intrinsic::aarch64_sve_cmphs_wide:
    case Intrinsic::aarch64_sve_cmphi_wide:
    case Intrinsic::aarch64_sve_cmplo_wide:
    case Intrinsic::aarch64_sve_cmpls_wide:
    case Intrinsic::aarch64_sve_fcmpeq:
    case Intrinsic::aarch64_sve_fcmpne:
    case Intrinsic::aarch64_sve_fcmpge:
    case Intrinsic::aarch64_sve_fcmpgt:
    case Intrinsic::aarch64_sve_fcmpuo:
    case Intrinsic::aarch64_sve_facgt:
    case Intrinsic::aarch64_sve_facge:
    case Intrinsic::aarch64_sve_whilege:
    case Intrinsic::aarch64_sve_whilegt:
    case Intrinsic::aarch64_sve_whilehi:
    case Intrinsic::aarch64_sve_whilehs:
    case Intrinsic::aarch64_sve_whilele:
    case Intrinsic::aarch64_sve_whilelo:
    case Intrinsic::aarch64_sve_whilels:
    case Intrinsic::aarch64_sve_whilelt:
    case Intrinsic::aarch64_sve_match:
    case Intrinsic::aarch64_sve_nmatch:
    case Intrinsic::aarch64_sve_whilege_x2:
    case Intrinsic::aarch64_sve_whilegt_x2:
    case Intrinsic::aarch64_sve_whilehi_x2:
    case Intrinsic::aarch64_sve_whilehs_x2:
    case Intrinsic::aarch64_sve_whilele_x2:
    case Intrinsic::aarch64_sve_whilelo_x2:
    case Intrinsic::aarch64_sve_whilels_x2:
    case Intrinsic::aarch64_sve_whilelt_x2:
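// Constructor (partially elided): register classes and vector types below are
// added only when the corresponding subtarget feature is available, so type
// legality queries reflect the exact feature set of the target.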
  if (Subtarget->hasLS64()) {
  if (Subtarget->hasFPARMv8()) {
  if (Subtarget->hasNEON()) {
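    // 64-bit vectors live in D registers.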
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);
    addDRTypeForNEON(MVT::v1f64);
    addDRTypeForNEON(MVT::v4f16);
    if (Subtarget->hasBF16())
      addDRTypeForNEON(MVT::v4bf16);
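    // 128-bit vectors live in Q registers.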
    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);
    addQRTypeForNEON(MVT::v8f16);
    if (Subtarget->hasBF16())
      addQRTypeForNEON(MVT::v8bf16);
  if (Subtarget->hasBF16()) {
  if (Subtarget->hasSVE2p1() || Subtarget->hasSME2()) {
  if (Subtarget->hasCSSC()) {
  if (Subtarget->hasFullFP16())
  if (!Subtarget->hasFullFP16()) {
  for (MVT Ty : {MVT::f32, MVT::f64})
  if (Subtarget->hasFullFP16())
  for (MVT Ty : {MVT::f32, MVT::f64})
  if (Subtarget->hasFullFP16())
  for (auto VT : {MVT::f32, MVT::f64})
  if (!Subtarget->hasLSE() && !Subtarget->outlineAtomics()) {
  if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
#define LCALLNAMES(A, B, N)                                                    \
  setLibcallName(A##N##_RELAX, #B #N "_relax");                                \
  setLibcallName(A##N##_ACQ, #B #N "_acq");                                    \
  setLibcallName(A##N##_REL, #B #N "_rel");                                    \
  setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
#define LCALLNAME4(A, B)                                                       \
  LCALLNAMES(A, B, 1)                                                          \
  LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
#define LCALLNAME5(A, B)                                                       \
  LCALLNAMES(A, B, 1)                                                          \
  LCALLNAMES(A, B, 2)                                                          \
  LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
    LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
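    // Each LCALLNAME4/LCALLNAME5 use registers one helper per access size and
    // memory order; e.g. LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD,
    // __aarch64_ldadd) names __aarch64_ldadd1_relax, __aarch64_ldadd1_acq,
    // __aarch64_ldadd1_rel, __aarch64_ldadd1_acq_rel, and likewise for the
    // 2-, 4- and 8-byte variants.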
  if (Subtarget->hasLSE128()) {
  if (Subtarget->hasLSE2()) {
  if (Subtarget->hasNEON()) {
    for (auto VT : {MVT::v2i32, MVT::v2i64, MVT::v4i32})
    if (Subtarget->hasFullFP16()) {
    for (auto VT : {MVT::v1i64, MVT::v2i64}) {
    for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32,
                   MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
    for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
    for (MVT VT : {MVT::v4f16, MVT::v2f32,
                   MVT::v8f16, MVT::v4f32, MVT::v2f64}) {
      if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
    for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32,
                   MVT::v16i8, MVT::v8i16, MVT::v4i32}) {
      if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
    for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
    if (Subtarget->hasFullFP16())
      for (MVT Ty : {MVT::v4f16, MVT::v8f16})
  for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64})
  for (MVT VT : {MVT::v16f16, MVT::v8f32, MVT::v4f64})
  if (Subtarget->hasSME()) {
       {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
    for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
    for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
    for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32, MVT::nxv2bf16,
                    MVT::nxv4bf16, MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32})
         {MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
          MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16})
         {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
      if (VT != MVT::nxv16i1) {
    for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
                    MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
                    MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
    for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                    MVT::nxv4f32, MVT::nxv2f64}) {
    for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
    for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
                    MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
         {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
      for (MVT VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
                     MVT::v4i32, MVT::v1i64, MVT::v2i64})
        addTypeForFixedLengthSVE(VT, /*StreamingSVE=*/true);
           {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
        addTypeForFixedLengthSVE(VT, /*StreamingSVE=*/true);
      addTypeForFixedLengthSVE(VT, /*StreamingSVE=*/false);
      addTypeForFixedLengthSVE(VT, /*StreamingSVE=*/false);
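      // The boolean is the StreamingSVE parameter of addTypeForFixedLengthSVE
      // (declared below): presumably true on the paths above where only
      // streaming SVE/SME is available, false when full SVE can be used.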
    for (auto VT : {MVT::v8i8, MVT::v4i16})
    for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
    for (auto VT : {MVT::v8f16, MVT::v4f32})
    for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
                    MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
    for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
  if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
  if (Subtarget->hasSVE()) {
void AArch64TargetLowering::addTypeForNEON(MVT VT) {
  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
      ((VT == MVT::v4f16 || VT == MVT::v8f16) && Subtarget->hasFullFP16()))
  if (VT != MVT::v8i8 && VT != MVT::v16i8)
  if (Subtarget->hasD128()) {
  if (!Subtarget->hasSVE())
  if (ResVT != MVT::nxv2i1 && ResVT != MVT::nxv4i1 && ResVT != MVT::nxv8i1 &&
      ResVT != MVT::nxv16i1 && ResVT != MVT::v2i1 && ResVT != MVT::v4i1 &&
      ResVT != MVT::v8i1 && ResVT != MVT::v16i1)
  if (OpVT != MVT::i32 && OpVT != MVT::i64)
  return !Subtarget->hasSVEorSME() || VT != MVT::nxv16i1;
void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT,
                                                     bool StreamingSVE) {
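  // Sketch (assuming the elided body matches upstream LLVM): fixed-length
  // vectors are operated on inside scalable SVE containers, and the two
  // while-loops below appear to walk the smaller element types to configure
  // truncating-store and extending-load actions for each inner type.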
  while (InnerVT != VT) {
  while (InnerVT != VT) {
void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
    Imm = C->getZExtValue();
  return N->getOpcode() == Opc &&
                               const APInt &Demanded,
  uint64_t OldImm = Imm, NewImm, Enc;
  if (Imm == 0 || Imm == Mask ||
  unsigned EltSize = Size;
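  // Start with one element spanning the whole register; the elided search
  // presumably halves EltSize to try each replicated-pattern width that the
  // bitmask-immediate encoding can represent.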
        ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
    uint64_t Sum = RotatedImm + NonDemandedBits;
    bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
    uint64_t Ones = (Sum + Carry) & NonDemandedBits;
    NewImm = (Imm | Ones) & Mask;
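    // Illustrative example: with Size == 32, Imm == 0x0000FFF7 and bit 3 not
    // demanded, the ones form two runs (bits 0-2 and 4-15), which no bitmask
    // immediate can encode; setting the undemanded bit 3 yields 0x0000FFFF,
    // a single contiguous run that is encodable.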
  while (EltSize < Size) {
    NewImm |= NewImm << EltSize;
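    // Logical immediates replicate one element pattern across the register,
    // so widen the optimized element back up to the full Size bits.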
2042 "demanded bits should never be altered");
2043 assert(OldImm != NewImm &&
"the new imm shouldn't be equal to the old imm");
2046 EVT VT =
Op.getValueType();
2052 if (NewImm == 0 || NewImm == OrigMask) {
2077 EVT VT =
Op.getValueType();
2083 "i32 or i64 is expected after legalization.");
  switch (Op.getOpcode()) {
    NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
    NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
    NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
  switch (Op.getOpcode()) {
    if (SrcOp.getValueSizeInBits() != Op.getScalarValueSizeInBits()) {
      assert(SrcOp.getValueSizeInBits() > Op.getScalarValueSizeInBits() &&
             "Expected DUP implicit truncation");
      Known = Known.trunc(Op.getScalarValueSizeInBits());
        ~(Op->getConstantOperandVal(1) << Op->getConstantOperandVal(2));
  case Intrinsic::aarch64_ldaxr:
  case Intrinsic::aarch64_ldxr: {
    EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
    unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    case Intrinsic::aarch64_neon_uaddlv: {
      MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
      if (VT == MVT::v8i8 || VT == MVT::v16i8) {
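        // uaddlv sums all lanes into a wider scalar: at most 8 * 255 = 2040
        // (< 2^11) for v8i8 and 16 * 255 = 4080 (< 2^12) for v16i8, so every
        // bit above Bound is known to be zero.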
        unsigned Bound = (VT == MVT::v8i8) ? 11 : 12;
    case Intrinsic::aarch64_neon_umaxv:
    case Intrinsic::aarch64_neon_uminv: {
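      // The unsigned max/min across lanes is no wider than a single lane, so
      // all bits above the element width are known to be zero.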
      MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
      if (VT == MVT::v8i8 || VT == MVT::v16i8) {
      } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
                                                  unsigned Depth) const {
  EVT VT = Op.getValueType();
    unsigned *Fast) const {
  if (Subtarget->requiresStrictAlign())
    *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
    unsigned *Fast) const {
  if (Subtarget->requiresStrictAlign())
    *Fast = !Subtarget->isMisaligned128StoreSlow() ||
#define MAKE_CASE(V)                                                           \
  Register DestReg = MI.getOperand(0).getReg();
  Register IfTrueReg = MI.getOperand(1).getReg();
  Register IfFalseReg = MI.getOperand(2).getReg();
  unsigned CondCode = MI.getOperand(3).getImm();
  bool NZCVKilled = MI.getOperand(4).isKill();
  MI.eraseFromParent();
         "SEH does not use catchret!");
  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(2));
  MIB.add(MI.getOperand(3));
  MIB.add(MI.getOperand(4));
  MIB.add(MI.getOperand(5));
  MI.eraseFromParent();
  MIB.add(MI.getOperand(0));
  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(2));
  MIB.add(MI.getOperand(1));
  MI.eraseFromParent();
  unsigned StartIdx = 0;
  MIB.addReg(BaseReg + MI.getOperand(0).getImm());
  for (unsigned I = StartIdx; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));
  MI.eraseFromParent();
  MIB.add(MI.getOperand(0));
  unsigned Mask = MI.getOperand(0).getImm();
  for (unsigned I = 0; I < 8; I++) {
    if (Mask & (1 << I))
  MI.eraseFromParent();
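  // SME pseudo-instructions are dispatched below on the matrix operand they
  // touch: the full ZA array or a typed tile (ZAB0/ZAH0/ZAS0/ZAD0/ZAQ0); the
  // trailing bool of EmitZAInstr presumably distinguishes the tile forms from
  // the whole-array form.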
  if (SMEOrigInstr != -1) {
    switch (SMEMatrixType) {
      return EmitZAInstr(SMEOrigInstr, AArch64::ZA, MI, BB, false);
      return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0, MI, BB, true);
      return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0, MI, BB, true);
      return EmitZAInstr(SMEOrigInstr, AArch64::ZAS0, MI, BB, true);
      return EmitZAInstr(SMEOrigInstr, AArch64::ZAD0, MI, BB, true);
      return EmitZAInstr(SMEOrigInstr, AArch64::ZAQ0, MI, BB, true);
  switch (MI.getOpcode()) {
  case AArch64::F128CSEL:
  case TargetOpcode::STATEPOINT:
    MI.addOperand(*MI.getMF(),
  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
  case TargetOpcode::PATCHABLE_EVENT_CALL:
  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
  case AArch64::CATCHRET:
  case AArch64::LD1_MXIPXX_H_PSEUDO_B:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_H:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_H, AArch64::ZAH0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_S:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_S, AArch64::ZAS0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_D:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_D, AArch64::ZAD0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_Q:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_Q, AArch64::ZAQ0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_B:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_B, AArch64::ZAB0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_H:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_H, AArch64::ZAH0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_S:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_S, AArch64::ZAS0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_D:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_D, AArch64::ZAD0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_Q:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB);
  case AArch64::LDR_ZA_PSEUDO:
  case AArch64::ZERO_M_PSEUDO:
    N = N->getOperand(0).getNode();
  auto Opnd0 = N->getOperand(0);
                         CondCode, CondCode2);
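  // AArch64 ADD/SUB immediates are 12 bits, optionally shifted left by 12, so
  // C is legal if it already fits in 12 bits, or if its low 12 bits are clear
  // and the remainder fits in the next 12 bits.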
  bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
                    << " legal: " << (IsLegal ? "yes\n" : "no\n"));
  EVT VT = LHS.getValueType();
  if (VT == MVT::f16 && !FullFP16) {
    Chain = RHS.getValue(1);
  EVT VT = LHS.getValueType();
  if (VT == MVT::f16 && !FullFP16) {
  return LHS.getValue(1);
  if (LHS.getValueType().isFloatingPoint()) {
    assert(LHS.getValueType() != MVT::f128);
    if (LHS.getValueType() == MVT::f16 && !FullFP16) {
3261 bool &MustBeFirst,
bool WillNegate,
3262 unsigned Depth = 0) {
3270 MustBeFirst =
false;
3289 if (MustBeFirstL && MustBeFirstR)
3295 if (!CanNegateL && !CanNegateR)
3299 CanNegate = WillNegate && CanNegateL && CanNegateR;
3302 MustBeFirst = !CanNegate;
3307 MustBeFirst = MustBeFirstL || MustBeFirstR;
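// Rules visible above: a conjunction is infeasible if both subtrees insist on
// being emitted first; an OR needs at least one naturally negatable side, it
// negates naturally only when the result will be negated and both sides can
// be, and otherwise it must be emitted first; an AND can never be negated
// naturally and must come first whenever either of its subtrees must.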