#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64-lower"

STATISTIC(NumShiftInserts, "Number of vector shift inserts");
STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");

    cl::desc("Allow AArch64 Local Dynamic TLS code generation"),

    cl::desc("Enable AArch64 logical imm instruction "

    cl::desc("Combine extends of AArch64 masked "
             "gather intrinsics"),
  switch (EC.getKnownMinValue()) {
         "Expected scalable predicate vector type!");
         "Expected legal vector type!");
  switch (Op.getOpcode()) {
    switch (Op.getConstantOperandVal(0)) {
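    // Each of the following SVE intrinsics produces a predicate result; the
    // consecutive case labels all fall through to the same case body.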
    case Intrinsic::aarch64_sve_ptrue:
    case Intrinsic::aarch64_sve_pnext:
    case Intrinsic::aarch64_sve_cmpeq:
    case Intrinsic::aarch64_sve_cmpne:
    case Intrinsic::aarch64_sve_cmpge:
    case Intrinsic::aarch64_sve_cmpgt:
    case Intrinsic::aarch64_sve_cmphs:
    case Intrinsic::aarch64_sve_cmphi:
    case Intrinsic::aarch64_sve_cmpeq_wide:
    case Intrinsic::aarch64_sve_cmpne_wide:
    case Intrinsic::aarch64_sve_cmpge_wide:
    case Intrinsic::aarch64_sve_cmpgt_wide:
    case Intrinsic::aarch64_sve_cmplt_wide:
    case Intrinsic::aarch64_sve_cmple_wide:
    case Intrinsic::aarch64_sve_cmphs_wide:
    case Intrinsic::aarch64_sve_cmphi_wide:
    case Intrinsic::aarch64_sve_cmplo_wide:
    case Intrinsic::aarch64_sve_cmpls_wide:
    case Intrinsic::aarch64_sve_fcmpeq:
    case Intrinsic::aarch64_sve_fcmpne:
    case Intrinsic::aarch64_sve_fcmpge:
    case Intrinsic::aarch64_sve_fcmpgt:
    case Intrinsic::aarch64_sve_fcmpuo:
    case Intrinsic::aarch64_sve_facgt:
    case Intrinsic::aarch64_sve_facge:
    case Intrinsic::aarch64_sve_whilege:
    case Intrinsic::aarch64_sve_whilegt:
    case Intrinsic::aarch64_sve_whilehi:
    case Intrinsic::aarch64_sve_whilehs:
    case Intrinsic::aarch64_sve_whilele:
    case Intrinsic::aarch64_sve_whilelo:
    case Intrinsic::aarch64_sve_whilels:
    case Intrinsic::aarch64_sve_whilelt:
    case Intrinsic::aarch64_sve_match:
    case Intrinsic::aarch64_sve_nmatch:
    case Intrinsic::aarch64_sve_whilege_x2:
    case Intrinsic::aarch64_sve_whilegt_x2:
    case Intrinsic::aarch64_sve_whilehi_x2:
    case Intrinsic::aarch64_sve_whilehs_x2:
    case Intrinsic::aarch64_sve_whilele_x2:
    case Intrinsic::aarch64_sve_whilelo_x2:
    case Intrinsic::aarch64_sve_whilels_x2:
    case Intrinsic::aarch64_sve_whilelt_x2:
  if (Subtarget->hasLS64()) {
  if (Subtarget->hasFPARMv8()) {
  if (Subtarget->hasNEON()) {
  if (Subtarget->hasBF16())
  if (Subtarget->hasBF16())
  if (Subtarget->hasBF16()) {
  if (Subtarget->hasCSSC()) {
  if (Subtarget->hasFullFP16())
  if (!Subtarget->hasFullFP16()) {
  if (Subtarget->hasFullFP16())
  if (Subtarget->hasFullFP16())
  if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
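    // Register libcall names for the out-of-line atomic helpers. For example,
    // LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas) expands to
    // setLibcallName calls for __aarch64_cas1_relax through
    // __aarch64_cas16_acq_rel (sizes 1, 2, 4, 8 and 16, each with the _relax,
    // _acq, _rel and _acq_rel orderings).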
#define LCALLNAMES(A, B, N)                                                    \
  setLibcallName(A##N##_RELAX, #B #N "_relax");                               \
  setLibcallName(A##N##_ACQ, #B #N "_acq");                                   \
  setLibcallName(A##N##_REL, #B #N "_rel");                                   \
  setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
#define LCALLNAME4(A, B)                                                       \
  LCALLNAMES(A, B, 1)                                                          \
  LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
#define LCALLNAME5(A, B)                                                       \
  LCALLNAMES(A, B, 1)                                                          \
  LCALLNAMES(A, B, 2)                                                          \
  LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
    LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
  if (Subtarget->hasLSE128()) {
  if (Subtarget->hasLSE2()) {
  if (Subtarget->hasNEON()) {
  if (Subtarget->hasFullFP16()) {
    if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
  if (Subtarget->hasFullFP16())
  if (Subtarget->hasSME()) {
  if (Subtarget->hasSVE()) {
  if (Subtarget->hasSVE2()) {
      addTypeForStreamingSVE(VT);
      addTypeForStreamingSVE(VT);
      addTypeForFixedLengthSVE(VT);
      addTypeForFixedLengthSVE(VT);
  if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
void AArch64TargetLowering::addTypeForNEON(MVT VT) {
  for (unsigned Opcode :
  for (unsigned Opcode :
  if (Subtarget->hasD128()) {
  if (!Subtarget->hasSVE())
void AArch64TargetLowering::addTypeForStreamingSVE(MVT VT) {
  while (InnerVT != VT) {
  while (InnerVT != VT) {
void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
  while (InnerVT != VT) {
  while (InnerVT != VT) {

void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {

void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
    Imm = C->getZExtValue();
  return N->getOpcode() == Opc &&
                               const APInt &Demanded,
  uint64_t OldImm = Imm, NewImm, Enc;
  if (Imm == 0 || Imm == Mask ||
  unsigned EltSize = Size;
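  // The bit manipulation below fills the non-demanded bits with copies of
  // neighbouring demanded bits, extending runs of identical bits so that the
  // result is more likely to be encodable as a logical immediate.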
      ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
  uint64_t Sum = RotatedImm + NonDemandedBits;
  bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
  uint64_t Ones = (Sum + Carry) & NonDemandedBits;
  NewImm = (Imm | Ones) & Mask;
    while (EltSize < Size) {
      NewImm |= NewImm << EltSize;
         "demanded bits should never be altered");
  assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
  EVT VT = Op.getValueType();
  if (NewImm == 0 || NewImm == OrigMask) {
    New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
  EVT VT = Op.getValueType();
         "i32 or i64 is expected after legalization.");
  switch (Op.getOpcode()) {
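  // Pick the 32-bit (Wri) or 64-bit (Xri) immediate form of the logical
  // instruction for the rewritten immediate.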
    NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
    NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
    NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
  switch (Op.getOpcode()) {
    if (SrcOp.getValueSizeInBits() != Op.getScalarValueSizeInBits()) {
      assert(SrcOp.getValueSizeInBits() > Op.getScalarValueSizeInBits() &&
             "Expected DUP implicit truncation");
      Known = Known.trunc(Op.getScalarValueSizeInBits());
        ~(Op->getConstantOperandVal(1) << Op->getConstantOperandVal(2));
  case Intrinsic::aarch64_ldaxr:
  case Intrinsic::aarch64_ldxr: {
    EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
    unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    case Intrinsic::aarch64_neon_umaxv:
    case Intrinsic::aarch64_neon_uminv: {
      MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
                                                  unsigned *Fast) const {
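  // Targets that require strict alignment do not allow misaligned accesses at
  // all; otherwise they are allowed, and *Fast reports whether the access is
  // also expected to be cheap (misaligned 128-bit stores are slow on some
  // subtargets).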
  if (Subtarget->requiresStrictAlign())
    *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||

                                                  unsigned *Fast) const {
  if (Subtarget->requiresStrictAlign())
    *Fast = !Subtarget->isMisaligned128StoreSlow() ||
#define MAKE_CASE(V) \
  Register DestReg = MI.getOperand(0).getReg();
  Register IfTrueReg = MI.getOperand(1).getReg();
  Register IfFalseReg = MI.getOperand(2).getReg();
  unsigned CondCode = MI.getOperand(3).getImm();
  bool NZCVKilled = MI.getOperand(4).isKill();

  MI.eraseFromParent();
         "SEH does not use catchret!");
  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(2));
  MIB.add(MI.getOperand(3));
  MIB.add(MI.getOperand(4));
  MIB.add(MI.getOperand(5));

  MI.eraseFromParent();
  MIB.add(MI.getOperand(0));
  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(2));
  MIB.add(MI.getOperand(1));

  MI.eraseFromParent();
  unsigned StartIdx = 0;
    MIB.addReg(BaseReg + MI.getOperand(0).getImm());
  for (unsigned I = StartIdx; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));

  MI.eraseFromParent();
  MIB.add(MI.getOperand(0));
  unsigned Mask = MI.getOperand(0).getImm();
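  // The 8-bit mask has one bit per 64-bit ZA tile (ZAD0..ZAD7); the loop
  // below acts on each tile whose bit is set.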
  for (unsigned I = 0; I < 8; I++) {
    if (Mask & (1 << I))

  MI.eraseFromParent();
  if (SMEOrigInstr != -1) {
    switch (SMEMatrixType) {
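    // Re-emit the SME ZA pseudo against the base tile register matching its
    // element size (ZAB0/ZAH0/ZAS0/ZAD0/ZAQ0); plain ZA is used when the
    // instruction does not address an individual tile.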
      return EmitZAInstr(SMEOrigInstr, AArch64::ZA, MI, BB, false);
      return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0, MI, BB, true);
      return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0, MI, BB, true);
      return EmitZAInstr(SMEOrigInstr, AArch64::ZAS0, MI, BB, true);
      return EmitZAInstr(SMEOrigInstr, AArch64::ZAD0, MI, BB, true);
      return EmitZAInstr(SMEOrigInstr, AArch64::ZAQ0, MI, BB, true);
  switch (MI.getOpcode()) {
  case AArch64::F128CSEL:
  case TargetOpcode::STATEPOINT:
    MI.addOperand(*MI.getMF(),
  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
  case AArch64::CATCHRET:
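  // SME tile loads: each LD1_MXIPXX_{H,V}_PSEUDO_<size> pseudo is expanded by
  // EmitTileLoad into the corresponding horizontal (H) or vertical (V) LD1
  // tile load, paired with the base tile register for that element size.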
  case AArch64::LD1_MXIPXX_H_PSEUDO_B:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_H:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_H, AArch64::ZAH0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_S:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_S, AArch64::ZAS0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_D:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_D, AArch64::ZAD0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_Q:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_Q, AArch64::ZAQ0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_B:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_B, AArch64::ZAB0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_H:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_H, AArch64::ZAH0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_S:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_S, AArch64::ZAS0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_D:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_D, AArch64::ZAD0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_Q:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB);
  case AArch64::LDR_ZA_PSEUDO:
  case AArch64::ZERO_M_PSEUDO:

    N = N->getOperand(0).getNode();
  auto Opnd0 = N->getOperand(0);