#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64-lower"
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumShiftInserts, "Number of vector shift inserts");
STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
cl::desc("Allow AArch64 Local Dynamic TLS code generation"),

cl::desc("Enable AArch64 logical imm instruction "

cl::desc("Combine extends of AArch64 masked "
         "gather intrinsics"),
switch (EC.getKnownMinValue()) {

       "Expected scalable predicate vector type!");

       "Expected legal vector type!");
if (Subtarget->hasSVE()) {

if (useSVEForFixedLengthVectorVT(VT))

if (useSVEForFixedLengthVectorVT(VT))
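// The LCALLNAME* macros below register the names of the outlined-atomics
// helper routines (e.g. __aarch64_cas4_acq_rel) for every access size and
// memory-ordering variant, so that these operations can be emitted as
// libcalls.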
#define LCALLNAMES(A, B, N)                                   \
  setLibcallName(A##N##_RELAX, #B #N "_relax");               \
  setLibcallName(A##N##_ACQ, #B #N "_acq");                   \
  setLibcallName(A##N##_REL, #B #N "_rel");                   \
  setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
#define LCALLNAME4(A, B)                                      \
  LCALLNAMES(A, B, 1)                                         \
  LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
#define LCALLNAME5(A, B)                                      \
  LCALLNAMES(A, B, 1)                                         \
  LCALLNAMES(A, B, 2)                                         \
  LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
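// For example, LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp) expands to
// setLibcallName calls registering __aarch64_swp1_relax, __aarch64_swp1_acq,
// __aarch64_swp1_rel and __aarch64_swp1_acq_rel, and likewise for the 2-, 4-
// and 8-byte variants.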
if (Subtarget->hasSVE()) {

if (useSVEForFixedLengthVectorVT(VT))
  addTypeForFixedLengthSVE(VT);

if (useSVEForFixedLengthVectorVT(VT))
  addTypeForFixedLengthSVE(VT);
void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {

  for (unsigned Opcode :

void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {

void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {

void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
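// The fragments below come from the logical-immediate optimization: when only
// some bits of an AND/ORR/EOR immediate are demanded, the undemanded bits are
// adjusted so that the value becomes encodable as an AArch64 logical
// immediate.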
                               const APInt &Demanded,

uint64_t OldImm = Imm, NewImm, Enc;
uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;

if (Imm == 0 || Imm == Mask ||

unsigned EltSize = Size;
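// Fill each run of non-demanded bits with the value of the demanded bit just
// below it: the rotate/add/carry sequence propagates that bit through the run,
// minimizing 0/1 transitions so the result is more likely to be encodable as a
// logical immediate.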
uint64_t RotatedImm =
    ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
    NonDemandedBits;
uint64_t Sum = RotatedImm + NonDemandedBits;
bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
uint64_t Ones = (Sum + Carry) & NonDemandedBits;
NewImm = (Imm | Ones) & Mask;

uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;

while (EltSize < Size) {
  NewImm |= NewImm << EltSize;

       "demanded bits should never be altered");
assert(OldImm != NewImm &&
       "the new imm shouldn't be equal to the old imm");
EVT VT = Op.getValueType();

if (NewImm == 0 || NewImm == OrigMask) {

EVT VT = Op.getValueType();

       "i32 or i64 is expected after legalization.");
switch (Op.getOpcode()) {

NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;

NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;

NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;

uint64_t Imm = C->getZExtValue();

switch (Op.getOpcode()) {

case Intrinsic::aarch64_ldaxr:
case Intrinsic::aarch64_ldxr: {
  EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();

unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

case Intrinsic::aarch64_neon_umaxv:
case Intrinsic::aarch64_neon_uminv: {
  MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
#define MAKE_CASE(V) \
Register DestReg = MI.getOperand(0).getReg();
Register IfTrueReg = MI.getOperand(1).getReg();
Register IfFalseReg = MI.getOperand(2).getReg();
unsigned CondCode = MI.getOperand(3).getImm();
bool NZCVKilled = MI.getOperand(4).isKill();

MI.eraseFromParent();

           BB->getParent()->getFunction().getPersonalityFn())) &&
       "SEH does not use catchret!");

switch (MI.getOpcode()) {

case AArch64::F128CSEL:

case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
case TargetOpcode::STATEPOINT:

N = N->getOperand(0).getNode();

auto Opnd0 = N->getOperand(0);
auto *CINT = dyn_cast<ConstantSDNode>(Opnd0);
auto *CFP = dyn_cast<ConstantFPSDNode>(Opnd0);
return (CINT && CINT->isNullValue()) || (CFP && CFP->isZero());
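// An add/sub immediate is legal if it fits in 12 bits, or if the low 12 bits
// are zero and the value fits in 24 bits (a 12-bit immediate shifted left by
// 12).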
bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);

                  << " legal: " << (IsLegal ? "yes\n" : "no\n"));
assert(VT != MVT::f16 && "Lowering of strict fp16 not yet implemented");

const bool FullFP16 =

if (isCMN(RHS, CC)) {

} else if (isCMN(LHS, CC)) {

unsigned Opcode = 0;
const bool FullFP16 =

return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
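// The fragments below belong to the conjunction/disjunction (CCMP) emission
// logic: a tree of i1 AND/OR of comparisons is analyzed to decide which
// operands can be negated and which must be emitted first, so the whole tree
// can be lowered to a chain of conditional-compare instructions.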
                               bool &MustBeFirst, bool WillNegate,
                               unsigned Depth = 0) {

MustBeFirst = false;

bool IsOR = Opcode == ISD::OR;

if (MustBeFirstL && MustBeFirstR)

if (!CanNegateL && !CanNegateR)

CanNegate = WillNegate && CanNegateL && CanNegateR;

MustBeFirst = !CanNegate;

MustBeFirst = MustBeFirstL || MustBeFirstR;

bool IsOR = Opcode == ISD::OR;

assert(ValidL && "Valid conjunction/disjunction tree");

assert(ValidR && "Valid conjunction/disjunction tree");

assert(!MustBeFirstR && "Valid conjunction/disjunction tree");

bool NegateAfterAll;

assert(CanNegateR && "at least one side must be negatable");
assert(!MustBeFirstR && "invalid conjunction/disjunction tree");

NegateAfterR = true;

NegateR = CanNegateR;
NegateAfterR = !CanNegateR;

NegateAfterAll = !Negate;

assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
assert(!Negate && "Valid conjunction/disjunction tree");

NegateAfterR = false;
NegateAfterAll = false;

bool DummyCanNegate;
bool DummyMustBeFirst;
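// Masks of 0xFF, 0xFFFF and 0xFFFFFFFF correspond to the UXTB/UXTH/UXTW
// extended-register forms, so an AND with one of these masks can be folded
// into the compare or add/sub as an extend.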
auto isSupportedExtend = [&](SDValue V) {

if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
  uint64_t Mask = MaskCst->getZExtValue();
  return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);

if (!Op.hasOneUse())

if (isSupportedExtend(Op))

unsigned Opc = Op.getOpcode();

if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
  uint64_t Shift = ShiftCst->getZExtValue();
  if (isSupportedExtend(Op.getOperand(0)))
    return (Shift <= 4) ? 2 : 1;

  EVT VT = Op.getValueType();
uint64_t C = RHSC->getZExtValue();

if ((VT == MVT::i32 && C != 0x80000000 &&

    (VT == MVT::i64 && C != 0x80000000ULL &&

if ((VT == MVT::i32 && C != INT32_MAX &&

if ((VT == MVT::i32 && C != UINT32_MAX &&

if (!isa<ConstantSDNode>(RHS) ||

if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
    cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
    cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&

int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
static std::pair<SDValue, SDValue>

       "Unsupported value type");

switch (Op.getOpcode()) {

Overflow = Value.getValue(1);

return std::make_pair(Value, Overflow);

if (useSVEForFixedLengthVectorVT(Op.getValueType()))
  return LowerToScalableOp(Op, DAG);

if (!CFVal || !CTVal)

EVT VT = Op.getValueType();

bool ExtraOp = false;
switch (Op.getOpcode()) {
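// Lowering of the llvm.prefetch intrinsic: the rw, locality and
// data/instruction operands are folded into the single immediate operand of
// the PRFM instruction.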
unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();

bool IsStream = !Locality;

assert(Locality <= 3 && "Prefetch locality out-of-range");

Locality = 3 - Locality;
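// Bit 4 of the PRFM immediate selects a write (store) prefetch; the remaining
// bits (elided here) encode the data/instruction cache, the target cache level
// and the streaming hint. IR locality runs the other way round (3 = keep in
// all caches), hence the 3 - Locality flip above.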
unsigned PrfOp = (IsWrite << 4) |

if (Op.getValueType().isScalableVector())

if (Op.getValueType().isScalableVector())

bool IsStrict = Op->isStrictFPOpcode();
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);

if (useSVEForFixedLengthVectorVT(SrcVT))

EVT InVT = Op.getOperand(0).getValueType();
EVT VT = Op.getValueType();

return LowerToPredicatedOp(Op, DAG, Opcode);

            Op.getOpcode(), dl, Op.getValueType(),
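// The VTSize/InVTSize comparisons below handle vector FP<->integer conversions
// whose source and result element sizes differ: the narrower side is widened
// (or the wider result truncated) so the conversion operates on matching
// element widths.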
if (VTSize < InVTSize) {

if (VTSize > InVTSize) {

bool IsStrict = Op->isStrictFPOpcode();
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);

return LowerVectorFP_TO_INT(Op, DAG);

assert(!IsStrict && "Lowering of strict fp16 not yet implemented");

            Op.getOpcode(), dl, Op.getValueType(),

EVT VT = Op.getValueType();

EVT InVT = In.getValueType();
unsigned Opc = Op.getOpcode();

return LowerToPredicatedOp(Op, DAG, Opcode);

if (VTSize < InVTSize) {

if (VTSize > InVTSize) {

if (Op.getValueType().isVector())
  return LowerVectorINT_TO_FP(Op, DAG);

bool IsStrict = Op->isStrictFPOpcode();
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);

assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
EVT ArgVT = Arg.getValueType();

Entry.IsSExt = false;
Entry.IsZExt = false;
Args.push_back(Entry);

                  : RTLIB::SINCOS_STRET_F32;

std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;

EVT OpVT = Op.getValueType();

switch (OrigSimpleTy) {