#include "llvm/IR/IntrinsicsARM.h"
#define DEBUG_TYPE "arm-isel"
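// Statistics reported under -stats, counting how often each of the lowering
// strategies below fires.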
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
STATISTIC(NumConstpoolPromoted,
          "Number of constants with their storage promoted into constant pools");
static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
  cl::desc("Enable / disable ARM interworking (for debugging only)"),
  cl::init(true));
static cl::opt<bool> EnableConstpoolPromotion(
    "arm-promote-constant", cl::Hidden,
    cl::desc("Enable / disable promotion of unnamed_addr constants into "
             "constant pools"),
    cl::init(false));
static cl::opt<unsigned> ConstpoolPromotionMaxSize(
    "arm-promote-constant-max-size", cl::Hidden,
    cl::desc("Maximum size of constant to promote into a constant pool"),
    cl::init(64));
static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
    "arm-promote-constant-max-total", cl::Hidden,
    cl::desc("Maximum size of ALL constants to promote into a constant pool"),
    cl::init(128));
cl::opt<unsigned>
MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
  cl::desc("Maximum interleave factor for MVE VLDn to generate."),
  cl::init(2));
static const MCPhysReg GPRArgRegs[] = {
  ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
  if (VT != PromotedLdStVT) {
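// Register a NEON vector type with the D (64-bit) or Q (128-bit) register
// class and set up the common operation actions for it.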
void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
void ARMTargetLowering::setAllExpand(MVT VT) {
void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
                                       LegalizeAction Action) {
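// Configure legalization for all the MVE vector types: the integer vectors
// are always set up, and HasMVEFP additionally enables the float vectors.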
void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
  for (auto VT : IntTypes) {
  for (auto VT : FloatTypes) {
  for (auto VT : LongTypes) {
  for (auto VT : pTypes) {
  for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
503 if (Subtarget->isThumb() && Subtarget->
hasVFP2Base() &&
504 Subtarget->
hasARMOps() && !Subtarget->useSoftFloat()) {
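    // Prefer the hard-float "*vfp" variants of the soft-float comparison
    // libcalls when VFP is usable from Thumb code.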
    static const struct {
      const char * const Name;
      { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
      { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
    for (const auto &LC : LibraryCalls) {
    static const struct {
      const char * const Name;
    for (const auto &LC : LibraryCalls) {
    static const struct {
      const char * const Name;
    } MemOpsLibraryCalls[] = {
    for (const auto &LC : MemOpsLibraryCalls) {
    static const struct {
      const char * const Name;
    for (const auto &LC : LibraryCalls) {
    static const struct {
      const char * const Name;
    for (const auto &LC : LibraryCalls) {
771 if (!Subtarget->useSoftFloat() && !Subtarget->
isThumb1Only() &&
772 Subtarget->hasFPRegs()) {
783 if (!Subtarget->hasFP64())
787 if (Subtarget->hasFullFP16()) {
796 if (Subtarget->hasBF16()) {
799 if (!Subtarget->hasFullFP16())
      addAllExtLoads(VT, InnerVT, Expand);
821 if (Subtarget->hasMVEIntegerOps())
822 addMVEVectorTypes(Subtarget->hasMVEFloatOps());
825 if (Subtarget->hasLOB()) {
829 if (Subtarget->hasNEON()) {
843 if (Subtarget->hasFullFP16()) {
848 if (Subtarget->hasBF16()) {
854 if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
892 if (Subtarget->hasNEON()) {
1012 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1020 if (Subtarget->hasMVEIntegerOps()) {
1025 if (Subtarget->hasMVEFloatOps()) {
1029 if (!Subtarget->hasFP64()) {
1075 if (Subtarget->hasFullFP16()) {
1081 if (!Subtarget->hasFP16()) {
1129 if (Subtarget->hasDSP()) {
1151 if (Subtarget->
isThumb1Only() || !Subtarget->hasV6Ops()
1152 || (Subtarget->
isThumb2() && !Subtarget->hasDSP()))
1167 if (Subtarget->hasMVEIntegerOps())
1177 if (!Subtarget->
isThumb1Only() && Subtarget->hasV6T2Ops())
1188 if (!Subtarget->hasV5TOps() || Subtarget->
isThumb1Only()) {
1197 if (Subtarget->hasPerfMon())
1201 if (!Subtarget->hasV6Ops())
  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
                                        : Subtarget->hasDivideInARMMode();
1212 if (Subtarget->
isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
    HasStandaloneRem = false;
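    // With no standalone remainder instruction, {s,u}rem is lowered through
    // the combined divide+remainder libcalls registered below.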
      const char * const Name;
    } LibraryCalls[] = {
    for (const auto &LC : LibraryCalls) {
      const char * const Name;
    } LibraryCalls[] = {
    for (const auto &LC : LibraryCalls) {
  InsertFencesForAtomic = false;
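  // Choose the atomics strategy: native ldrex/strex-based lowering where the
  // ISA supports it, otherwise plain loads/stores bracketed by DMB fences.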
1315 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1319 if (!Subtarget->isThumb() || !Subtarget->
isMClass())
1324 if (!Subtarget->hasAcquireRelease() ||
1327 InsertFencesForAtomic =
true;
1333 if (Subtarget->hasDataBarrier())
1334 InsertFencesForAtomic =
true;
  if (!InsertFencesForAtomic) {
1362 (!Subtarget->
isMClass() && Subtarget->hasV6Ops())) {
  } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
             Subtarget->hasForced32BitAtomics()) {
1389 if (!Subtarget->hasV6Ops()) {
1395 if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1421 if (Subtarget->hasFullFP16()) {
1431 if (Subtarget->hasFullFP16())
1446 if (!Subtarget->useSoftFloat() && Subtarget->
hasVFP2Base() &&
1460 if (!Subtarget->useSoftFloat() && !Subtarget->
isThumb1Only()) {
1468 if (!Subtarget->hasFP16()) {
1499 if (Subtarget->hasNEON()) {
1506 if (Subtarget->hasFP64()) {
1519 if (Subtarget->hasFullFP16()) {
1536 if (Subtarget->hasNEON()) {
1552 if (Subtarget->hasFullFP16()) {
1570 if (Subtarget->hasMVEIntegerOps())
1573 if (Subtarget->hasV6Ops())
1578 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
1585 if (Subtarget->useSoftFloat() || Subtarget->
isThumb1Only() ||
1610 if (Subtarget->isThumb() || Subtarget->
isThumb2())
1615 return Subtarget->useSoftFloat();
std::pair<const TargetRegisterClass *, uint8_t>
1641 RRC = &ARM::DPRRegClass;
1651 RRC = &ARM::DPRRegClass;
1655 RRC = &ARM::DPRRegClass;
1659 RRC = &ARM::DPRRegClass;
  return std::make_pair(RRC, Cost);
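// Expands to a case label returning the stringified opcode; used by
// getTargetNodeName to print ARMISD node names.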
#define MAKE_CASE(V)                                                           \
  case V:                                                                      \
    return #V;
1887 if ((Subtarget->hasMVEIntegerOps() &&
1890 (Subtarget->hasMVEFloatOps() &&
1905 if (Subtarget->hasNEON()) {
1907 return &ARM::QQPRRegClass;
1909 return &ARM::QQQQPRRegClass;
1911 if (Subtarget->hasMVEIntegerOps()) {
1913 return &ARM::MQQPRRegClass;
1915 return &ARM::MQQQQPRRegClass;
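// Hook asking whether the pointer arguments of a memory-intrinsic call should
// be overaligned, and if so to what preferred alignment.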
1924 Align &PrefAlign)
const {
1925 if (!isa<MemIntrinsic>(CI))
  unsigned NumVals = N->getNumValues();
  for (unsigned i = 0; i != NumVals; ++i) {
    EVT VT = N->getValueType(i);
  if (!N->isMachineOpcode())
  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
    return Const->getZExtValue() == 16;
  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
    return Const->getZExtValue() == 16;
  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
    return Const->getZExtValue() == 16;
  return isSHL16(Op.getOperand(0));
                                                 bool isVarArg) const {
2084 else if (Subtarget->hasFPRegs() && !Subtarget->
isThumb1Only() &&
                                                 bool isVarArg) const {
  return CCAssignFnForNode(CC, false, isVarArg);
                                                 bool isVarArg) const {
  return CCAssignFnForNode(CC, true, isVarArg);
                                                 bool isVarArg) const {
  switch (getEffectiveCallingConv(CC, isVarArg)) {
2143 if (Subtarget->hasFullFP16()) {
2156 if (Subtarget->hasFullFP16()) {
SDValue ARMTargetLowering::LowerCallResult(
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
2187 if (i == 0 && isThisReturn) {
2189 "unexpected return calling convention register assignment");
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
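// Compute the address of an outgoing call argument: an SP-relative address
// for normal calls, or an address relative to the adjusted frame for tail
// calls that reuse the caller's stack (SPDiff is the stack-pointer delta).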
2258std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
2260 bool IsTailCall,
int SPDiff)
const {
2282 return std::make_pair(DstAddr, DstInfo);
2287 RegsToPassVector &RegsToPass,
2295 unsigned id = Subtarget->
isLittle() ? 0 : 1;
    std::tie(DstAddr, DstInfo) =
        computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
  bool isThisReturn = false;
  bool isCmseNSCall = false;
  bool isSibCall = false;
  bool PreferIndirect = false;
  bool GuardWithBTI = false;
2350 !Subtarget->noBTIAtReturnTwice())
2355 isCmseNSCall =
true;
2367 if (isa<GlobalAddressSDNode>(
Callee)) {
2371 auto *GV = cast<GlobalAddressSDNode>(
Callee)->getGlobal();
2374 PreferIndirect = Subtarget->isThumb() && Subtarget->
hasMinSize() &&
2376 return isa<Instruction>(U) &&
2377 cast<Instruction>(U)->getParent() == BB;
    isTailCall = IsEligibleForTailCallOptimization(
        Callee, CallConv, isVarArg, isStructRet,
           "site marked musttail");
  unsigned NumBytes = CCInfo.getNextStackOffset();
  if (isTailCall && !isSibCall) {
    NumBytes = alignTo(NumBytes, StackAlign);
    SPDiff = NumReusableBytes - NumBytes;
2433 if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (
unsigned)-SPDiff)
  RegsToPassVector RegsToPass;
  bool AfterFormalArgLoads = false;
  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
       i != e;
       ++i, ++realArgIdx) {
    bool isByVal = Flags.isByVal();
2487 if (isTailCall && VA.
isMemLoc() && !AfterFormalArgLoads) {
      AfterFormalArgLoads = true;
    auto ArgVT = Outs[realArgIdx].ArgVT;
2502 if (isCmseNSCall && (ArgVT ==
MVT::f16)) {
        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
                         StackPtr, MemOpChains, isTailCall, SPDiff);
        PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
                         StackPtr, MemOpChains, isTailCall, SPDiff);
        std::tie(DstAddr, DstInfo) =
            computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
      PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
                       StackPtr, MemOpChains, isTailCall, SPDiff);
2539 if (realArgIdx == 0 &&
Flags.isReturned() && !
Flags.isSwiftSelf() &&
2542 "unexpected calling convention register assignment");
2544 "unexpected use of 'returned'");
        isThisReturn = true;
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (isByVal) {
      unsigned offset = 0;
      unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
      unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
      if (CurByValIdx < ByValArgsCount) {
        unsigned RegBegin, RegEnd;
        CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
        for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
          RegsToPass.push_back(std::make_pair(j, Load));
        offset = RegEnd - RegBegin;
        CCInfo.nextInRegsParam();
      if (Flags.getByValSize() > 4*offset) {
        std::tie(Dst, DstInfo) =
            computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
        SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode };
      std::tie(DstAddr, DstInfo) =
          computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
  if (!MemOpChains.empty())
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
  bool isDirect = false;
    GVal = G->getGlobal();
2640 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->
isMClass());
2641 bool isLocalARMFunc =
false;
2644 if (Subtarget->genLongCalls()) {
2646 "long-calls codegen is not position independent!");
2650 if (isa<GlobalAddressSDNode>(
Callee)) {
2654 if (Subtarget->genExecuteOnly()) {
2656 "long-calls with execute-only requires movt and movw!");
2674 const char *Sym = S->getSymbol();
2679 if (Subtarget->genExecuteOnly()) {
2681 "long-calls with execute-only requires movt and movw!");
  } else if (isa<GlobalAddressSDNode>(Callee)) {
    if (!PreferIndirect) {
2705 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !
ARMInterworking);
2707 if (isStub && Subtarget->
isThumb1Only() && !Subtarget->hasV5TOps()) {
2719 "Windows is the only supported COFF target");
      else if (!TM.shouldAssumeDSOLocal(*GVal->getParent(), GVal))
2739 const char *Sym = S->getSymbol();
2740 if (isARMFunc && Subtarget->
isThumb1Only() && !Subtarget->hasV5TOps()) {
                                      ARMPCLabelIndex, 4);
2758 assert(!isARMFunc && !isDirect &&
2759 "Cannot handle call to ARM function or direct call");
2762 "call to non-secure function would "
2763 "require passing arguments on stack",
2770 "call to non-secure function would return value through pointer",
2778 if (Subtarget->isThumb()) {
2781 else if (isCmseNSCall)
2783 else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2788 if (!isDirect && !Subtarget->hasV5TOps())
2790 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2803 if (isTailCall && !isSibCall) {
2808 std::vector<SDValue> Ops;
2809 Ops.push_back(Chain);
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));
      isThisReturn = false;
2838 assert(Mask &&
"Missing call preserved mask for calling convention");
2842 Ops.push_back(InFlag);
  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InFlag, dl);
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
                         InVals, isThisReturn,
                         isThisReturn ? OutVals[0] : SDValue());
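/// HandleByVal - Every parameter *after* a byval parameter is passed
/// on the stack.  Confiscate all the parameter registers to insure
/// this.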
void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
                                    Align Alignment) const {
  Alignment = std::max(Alignment, Align(4));
  unsigned AlignInRegs = Alignment.value() / 4;
  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
  for (unsigned i = 0; i < Waste; ++i)
    unsigned Excess = 4 * (ARM::R4 - Reg);
2904 if (NSAAOffset != 0 &&
Size > Excess) {
    unsigned ByValRegBegin = Reg;
    unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
    for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
    Size = std::max<int>(Size - Excess, 0);
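/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same position (relatively) of the caller's
/// incoming argument stack.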
  unsigned Bytes = Arg.getValueSizeInBits() / 8;
2938 int FI = std::numeric_limits<int>::max();
2940 Register VR = cast<RegisterSDNode>(
Arg.getOperand(1))->getReg();
    if (!Flags.isByVal()) {
    if (Flags.isByVal())
2968 assert(FI != std::numeric_limits<int>::max());
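/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.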
bool ARMTargetLowering::IsEligibleForTailCallOptimization(
    bool isCalleeStructRet, bool isCallerStructRet,
    const bool isIndirect) const {
  if (Outs.size() >= 4 &&
      (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) {
    return CalleeCC == CallerCC;
  if (isCalleeStructRet || isCallerStructRet)
      (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
          getEffectiveCallingConv(CalleeCC, isVarArg),
          getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (CalleeCC != CallerCC) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
  if (!Outs.empty()) {
    CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
    if (CCInfo.getNextStackOffset()) {
      for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
           i != e;
           ++i, ++realArgIdx) {
          if (!ArgLocs[++i].isRegLoc())
          if (!ArgLocs[++i].isRegLoc())
          if (!ArgLocs[++i].isRegLoc())
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
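  // Map the function's "interrupt" attribute to the matching exception-return
  // kind; only the values listed in the diagnostic below are valid.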
  StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
  else if (IntKind == "SWI" || IntKind == "UNDEF")
3150 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
3177 bool isLittleEndian = Subtarget->
isLittle();
3189 "secure entry function would return value through pointer",
  for (unsigned i = 0, realRVLocIdx = 0;
       i != RVLocs.size();
       ++i, ++realRVLocIdx) {
    bool ReturnF16 = false;
    auto RetVT = Outs[realRVLocIdx].ArgVT;
                                 HalfGPRs.getValue(isLittleEndian ? 0 : 1), Flag);
                                 HalfGPRs.getValue(isLittleEndian ? 1 : 0), Flag);
                                 fmrrd.getValue(isLittleEndian ? 0 : 1), Flag);
                                 fmrrd.getValue(isLittleEndian ? 1 : 0), Flag);
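// isUsedByReturnOnly - Return true if N is used only by a return node (and
// record the chain through Chain), so the copy feeding the return can be
// folded into a tail-call-style return sequence.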
bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N,
                                           SDValue &Chain) const {
  if (N->getNumValues() != 1)
  if (!N->hasNUsesOfValue(1, 0))
    TCChain = Copy->getOperand(0);
      SDValue UseChain = U->getOperand(0);
    if (U->getOperand(U->getNumOperands() - 1).ge