#include "llvm/IR/IntrinsicsARM.h"

#define DEBUG_TYPE "arm-isel"

STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
STATISTIC(NumConstpoolPromoted,
          "Number of constants with their storage promoted into constant pools");
static cl::opt<bool>
    ARMInterworking("arm-interworking", cl::Hidden,
                    cl::desc("Enable / disable ARM interworking (for debugging only)"),
                    cl::init(true));

static cl::opt<bool> EnableConstpoolPromotion(
    "arm-promote-constant", cl::Hidden,
    cl::desc("Enable / disable promotion of unnamed_addr constants into "
             "constant pools"),
    cl::init(false));

static cl::opt<unsigned> ConstpoolPromotionMaxSize(
    "arm-promote-constant-max-size", cl::Hidden,
    cl::desc("Maximum size of constant to promote into a constant pool"),
    cl::init(64));

static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
    "arm-promote-constant-max-total", cl::Hidden,
    cl::desc("Maximum size of ALL constants to promote into a constant pool"),
    cl::init(128));

cl::opt<unsigned>
MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
    cl::desc("Maximum interleave factor for MVE VLDn to generate."),
    cl::init(2));

cl::opt<unsigned> ArmMaxBaseUpdatesToCheck(
    "arm-max-base-updates-to-check", cl::Hidden,
    cl::desc("Maximum number of base-updates to check generating postindex."),
    cl::init(64));
static const MCPhysReg GPRArgRegs[] = {
  ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
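// r0-r3 are the AAPCS core argument registers; the byval splitting, vararg
// save-area and tail-call logic below indexes this array instead of
// hard-coding register numbers.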
void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
  if (VT != PromotedLdStVT) {

  if (ElemTy != MVT::f64)

  if (ElemTy == MVT::i32) {
void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
  addTypeForNEON(VT, MVT::f64);
}

void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
  addTypeForNEON(VT, MVT::v2f64);
}
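// D registers hold 64-bit vectors and Q registers 128-bit vectors; the two
// helpers differ only in the promoted load/store type handed to
// addTypeForNEON (one f64-sized D register vs. a v2f64-sized Q pair).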
void ARMTargetLowering::setAllExpand(MVT VT) {

void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
                                       LegalizeAction Action) {

void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
  const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
  for (auto VT : IntTypes) {

  const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
  for (auto VT : FloatTypes) {

  const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
  for (auto VT : LongTypes) {

  addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
  addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
  addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);

  for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {

  const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
  for (auto VT : pTypes) {
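// The vNi1 pTypes are MVE's per-lane predicate types; they live in the VPR
// register and are the element masks consumed by VPT-predicated operations.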
      RegInfo(Subtarget->getRegisterInfo()),
      Itins(Subtarget->getInstrItineraryData()) {

  const Triple &TT = TM.getTargetTriple();

  if (Subtarget->isThumb1Only())

  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
      Subtarget->hasFPRegs()) {

    if (!Subtarget->hasVFP2Base()) {
      setAllExpand(MVT::f32);

    if (!Subtarget->hasFP64()) {
      setAllExpand(MVT::f64);

    if (Subtarget->hasFullFP16()) {

    if (Subtarget->hasBF16()) {
      setAllExpand(MVT::bf16);
      if (!Subtarget->hasFullFP16())

      addAllExtLoads(VT, InnerVT, Expand);

  if (!Subtarget->isThumb1Only() && !Subtarget->hasV8_1MMainlineOps())

  if (!Subtarget->hasV8_1MMainlineOps())

  if (!Subtarget->isThumb1Only())

  if (Subtarget->hasMVEIntegerOps())
    addMVEVectorTypes(Subtarget->hasMVEFloatOps());
  if (Subtarget->hasLOB()) {

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    if (Subtarget->hasFullFP16()) {
      addQRTypeForNEON(MVT::v8f16);
      addDRTypeForNEON(MVT::v4f16);
    }

    if (Subtarget->hasBF16()) {
      addQRTypeForNEON(MVT::v8bf16);
      addDRTypeForNEON(MVT::v4bf16);
    }
  if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {

  if (Subtarget->hasNEON()) {

  if (!Subtarget->hasVFP4Base()) {

  for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,

  for (auto VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,

  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {

  if (Subtarget->hasMVEIntegerOps()) {

  if (Subtarget->hasMVEFloatOps()) {

  if (!Subtarget->hasFP64()) {

  if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {

  if (Subtarget->hasFullFP16()) {

  if (!Subtarget->hasFP16()) {

  if (!Subtarget->isThumb1Only()) {

  if (Subtarget->hasDSP()) {

  if (Subtarget->hasBaseDSP()) {

  if (Subtarget->isThumb1Only()) {

  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops() ||
      (Subtarget->isThumb2() && !Subtarget->hasDSP()))

  if (Subtarget->hasMVEIntegerOps())

  if (Subtarget->isThumb1Only()) {

  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())

  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {

  if (Subtarget->hasPerfMon())

  if (!Subtarget->hasV6Ops())

  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
                                        : Subtarget->hasDivideInARMMode();

  if (TT.isOSWindows() && !Subtarget->hasDivideInThumbMode()) {

  if (TT.isTargetAEABI() || TT.isAndroid() || TT.isTargetGNUAEABI() ||
      TT.isTargetMuslAEABI() || TT.isOSFuchsia() || TT.isOSWindows()) {
    HasStandaloneRem = false;

  if (TT.isOSWindows())

  InsertFencesForAtomic = false;
  if (Subtarget->hasAnyDataBarrier() &&
      (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {

    if (!Subtarget->isThumb() || !Subtarget->isMClass())

    if (!Subtarget->hasAcquireRelease() ||
      InsertFencesForAtomic = true;

  if (Subtarget->hasDataBarrier())
    InsertFencesForAtomic = true;
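// When InsertFencesForAtomic is left set, AtomicExpand brackets atomic
// operations with explicit fences (DMB where available) instead of relying
// on acquire/release instruction forms.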
  if (!InsertFencesForAtomic) {

  if (TT.isOSLinux() || (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {

  } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
             Subtarget->hasForced32BitAtomics()) {

  if (!Subtarget->hasV6Ops()) {

  if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
      !Subtarget->isThumb1Only()) {

  if (Subtarget->hasFullFP16()) {

  if (Subtarget->hasFullFP16())

  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
      !Subtarget->isThumb1Only()) {

  if (!Subtarget->hasVFP4Base()) {

  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {

  if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
  if (!Subtarget->hasFP16()) {

  if (Subtarget->hasFPARMv8Base()) {

  if (Subtarget->hasFP64())

  if (Subtarget->hasNEON()) {

  if (Subtarget->hasFullFP16()) {

  if (Subtarget->hasNEON()) {

  if (Subtarget->hasV8Ops()) {

  if (Subtarget->hasFullFP16()) {

  if (TT.isOSWindows()) {

  if (Subtarget->hasMVEIntegerOps())

  if (Subtarget->hasV6Ops())

  if (Subtarget->isThumb1Only())

  if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
      Subtarget->isThumb2()) {

  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
      !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())

      Align(1ULL << Subtarget->getPreferBranchLogAlignment()));

  return Subtarget->useSoftFloat();

  return !Subtarget->isThumb1Only() && VT.getSizeInBits() <= 32;
std::pair<const TargetRegisterClass *, uint8_t>
ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
                                           MVT VT) const {

  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    RRC = &ARM::DPRRegClass;

    if (Subtarget->useNEONForSinglePrecisionFP())

  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
    RRC = &ARM::DPRRegClass;

    RRC = &ARM::DPRRegClass;

    RRC = &ARM::DPRRegClass;

  return std::make_pair(RRC, Cost);
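// All vector types report a DPR-based representative class (with a size
// cost factor) because D, Q and QQ registers alias one physical register
// file; this gives the register-pressure heuristics a single common unit.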
  if ((Subtarget->hasMVEIntegerOps() &&
       (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
        VT == MVT::v16i8)) ||
      (Subtarget->hasMVEFloatOps() &&
       (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16)))

  if (Subtarget->hasNEON()) {
    if (VT == MVT::v4i64)
      return &ARM::QQPRRegClass;
    if (VT == MVT::v8i64)
      return &ARM::QQQQPRRegClass;
  }
  if (Subtarget->hasMVEIntegerOps()) {
    if (VT == MVT::v4i64)
      return &ARM::MQQPRRegClass;
    if (VT == MVT::v8i64)
      return &ARM::MQQQQPRRegClass;
  }
bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
                                               Align &PrefAlign) const {

  PrefAlign =
      (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4));

  unsigned NumVals = N->getNumValues();

  for (unsigned i = 0; i != NumVals; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue || VT == MVT::Other)

  if (!N->isMachineOpcode())

  if (MCID.getNumDefs() == 0)

  if (!Itins->isEmpty() &&
      Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2U)

    return Const->getZExtValue() == 16;

    return Const->getZExtValue() == 16;

    return Const->getZExtValue() == 16;
CallingConv::ID
ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
                                           bool isVarArg) const {

    if (!getTM().isAAPCS_ABI())

    else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&

    if (!getTM().isAAPCS_ABI()) {
      if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)

    } else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&

CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
                                                 bool isVarArg) const {
  return CCAssignFnForNode(CC, /*Return=*/false, isVarArg);
}

CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
                                                   bool isVarArg) const {
  return CCAssignFnForNode(CC, /*Return=*/true, isVarArg);
}
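// The "effective" convention folds the float ABI into the requested one: a
// plain C call resolves to AAPCS-VFP on hard-float AAPCS targets but AAPCS
// (or APCS) elsewhere, and varargs fall back to the base convention, so the
// call and return sides always pick matching CCAssignFns.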
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  switch (getEffectiveCallingConv(CC, isVarArg)) {
  if (Subtarget->hasFullFP16()) {
    Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);

  if (Subtarget->hasFullFP16()) {
    Val = DAG.getNode(ARMISD::VMOVrh, dl,
SDValue ARMTargetLowering::LowerCallResult(
    SDValue ThisVal, bool isCmseNSCall) const {

  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    if (i == 0 && isThisReturn) {
             "unexpected return calling convention register assignment");

      if (!Subtarget->isLittle())
      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

      if (!Subtarget->isLittle())
      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

    const ISD::InputArg &Arg = Ins[VA.getValNo()];
std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
    bool IsTailCall, int SPDiff) const {
  MachinePointerInfo DstInfo;

  return std::make_pair(DstAddr, DstInfo);
}
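// For tail calls the outgoing argument is stored into the caller's own
// incoming-argument area, so the address is a fixed frame-index slot offset
// by SPDiff rather than a plain offset from the outgoing stack pointer.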
ARMTargetLowering::ByValCopyKind ARMTargetLowering::ByValNeedsCopyForTailCall(

  if (!SrcFrameIdxNode || !DstFrameIdxNode)

  int SrcFI = SrcFrameIdxNode->getIndex();
  int DstFI = DstFrameIdxNode->getIndex();
         "byval passed in non-fixed stack slot");

  if (SrcOffset == DstOffset)
                                         RegsToPassVector &RegsToPass,

                             DAG.getVTList(MVT::i32, MVT::i32), Arg);
  unsigned id = Subtarget->isLittle() ? 0 : 1;

    MachinePointerInfo DstInfo;
    std::tie(DstAddr, DstInfo) =
        computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
  SelectionDAG &DAG = CLI.DAG;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;

  const CallBase *CB = CLI.CB;

  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MachineFunction::CallSiteInfo CSInfo;
  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
  bool isThisReturn = false;
  bool isCmseNSCall = false;
  bool isSibCall = false;
  bool PreferIndirect = false;
  bool GuardWithBTI = false;
      !Subtarget->noBTIAtReturnTwice())

    CSInfo = MachineFunction::CallSiteInfo(*CB);

    isCmseNSCall = true;

  if (!Subtarget->supportsTailCall())

    PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
                     count_if(GV->users(), [&BB](const User *U) {
                       return isa<Instruction>(U) &&
                              cast<Instruction>(U)->getParent() == BB;

        IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs, PreferIndirect);

           "site marked musttail");

  unsigned NumBytes = CCInfo.getStackSize();

  if (isTailCall && !isSibCall) {
    auto FuncInfo = MF.getInfo<ARMFunctionInfo>();
    unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();

    assert(StackAlign && "data layout string is missing stack alignment");
    NumBytes = alignTo(NumBytes, *StackAlign);

    SPDiff = NumReusableBytes - NumBytes;

    if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
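// SPDiff is negative when the callee needs more argument stack than this
// function's incoming area provides; the arg-regs save size is grown to
// cover the difference so the prologue reserves enough space.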
  RegsToPassVector RegsToPass;

  DenseMap<unsigned, SDValue> ByValTemporaries;

  for (const CCValAssign &VA : ArgLocs) {
    SDValue Src = OutVals[ArgIdx];
    ISD::ArgFlagsTy Flags = Outs[ArgIdx].Flags;

    if (!Flags.isByVal())

    MachinePointerInfo DstInfo;
    std::tie(Dst, DstInfo) =
        computeAddrForCallArg(dl, DAG, VA, SDValue(), true, SPDiff);
    ByValCopyKind Copy = ByValNeedsCopyForTailCall(DAG, Src, Dst, Flags);

    if (Copy == NoCopy) {
    } else if (Copy == CopyOnce) {
      ByValTemporaries[ArgIdx] = Src;
      assert(Copy == CopyViaTemp && "unexpected enum value");

      int TempFrameIdx = MFI.CreateStackObject(
          Flags.getByValSize(), Flags.getNonZeroByValAlign(), false);

      SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue Ops[] = {Chain, Temp, Src, SizeNode, AlignNode};
          DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, Ops));
      ByValTemporaries[ArgIdx] = Temp;

  if (!ByValCopyChains.empty())
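// For tail calls a byval argument may have to be copied through a fresh
// stack temporary first: source and destination both live in the caller's
// frame and can overlap, so staging through ByValTemporaries avoids
// clobbering a slot that is still an input to a later argument copy.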
  bool AfterFormalArgLoads = false;

  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
       i != e;
       ++i, ++realArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[realArgIdx];
    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
    bool isByVal = Flags.isByVal();

    if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {

      if (ByValTempChain) {

        for (unsigned I = 0; I < OutVals.size(); ++I) {
          if (Outs[I].Flags.isByVal())

      FrameIndexSDNode *FIN =

      if (!MFI.isFixedObjectIndex(FIN->getIndex()))

      for (const CCValAssign &VA : ArgLocs) {

      if (!IncomingLoad.empty()) {

      AfterFormalArgLoads = true;
    auto ArgVT = Outs[realArgIdx].ArgVT;
    if (isCmseNSCall && (ArgVT == MVT::f16)) {

      PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
                       StackPtr, MemOpChains, isTailCall, SPDiff);

      PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
                       StackPtr, MemOpChains, isTailCall, SPDiff);

        MachinePointerInfo DstInfo;
        std::tie(DstAddr, DstInfo) =
            computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);

      PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
                       StackPtr, MemOpChains, isTailCall, SPDiff);

      if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
          Outs[0].VT == MVT::i32) {
               "unexpected calling convention register assignment");
               "unexpected use of 'returned'");
        isThisReturn = true;

      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (isByVal) {
      unsigned offset = 0;

      unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
      unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();

      bool NeedsStackCopy;
      if (auto It = ByValTemporaries.find(realArgIdx);
          It != ByValTemporaries.end()) {
        ByValSrc = It->second;
        NeedsStackCopy = true;
      } else {
        NeedsStackCopy = !isTailCall;
      }

      if (CurByValIdx < ByValArgsCount) {
        unsigned RegBegin, RegEnd;
        CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);

        for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {

              DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),

          RegsToPass.push_back(std::make_pair(j, Load));

        offset = RegEnd - RegBegin;

        CCInfo.nextInRegsParam();

      if (NeedsStackCopy && Flags.getByValSize() > 4 * offset) {

        MachinePointerInfo DstInfo;
        std::tie(Dst, DstInfo) =
            computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);

        SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode };

      MachinePointerInfo DstInfo;
      std::tie(DstAddr, DstInfo) =
          computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);

  if (!MemOpChains.empty())
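// A byval argument can be split between r0-r3 and the stack: the first
// 4*offset bytes are loaded into the assigned GPRs, and only the remainder
// is copied with COPY_STRUCT_BYVAL (which may expand to a memcpy loop,
// counted by the NumLoopByVals statistic above).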
  for (const auto &[Reg, N] : RegsToPass) {

  bool isDirect = false;

  const GlobalValue *GVal = nullptr;
    GVal = G->getGlobal();
  bool isStub = !TM.shouldAssumeDSOLocal(GVal) && Subtarget->isTargetMachO();

  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
  bool isLocalARMFunc = false;
  if (Subtarget->genLongCalls()) {
           "long-calls codegen is not position independent!");

      if (Subtarget->genExecuteOnly()) {
        if (Subtarget->useMovt())

      Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);

      const char *Sym = S->getSymbol();

      if (Subtarget->genExecuteOnly()) {
        if (Subtarget->useMovt())

      Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);

    if (!PreferIndirect) {

      isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);

      if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
        assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
            ARMISD::WrapperPIC, dl, PtrVt,
      } else if (Subtarget->isTargetCOFF()) {
        assert(Subtarget->isTargetWindows() &&
               "Windows is the only supported COFF target");

        else if (!TM.shouldAssumeDSOLocal(GVal))

            DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),

    const char *Sym = S->getSymbol();
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      ARMConstantPoolValue *CPV =
                                      ARMPCLabelIndex, 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);

  assert(!isARMFunc && !isDirect &&
         "Cannot handle call to ARM function or direct call");

        "call to non-secure function would require "
        "passing arguments on stack",

        "call to non-secure function would return value through pointer",
  if (Subtarget->isThumb()) {
    if (GuardWithBTI)
      CallOpc = ARMISD::t2CALL_BTI;
    else if (isCmseNSCall)
      CallOpc = ARMISD::tSECALL;
    else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
      CallOpc = ARMISD::CALL_NOLINK;
    else
      CallOpc = ARMISD::CALL;
  } else {
    if (!isDirect && !Subtarget->hasV5TOps())
      CallOpc = ARMISD::CALL_NOLINK;
    else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
             !Subtarget->hasMinSize())
      CallOpc = ARMISD::CALL_NOLINK;
    else
      CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
  }
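// CALL_NOLINK is a call that does not write LR via BL/BLX: pre-v5T indirect
// calls lower to a "mov lr, pc; bx reg" sequence, and noreturn direct calls
// use it on cores with a return-address stack so the predictor is not
// polluted by a call that never returns.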
  if (isTailCall && !isSibCall) {

  std::vector<SDValue> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  for (const auto &[Reg, N] : RegsToPass)

  const uint32_t *Mask;
  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();

      isThisReturn = false;

  assert(Mask && "Missing call preserved mask for calling convention");

  Ops.push_back(InGlue);

  Chain = DAG.getNode(CallOpc, dl, {MVT::Other, MVT::Glue}, Ops);

  uint64_t CalleePopBytes =

  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InGlue, dl);

  return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
                         InVals, isThisReturn,
                         isThisReturn ? OutVals[0] : SDValue(), isCmseNSCall);
void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
                                    Align Alignment) const {

  Alignment = std::max(Alignment, Align(4));

  unsigned AlignInRegs = Alignment.value() / 4;
  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
  for (unsigned i = 0; i < Waste; ++i)

  unsigned Excess = 4 * (ARM::R4 - Reg);

  if (NSAAOffset != 0 && Size > Excess) {

  unsigned ByValRegBegin = Reg;
  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);

  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)

  Size = std::max<int>(Size - Excess, 0);
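// Example: a 12-byte byval with 8-byte alignment arriving when r1 is the
// next free register wastes r1 (Waste == 1), books r2-r3 (Excess == 8), and
// leaves Size == 4 bytes to be passed on the stack.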
bool ARMTargetLowering::IsEligibleForTailCallOptimization(

  const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  const SelectionDAG &DAG = CLI.DAG;

  assert(Subtarget->supportsTailCall());

  SmallSet<MCPhysReg, 5> AddressRegisters = {ARM::R0, ARM::R1, ARM::R2,
                                             ARM::R3};
  if (!(Subtarget->isThumb1Only() ||
        MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true)))
    AddressRegisters.insert(ARM::R12);
  for (const CCValAssign &AL : ArgLocs)
      AddressRegisters.erase(AL.getLocReg());
  if (AddressRegisters.empty()) {
    LLVM_DEBUG(dbgs() << "false (no reg to hold function pointer)\n");

                      << " (guaranteed tail-call CC)\n");
    return CalleeCC == CallerCC;
  bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();

  if (isCalleeStructRet != isCallerStructRet) {

    const GlobalValue *GV = G->getGlobal();
        (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
         TT.isOSBinFormatMachO())) {

          getEffectiveCallingConv(CalleeCC, isVarArg),
          getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,

  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (CalleeCC != CallerCC) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) {

  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();

      LLVM_DEBUG(dbgs() << "false (parameters in CSRs do not match)\n");
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);

  StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();

  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
  else if (IntKind == "SWI" || IntKind == "UNDEF")
                       "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");

  return DAG.getNode(ARMISD::INTRET_GLUE, DL, MVT::Other, RetOps);
  bool isLittleEndian = Subtarget->isLittle();

  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

        "secure entry function would return value through pointer",

  for (unsigned i = 0, realRVLocIdx = 0;
       i != RVLocs.size();
       ++i, ++realRVLocIdx) {
    CCValAssign &VA = RVLocs[i];

    SDValue Arg = OutVals[realRVLocIdx];
    bool ReturnF16 = false;

    if (Subtarget->hasFullFP16() && getTM().isTargetHardFloat()) {

      auto RetVT = Outs[realRVLocIdx].ArgVT;

                      DAG.getVTList(MVT::i32, MVT::i32), Half);

                               HalfGPRs.getValue(isLittleEndian ? 0 : 1), Glue);

                               HalfGPRs.getValue(isLittleEndian ? 1 : 0), Glue);

                      DAG.getVTList(MVT::i32, MVT::i32), Arg);

                               fmrrd.getValue(isLittleEndian ? 0 : 1), Glue);

                               fmrrd.getValue(isLittleEndian ? 1 : 0), Glue);

  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();

      !Subtarget->isMClass()) {
    if (Subtarget->isThumb1Only())

  return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
  if (N->getNumValues() != 1)
  if (!N->hasNUsesOfValue(1, 0))

  SDNode *Copy = *N->user_begin();

    if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
    TCChain = Copy->getOperand(0);
  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
    SDNode *VMov = Copy;
    SmallPtrSet<SDNode*, 2> Copies;
    for (SDNode *U : VMov->users()) {

    for (SDNode *U : VMov->users()) {
      SDValue UseChain = U->getOperand(0);
      if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)

    if (!Copy->hasOneUse())
    if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
    TCChain = Copy->getOperand(0);

  bool HasRet = false;
  for (const SDNode *U : Copy->users()) {
    if (U->getOpcode() != ARMISD::RET_GLUE &&
        U->getOpcode() != ARMISD::INTRET_GLUE)
bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  if (!Subtarget->supportsTailCall())

         "LowerWRITE_REGISTER called for non-i64 type argument.");

  EVT PtrVT = Op.getValueType();

  if (Subtarget->genExecuteOnly()) {
    auto *T = CP->getType();
    auto C = const_cast<Constant *>(CP->getConstVal());
    auto GV = new GlobalVariable(

    return LowerGlobalAddress(GA, DAG);

  Align CPAlign = CP->getAlign();
  if (Subtarget->isThumb1Only())
    CPAlign = std::max(CPAlign, Align(4));
  if (CP->isMachineConstantPoolEntry())

  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
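// With execute-only code (XO memory cannot be read as data), constants
// cannot live in per-function constant islands, so they are rewritten as
// global variables and materialized through LowerGlobalAddress instead.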
  if (Subtarget->genExecuteOnly() && !Subtarget->hasV8MBaselineOps())

  unsigned ARMPCLabelIndex = 0;

  if (!IsPositionIndependent) {

  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;

  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);

  if (!IsPositionIndependent)

  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
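// PCAdj accounts for the architectural PC-read offset: when the PIC_ADD is
// executed, PC reads as the instruction address plus 8 in ARM state and
// plus 4 in Thumb state, so the constant-pool entry is biased accordingly.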
SDValue
ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
  assert(Subtarget->isTargetDarwin() &&
         "This function expects a Darwin target");

  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);

      MVT::i32, DL, Chain, DescAddr,

  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);

      Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),

SDValue
ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");

  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());

  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());

                        MachinePointerInfo());

      DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,

  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;

  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  ARMConstantPoolValue *CPV =

  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);

  TargetLowering::CallLoweringInfo CLI(DAG);

  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  return CallResult.first;
  const GlobalValue *GV = GA->getGlobal();

  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);

  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

    unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
    ARMConstantPoolValue *CPV =

        PtrVT, dl, Chain, Offset,

        PtrVT, dl, Chain, Offset,

    ARMConstantPoolValue *CPV =

        PtrVT, dl, Chain, Offset,

  if (Subtarget->isTargetDarwin())
    return LowerGlobalTLSAddressDarwin(Op, DAG);

  if (Subtarget->isTargetWindows())
    return LowerGlobalTLSAddressWindows(Op, DAG);

  assert(Subtarget->isTargetELF() && "Only ELF implemented here");

    return LowerToTLSGeneralDynamicModel(GA, DAG);

  return LowerToTLSExecModels(GA, DAG, model);
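// ELF TLS picks between the general/local-dynamic models (a __tls_get_addr
// call, handled by LowerToTLSGeneralDynamicModel) and the initial/local-exec
// models (an offset from the thread pointer read via ARMISD::THREAD_POINTER).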
  while (!Worklist.empty()) {

    if (!I || I->getParent()->getParent() != F)

  if (!GVar || !GVar->hasInitializer() ||
      !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
      !GVar->hasLocalLinkage())

  auto *Init = GVar->getInitializer();
      Init->needsDynamicRelocation())

  unsigned RequiredPadding = 4 - (Size % 4);
  bool PaddingPossible =
      RequiredPadding == 4 || (CDAInit && CDAInit->isString());

  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);

  if (RequiredPadding != 4) {
    while (RequiredPadding--)

  ++NumConstpoolPromoted;
  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);

  if (!(GV = GA->getAliaseeObject()))
    return V->isConstant();

    return LowerGlobalAddressWindows(Op, DAG);

    return LowerGlobalAddressELF(Op, DAG);

    return LowerGlobalAddressDarwin(Op, DAG);
  if (GV->isDSOLocal() && !Subtarget->genExecuteOnly())

  } else if (Subtarget->isROPI() && IsRO) {
  } else if (Subtarget->isRWPI() && !IsRO) {

    if (Subtarget->useMovt()) {
      RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);

      ARMConstantPoolValue *CPV =
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);

  if (Subtarget->useMovt() || Subtarget->genExecuteOnly()) {
    if (Subtarget->useMovt())

    return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,

  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);

  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
         "ROPI/RWPI not currently supported for Darwin");

  if (Subtarget->useMovt())

  if (Subtarget->isGVIndirectSymbol(GV))

  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
  assert(Subtarget->useMovt() && "Windows on ARM expects to use movw/movt");
  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
         "ROPI/RWPI not currently supported for Windows");

  else if (!TM.shouldAssumeDSOLocal(GV))
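// On targets with movw/movt the address is materialized as two immediates
// (counted by the NumMovwMovt statistic above), e.g.:
//   movw r0, :lower16:sym
//   movt r0, :upper16:sym
// otherwise it is loaded from a constant-pool entry via ARMISD::Wrapper.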
  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
                     DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
                     Op.getOperand(1), Val);

  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),

  return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,

SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
  unsigned IntNo =
      Op.getConstantOperandVal(Op.getOperand(0).getValueType() == MVT::Other);

  case Intrinsic::arm_gnu_eabi_mcount: {

    const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
    const uint32_t *Mask =
    assert(Mask && "Missing call preserved mask for calling convention");

    constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue};

    if (Subtarget->isThumb())
          ARM::tBL_PUSHLR, dl, ResultTys,
          {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
           DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),

          {ReturnAddress, Callee, RegisterMask, Chain}),
  unsigned IntNo = Op.getConstantOperandVal(0);

  case Intrinsic::thread_pointer: {
    return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
  }
  case Intrinsic::arm_cls: {
    const SDValue &Operand = Op.getOperand(1);
    const EVT VTy = Op.getValueType();

  case Intrinsic::arm_cls64: {
    const SDValue &Operand = Op.getOperand(1);
    const EVT VTy = Op.getValueType();

  case Intrinsic::eh_sjlj_lsda: {
    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
    unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
    ARMConstantPoolValue *CPV =
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);

    if (IsPositionIndependent) {
      Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
  case Intrinsic::arm_neon_vabs:

  case Intrinsic::arm_neon_vabds:
    if (Op.getValueType().isInteger())
                         Op.getOperand(1), Op.getOperand(2));

  case Intrinsic::arm_neon_vabdu:
                       Op.getOperand(1), Op.getOperand(2));

  case Intrinsic::arm_neon_vmulls:
  case Intrinsic::arm_neon_vmullu: {
    unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
                          ? ARMISD::VMULLs : ARMISD::VMULLu;
    return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  }
  case Intrinsic::arm_neon_vminnm:
  case Intrinsic::arm_neon_vmaxnm: {
    unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
    return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  }
  case Intrinsic::arm_neon_vminu:
  case Intrinsic::arm_neon_vmaxu: {
    if (Op.getValueType().isFloatingPoint())
    unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
    return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  }
  case Intrinsic::arm_neon_vmins:
  case Intrinsic::arm_neon_vmaxs: {
    if (!Op.getValueType().isFloatingPoint()) {
      unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
      return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
                         Op.getOperand(1), Op.getOperand(2));
    }
    unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
    return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  }
  case Intrinsic::arm_neon_vtbl1:
    return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::arm_neon_vtbl2:
    return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  case Intrinsic::arm_mve_pred_i2v:
  case Intrinsic::arm_mve_pred_v2i:
    return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(),
  case Intrinsic::arm_mve_vreinterpretq:
    return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(),
  case Intrinsic::arm_mve_lsll:
    return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  case Intrinsic::arm_mve_asrl:
    return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
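// Most of these cases are pure renames: a target intrinsic is rewritten as
// the equivalent ARMISD node so that later DAG combines and instruction
// selection see one canonical form (e.g. @llvm.arm.neon.vmulls becomes
// ARMISD::VMULLs).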
  if (!Subtarget->hasDataBarrier()) {
    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
           "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),

  } else if (Subtarget->preferISHSTBarriers() &&

      DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),

        (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
    return Op.getOperand(0);

  unsigned isRead = ~Op.getConstantOperandVal(2) & 1;
      (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
    return Op.getOperand(0);

  unsigned isData = Op.getConstantOperandVal(4);
  if (Subtarget->isThumb()) {
    isRead = ~isRead & 1;
    isData = ~isData & 1;
  }

  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),

  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
                                             const SDLoc &dl) const {
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  const TargetRegisterClass *RC;
    RC = &ARM::tGPRRegClass;
    RC = &ARM::GPRRegClass;

      MVT::i32, dl, Root, FIN,

  if (!Subtarget->isLittle())

  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);

                                      const Value *OrigArg,
                                      unsigned InRegsParamRecordIdx,
                                      int ArgOffset, unsigned ArgSize) const {

  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned RBegin, REnd;

    RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];

    ArgOffset = -4 * (ARM::R4 - RBegin);

  const TargetRegisterClass *RC =

  for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
                     MachinePointerInfo(OrigArg, 4 * i));

  if (!MemOps.empty())

                                         unsigned TotalArgRegsSaveSize,
                                         bool ForceMutable) const {
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

      CCInfo.getStackSize(), std::max(4U, TotalArgRegsSaveSize));
bool ARMTargetLowering::splitValueIntoRegisterParts(
    unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
  if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {

SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
    MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
  if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
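// f16/bf16 values travel in the low half of an f32 "container" part here;
// the split/join is effectively a bitcast through i32, with the upper bits
// carrying no meaning to the consumer.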
SDValue ARMTargetLowering::LowerFormalArguments(

  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  unsigned CurArgIdx = 0;

  unsigned ArgRegBegin = ARM::R4;
  for (const CCValAssign &VA : ArgLocs) {
    if (!Flags.isByVal())

    unsigned RBegin, REnd;
    ArgRegBegin = std::min(ArgRegBegin, RBegin);

  int lastInsIndex = -1;

    ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);

  unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (Ins[VA.getValNo()].isOrigArg()) {
      std::advance(CurOrigArg,
                   Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();

          GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);

            MVT::f64, dl, Chain, FIN,

        ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);

        ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);

        const TargetRegisterClass *RC;

        if (RegVT == MVT::f16 || RegVT == MVT::bf16)
          RC = &ARM::HPRRegClass;
        else if (RegVT == MVT::f32)
          RC = &ARM::SPRRegClass;
        else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 ||
                 RegVT == MVT::v4bf16)
          RC = &ARM::DPRRegClass;
        else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 ||
                 RegVT == MVT::v8bf16)
          RC = &ARM::QPRRegClass;
        else if (RegVT == MVT::i32)
                                           : &ARM::GPRRegClass;

      const ISD::InputArg &Arg = Ins[VA.getValNo()];

      assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");

      if (index != lastInsIndex)
        ISD::ArgFlagsTy Flags = Ins[index].Flags;

        if (Flags.isByVal()) {
          assert(Ins[index].isOrigArg() &&
                 "Byval arguments cannot be implicit");

              CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,

        lastInsIndex = index;

    VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getStackSize(),
                         TotalArgRegsSaveSize);

        "secure entry function must not be variadic", dl.getDebugLoc()));

  assert(StackAlign && "data layout string is missing stack alignment");
  StackArgSize = alignTo(StackArgSize, *StackAlign);

        "secure entry function requires arguments on stack", dl.getDebugLoc()));
    return CFP->getValueAPF().isPosZero();

  if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
    SDValue WrapperOp = Op.getOperand(1).getOperand(0);
      return CFP->getValueAPF().isPosZero();
  } else if (Op->getValueType(0) == MVT::f64) {

    if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&

                                     const SDLoc &dl) const {
    unsigned C = RHSC->getZExtValue();

  if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND &&
    unsigned Mask = LHS.getConstantOperandVal(1);
    uint64_t RHSV = RHSC->getZExtValue();
    if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 &&
        Mask != 65535) {
      if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {

  if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&
      LHS.getConstantOperandVal(1) < 31) {
    unsigned ShiftAmt = LHS.getConstantOperandVal(1) + 1;
  unsigned CompareType;
    CompareType = ARMISD::CMP;
    CompareType = ARMISD::CMPZ;

                                     bool Signaling) const {
  assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);

    Flags = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, dl,

std::pair<SDValue, SDValue>

  assert(Op.getValueType() == MVT::i32 && "Unsupported value type");

  switch (Op.getOpcode()) {

  return std::make_pair(Value, OverflowCmp);
  std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);

  EVT VT = Op.getValueType();
      DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, ARMcc, OverflowCmp);

  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);

  EVT VT = Op.getValueType();
  SDVTList VTs = DAG.getVTList(VT, MVT::i32);

  switch (Op.getOpcode()) {

  EVT VT = Op.getValueType();
  if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() ||
      Subtarget->isThumb1Only())

    switch (Op->getOpcode()) {
      NewOpcode = ARMISD::UQADD8b;
      NewOpcode = ARMISD::QADD8b;
      NewOpcode = ARMISD::UQSUB8b;
      NewOpcode = ARMISD::QSUB8b;
    switch (Op->getOpcode()) {
      NewOpcode = ARMISD::UQADD16b;
      NewOpcode = ARMISD::QADD16b;
      NewOpcode = ARMISD::UQSUB16b;
      NewOpcode = ARMISD::QSUB16b;

      DAG.getNode(NewOpcode, dl, MVT::i32,
  unsigned Opc = Cond.getOpcode();

  if (Cond.getResNo() == 1 &&

    std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
    EVT VT = Op.getValueType();

    return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, OverflowCmp, DAG);

  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
    const ConstantSDNode *CMOVTrue =
    const ConstantSDNode *CMOVFalse =

    if (CMOVTrue && CMOVFalse) {

      if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
        False = SelectFalse;
      } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {

        return getCMOV(dl, Op.getValueType(), True, False, Cond.getOperand(2),
                       Cond.getOperand(3), DAG);
                                   bool &swpCmpOps, bool &swpVselOps) {

    swpCmpOps = !swpCmpOps;
    swpVselOps = !swpVselOps;

  if (!Subtarget->hasFP64() && VT == MVT::f64) {
                               DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
                              DAG.getVTList(MVT::i32, MVT::i32), TrueVal);

  return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, Flags);

          ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
          ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));

  EVT VT = Op.getValueType();

  if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||

  int64_t PosVal = std::max(Val1, Val2);
  int64_t NegVal = std::min(Val1, Val2);

    return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp,
    return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp,

    V = (KTmp == TrueVal) ? FalseVal : TrueVal;

  if (*K != KTmp || V != VTmp)
bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
    return !Subtarget->hasVFP2Base();
    return !Subtarget->hasFP64();
    return !Subtarget->hasFullFP16();

  EVT VT = Op.getValueType();

  if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())

  if (VT == MVT::i32 &&

  if (Op.getValueType().isInteger()) {

      LHS.getValueType() == RHS.getValueType()) {
    EVT VT = LHS.getValueType();

      Shift = DAG.getNOT(dl, Shift, VT);

  if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
      LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {

    unsigned Opcode = 0;

    if (TVal == ~FVal) {
      Opcode = ARMISD::CSINV;
    } else if (TVal == ~FVal + 1) {
      Opcode = ARMISD::CSNEG;
    } else if (TVal + 1 == FVal) {
      Opcode = ARMISD::CSINC;
    } else if (TVal == FVal + 1) {
      Opcode = ARMISD::CSINC;

    if (Opcode != ARMISD::CSINC &&

    if (FVal == 0 && Opcode != ARMISD::CSINC) {

      EVT VT = TrueVal.getValueType();
      return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);
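// The 8.1-M conditional-select forms compute the false arm from the true
// arm: CSINV for ~FVal, CSNEG for -FVal, and CSINC when the two constants
// differ by one (in either orientation, hence the two CSINC cases above).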
  if (isUnsupportedFloatingType(LHS.getValueType())) {

    if (!RHS.getNode()) {

  if (LHS.getValueType() == MVT::i32) {

    if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
                                        TrueVal.getValueType() == MVT::f32 ||
                                        TrueVal.getValueType() == MVT::f64)) {

    return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);

  if (Subtarget->hasFPARMv8Base() &&
      (TrueVal.getValueType() == MVT::f16 ||
       TrueVal.getValueType() == MVT::f32 ||
       TrueVal.getValueType() == MVT::f64)) {
    bool swpCmpOps = false;
    bool swpVselOps = false;

  SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);
    Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, Cmp, DAG);
  if (!N->hasOneUse())
  if (!N->getNumValues())
  EVT VT = Op.getValueType();
  if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())

  return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
                     Ld->getPointerInfo(), Ld->getAlign(),
                     Ld->getMemOperand()->getFlags());

    SDValue Ptr = Ld->getBasePtr();
        DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
                    Ld->getAlign(), Ld->getMemOperand()->getFlags());

    RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
                          Ld->getPointerInfo().getWithOffset(4),
                          Ld->getMemOperand()->getFlags());
  bool LHSSeenZero = false;
  bool RHSSeenZero = false;
  if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {

    if (LHS.getValueType() == MVT::f32) {

      return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,

    SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
    return DAG.getNode(ARMISD::BCC_i64, dl, MVT::Other, Ops);

  return DAG.getNode(ARMISD::CMOV, DL, MVT::i32, Op.getOperand(0), Neg,

  unsigned Opc = Cond.getOpcode();
                          !Subtarget->isThumb1Only();
  if (Cond.getResNo() == 1 &&

    std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);

    ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);

    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
  if (isUnsupportedFloatingType(LHS.getValueType())) {

    if (!RHS.getNode()) {

  unsigned Opc = LHS.getOpcode();
                          !Subtarget->isThumb1Only();

    std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);

    ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);

    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,

  if (LHS.getValueType() == MVT::i32) {

    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, Cmp);

  SDNodeFlags Flags = Op->getFlags();
  if (Flags.hasNoNaNs() &&

  if (SDValue Result = OptimizeVFPBrcond(Op, DAG))

  Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops);

  Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);

  if (Subtarget->isThumb2() ||
      (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {

    return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
                       Addr, Op.getOperand(2), JTI);

        DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,

    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);

        DAG.getLoad(PTy, dl, Chain, Addr,

    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
  EVT VT = Op.getValueType();

  if (Op.getValueType().getVectorElementType() == MVT::i32) {
    if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)

  const EVT OpTy = Op.getOperand(0).getValueType();
  if (OpTy == MVT::v4f32)
  else if (OpTy == MVT::v4f16 && HasFullFP16)
  else if (OpTy == MVT::v8f16 && HasFullFP16)

  if (VT != MVT::v4i16 && VT != MVT::v8i16)

  Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));

  EVT VT = Op.getValueType();

  bool IsStrict = Op->isStrictFPOpcode();
  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);

  if (isUnsupportedFloatingType(SrcVal.getValueType())) {

    std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
                                          CallOptions, Loc, Chain);

                     Loc, Op.getValueType(), SrcVal);

  EVT VT = Op.getValueType();

  EVT FromVT = Op.getOperand(0).getValueType();

  if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f32)
  if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f64 &&
      Subtarget->hasFP64())
  if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f16 &&
      Subtarget->hasFullFP16())
  if (VT == MVT::v4i32 && ToVT == MVT::i32 && FromVT == MVT::v4f32 &&
      Subtarget->hasMVEFloatOps())
  if (VT == MVT::v8i16 && ToVT == MVT::i16 && FromVT == MVT::v8f16 &&
      Subtarget->hasMVEFloatOps())
  if (FromVT != MVT::v4f32 && FromVT != MVT::v8f16)

  EVT VT = Op.getValueType();

  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {

  assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
          Op.getOperand(0).getValueType() == MVT::v8i16) &&
         "Invalid type for custom lowering!");

  if (VT == MVT::v4f32)
    DestVecType = MVT::v4i32;
  else if (VT == MVT::v4f16 && HasFullFP16)
    DestVecType = MVT::v4i16;
  else if (VT == MVT::v8f16 && HasFullFP16)
    DestVecType = MVT::v8i16;

  switch (Op.getOpcode()) {

  Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));

  EVT VT = Op.getValueType();

  if (isUnsupportedFloatingType(VT)) {
                    CallOptions, SDLoc(Op)).first;
  EVT VT = Op.getValueType();

  bool UseNEON = !InGPR && Subtarget->hasNEON();

  EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;

    if (SrcVT == MVT::f32) {
      Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
    } else if (VT == MVT::f32)
      Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,

    if (VT == MVT::f32) {

  if (SrcVT == MVT::f64)

  if (VT == MVT::f32) {

  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
  EVT VT = Op.getValueType();

  unsigned Depth = Op.getConstantOperandVal(0);
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
                       MachinePointerInfo());

  const ARMBaseRegisterInfo &ARI =
      *static_cast<const ARMBaseRegisterInfo*>(RegInfo);

  EVT VT = Op.getValueType();

  unsigned Depth = Op.getConstantOperandVal(0);
                            MachinePointerInfo());

  return StringSwitch<Register>(RegName)
      .Case("sp", ARM::SP)

  assert(N->getValueType(0) == MVT::i64 &&
         "ExpandREAD_REGISTER called for non-i64 type result.");

      DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),

  const APInt &APIntIndex = Index->getAPIntValue();
  NewIndex *= APIntIndex;
  EVT SrcVT = Op.getValueType();
  EVT DstVT = N->getValueType(0);

  if ((SrcVT == MVT::i16 || SrcVT == MVT::i32) &&
      (DstVT == MVT::f16 || DstVT == MVT::bf16))
    return MoveToHPR(SDLoc(N), DAG, MVT::i32, DstVT.getSimpleVT(),

  if ((DstVT == MVT::i16 || DstVT == MVT::i32) &&
      (SrcVT == MVT::f16 || SrcVT == MVT::bf16)) {
    if (Subtarget->hasFullFP16() && !Subtarget->hasBF16())

  if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))

        DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));

    Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
                      DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));

    Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,

  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
  EVT VT = Op.getValueType();

      DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift, ARMcc, CmpLo);

      DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi);

  EVT VT = Op.getValueType();

      DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi);

      DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)};

      Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};

      Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
  EVT VT = N->getValueType(0);
  if (VT.isVector() && ST->hasNEON()) {

    if (ElemTy == MVT::i8) {

    if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&

      unsigned NumBits = ElemTy.getSizeInBits();
          DAG.getNode(ARMISD::VMOVIMM, dl, VT,

    if (ElemTy == MVT::i64) {

  if (!ST->hasV6T2Ops())

  EVT VT = N->getValueType(0);

  assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
  assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
          VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
         "Unexpected type for custom ctpop lowering");
  unsigned EltSize = 8;

    Op = Op.getOperand(0);

  APInt SplatBits, SplatUndef;
  unsigned SplatBitSize;
  if (!BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
      SplatBitSize > ElementBits)

  assert(VT.isVector() && "vector shift count is not a vector type");
  return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);

  assert(VT.isVector() && "vector shift count is not a vector type");
    return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
  if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {

  EVT VT = N->getValueType(0);

    return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
  return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),

         "unexpected vector shift opcode");

  if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
    unsigned VShiftOpc =
        (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
    return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),

  EVT ShiftVT = N->getOperand(1).getValueType();

  unsigned VShiftOpc =
      (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
  return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);

  EVT VT = N->getValueType(0);
         "Unknown shift to lower!");

  unsigned ShOpc = N->getOpcode();
  if (ST->hasMVEIntegerOps()) {
    unsigned ShPartsOpc = ARMISD::LSLL;
      ShPartsOpc = ARMISD::LSRL;
      ShPartsOpc = ARMISD::ASRL;

        DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);

  if (ST->isThumb1Only())

  std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);

  unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::LSRS1 : ARMISD::ASRS1;

  Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
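// A 64-bit logical/arithmetic shift right by one becomes "shift the high
// word, then RRX the low word": LSRS1/ASRS1 shift Hi and leave the
// shifted-out bit in the carry flag, which RRX rotates into the top of Lo.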
  bool Invert = false;

  EVT VT = Op.getValueType();

  assert(ST->hasMVEIntegerOps() &&
         "No hardware support for integer vector comparison!");

  if (Op.getValueType().getVectorElementType() != MVT::i1)

    SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
    Merged = DAG.getNOT(dl, Merged, CmpVT);

    switch (SetCCOpcode) {
      if (ST->hasMVEFloatOps()) {
      Invert = true; [[fallthrough]];
    case ISD::SETLT: Swap = true; [[fallthrough]];
    case ISD::SETLE: Swap = true; [[fallthrough]];

      Result = DAG.getNOT(dl, Result, VT);

    case ISD::SETUO: Invert = true; [[fallthrough]];

      Result = DAG.getNOT(dl, Result, VT);

    switch (SetCCOpcode) {
      if (ST->hasMVEIntegerOps()) {
      Invert = true; [[fallthrough]];
    case ISD::SETLT: Swap = true; [[fallthrough]];
    case ISD::SETLE: Swap = true; [[fallthrough]];

    if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
    if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {

      Result = DAG.getNOT(dl, Result, VT);

    Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, Op0,
    Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,

    Result = DAG.getNOT(dl, Result, VT);

  assert(LHS.getSimpleValueType().isInteger() &&
         "SETCCCARRY is integer only.");

  return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
  unsigned OpCmode, Imm;

  switch (SplatBitSize) {
    assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
    VT = is128Bits ? MVT::v16i8 : MVT::v8i8;

    VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
    if ((SplatBits & ~0xff) == 0) {
    if ((SplatBits & ~0xff00) == 0) {
      Imm = SplatBits >> 8;

    VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
    if ((SplatBits & ~0xff) == 0) {
    if ((SplatBits & ~0xff00) == 0) {
      Imm = SplatBits >> 8;
    if ((SplatBits & ~0xff0000) == 0) {
      Imm = SplatBits >> 16;
    if ((SplatBits & ~0xff000000) == 0) {
      Imm = SplatBits >> 24;

    if ((SplatBits & ~0xffff) == 0 &&
        ((SplatBits | SplatUndef) & 0xff) == 0xff) {
      Imm = SplatBits >> 8;

    if ((SplatBits & ~0xffffff) == 0 &&
        ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
      Imm = SplatBits >> 16;

    unsigned ImmMask = 1;
    for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
      if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
      } else if ((SplatBits & BitMask) != 0) {

    VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
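// NEON VMOV immediates encode a per-size "cmode": a byte replicated into
// every lane, a 16/32-bit value with one non-zero byte (optionally shifted),
// the 32-bit 0x??ff / 0x??ffff "shift-ones" forms, or the 64-bit form in
// which each byte is independently 0x00 or 0xff.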
  EVT VT = Op.getValueType();
  bool IsDouble = (VT == MVT::f64);

  if (ST->genExecuteOnly()) {
    assert((!ST->isThumb1Only() || ST->hasV8MBaselineOps()) &&
           "Unexpected architecture");

    return DAG.getNode(ARMISD::VMOVSR, DL, VT,

  if (!ST->hasVFP3Base())

  if (IsDouble && !Subtarget->hasFP64())

  if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {

  if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))

  if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
  unsigned ExpectedElt = Imm;
  for (unsigned i = 1; i < NumElts; ++i) {
    if (ExpectedElt == NumElts)
    if (M[i] < 0) continue;
    if (ExpectedElt != static_cast<unsigned>(M[i]))

                       bool &ReverseVEXT, unsigned &Imm) {
  ReverseVEXT = false;

  unsigned ExpectedElt = Imm;
  for (unsigned i = 1; i < NumElts; ++i) {
    if (ExpectedElt == NumElts * 2) {
    if (M[i] < 0) continue;
    if (ExpectedElt != static_cast<unsigned>(M[i]))

  return VT == MVT::v8i8 && M.size() == 8;

  if (Mask.size() == Elements * 2)
    return Index / Elements;
  return Mask[Index] == 0 ? 0 : 1;
7051 if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
7059 for (unsigned i = 0; i < M.size(); i += NumElts) {
7061 for (unsigned j = 0; j < NumElts; j += 2) {
7062 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7063 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
7068 if (M.size() == NumElts*2)
7083 if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
7086 for (unsigned i = 0; i < M.size(); i += NumElts) {
7088 for (unsigned j = 0; j < NumElts; j += 2) {
7089 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7090 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
7095 if (M.size() == NumElts*2)
7115 if (M.size() != NumElts && M.size() != NumElts*2)
7118 for (unsigned i = 0; i < M.size(); i += NumElts) {
7120 for (unsigned j = 0; j < NumElts; ++j) {
7121 if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
7126 if (M.size() == NumElts*2)
7145 if (M.size() != NumElts && M.size() != NumElts*2)
7148 unsigned Half = NumElts / 2;
7149 for (unsigned i = 0; i < M.size(); i += NumElts) {
7151 for (unsigned j = 0; j < NumElts; j += Half) {
7152 unsigned Idx = WhichResult;
7153 for (unsigned k = 0; k < Half; ++k) {
7154 int MIdx = M[i + j + k];
7155 if (MIdx >= 0 && (unsigned) MIdx != Idx)
7162 if (M.size() == NumElts*2)
7186 if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
7189 for (unsigned i = 0; i < M.size(); i += NumElts) {
7191 unsigned Idx = WhichResult * NumElts / 2;
7192 for (unsigned j = 0; j < NumElts; j += 2) {
7193 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7194 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
7200 if (M.size() == NumElts*2)
7219 if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
7222 for (unsigned i = 0; i < M.size(); i += NumElts) {
7224 unsigned Idx = WhichResult * NumElts / 2;
7225 for (unsigned j = 0; j < NumElts; j += 2) {
7226 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7227 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
7233 if (M.size() == NumElts*2)
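// The masks accepted by the VTRN test at 7051-7068, spelled out: lane pairs
// (j, j+1) of result WhichResult read lane j + WhichResult of each source, so
// v4i32 gives <0,4,2,6> for result 0 and <1,5,3,7> for result 1. The helper
// generating the reference mask is illustrative only.
#include <vector>

static std::vector<int> expectedVTRNMask(unsigned NumElts,
                                         unsigned WhichResult) {
  std::vector<int> Mask;
  for (unsigned j = 0; j < NumElts; j += 2) {
    Mask.push_back(j + WhichResult);           // from the first operand
    Mask.push_back(j + NumElts + WhichResult); // from the second operand
  }
  return Mask;
}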
7246 unsigned &WhichResult,
7249 if (isVTRNMask(ShuffleMask, VT, WhichResult))
7250 return ARMISD::VTRN;
7251 if (isVUZPMask(ShuffleMask, VT, WhichResult))
7252 return ARMISD::VUZP;
7253 if (isVZIPMask(ShuffleMask, VT, WhichResult))
7254 return ARMISD::VZIP;
7258 return ARMISD::VTRN;
7260 return ARMISD::VUZP;
7262 return ARMISD::VZIP;
7271 if (NumElts != M.size())
7275 for (unsigned i = 0; i != NumElts; ++i)
7276 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
7285 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7293 int Ofs = Top ? 1 : 0;
7294 int Upper = SingleSource ? 0 : NumElts;
7295 for (int i = 0, e = NumElts / 2; i != e; ++i) {
7296 if (M[i] >= 0 && M[i] != (i * 2) + Ofs)
7298 if (M[i + e] >= 0 && M[i + e] != (i * 2) + Ofs + Upper)
7307 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7316 unsigned Offset = Top ? 0 : 1;
7317 unsigned N = SingleSource ? 0 : NumElts;
7318 for (unsigned i = 0; i < NumElts; i += 2) {
7319 if (M[i] >= 0 && M[i] != (int)i)
7321 if (M[i + 1] >= 0 && M[i + 1] != (int)(N + i + Offset))
7330 if (NumElts != M.size())
7338 unsigned Off0 = rev ? NumElts / 2 : 0;
7339 unsigned Off1 = rev ? 0 : NumElts / 2;
7340 for (unsigned i = 0; i < NumElts; i += 2) {
7341 if (M[i] >= 0 && M[i] != (int)(Off0 + i / 2))
7343 if (M[i + 1] >= 0 && M[i + 1] != (int)(Off1 + i / 2))
7359 if (!ST->hasMVEFloatOps())
7364 if (VT != MVT::v8f16)
7385 for (unsigned i = 1; i < 4; i++) {
7400 return DAG.getNode(ARMISD::VCVTN, dl, VT, N1, Op1,
7412 if (!ST->hasMVEFloatOps())
7417 if (VT != MVT::v4f32)
7433 for (unsigned i = 1; i < 4; i++) {
7444 return DAG.getNode(ARMISD::VCVTL, dl, VT, Op0,
7456 Val = N->getAsZExtVal();
7458 if (ST->isThumb1Only()) {
7459 if (Val <= 255 || ~Val <= 255)
7471 EVT VT = Op.getValueType();
7473 assert(ST->hasMVEIntegerOps() && "LowerBUILD_VECTOR_i1 called without MVE!");
7477 unsigned BitsPerBool;
7481 } else if (NumElts == 4) {
7484 } else if (NumElts == 8) {
7487 } else if (NumElts == 16) {
7498 return U.get().isUndef() || U.get() == FirstOp;
7502 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), Ext);
7506 unsigned Bits32 = 0;
7507 for (unsigned i = 0; i < NumElts; ++i) {
7511 bool BitSet = V.isUndef() ? false : V->getAsZExtVal();
7513 Bits32 |= BoolMask << (i * BitsPerBool);
7519 for (unsigned i = 0; i < NumElts; ++i) {
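// Sketch of the constant-predicate packing at 7506-7513: an MVE predicate
// register holds 16 bits, so each of the NumElts booleans occupies
// 16 / NumElts bits (BitsPerBool), replicated across its field. Standalone
// analogue with assumed names.
#include <cstdint>

static uint32_t packPredicate(const bool *Lanes, unsigned NumElts) {
  unsigned BitsPerBool = 16 / NumElts;         // 1, 2, 4, or 8
  uint32_t BoolMask = (1u << BitsPerBool) - 1;
  uint32_t Bits32 = 0;
  for (unsigned i = 0; i < NumElts; ++i)
    if (Lanes[i])
      Bits32 |= BoolMask << (i * BitsPerBool);
  return Bits32;
}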
7532 if (!ST->hasMVEIntegerOps())
7536 EVT VT = Op.getValueType();
7546 if (N != 1 && N != 2 && N != 4 && N != 8)
7550 for (unsigned I = 2; I < NumElts; I++) {
7566 switch (N->getOpcode()) {
7577 return N->getOperand(1).getNode() == Op;
7579 switch (N->getConstantOperandVal(0)) {
7580 case Intrinsic::arm_mve_add_predicated:
7581 case Intrinsic::arm_mve_mul_predicated:
7582 case Intrinsic::arm_mve_qadd_predicated:
7583 case Intrinsic::arm_mve_vhadd:
7584 case Intrinsic::arm_mve_hadd_predicated:
7585 case Intrinsic::arm_mve_vqdmulh:
7586 case Intrinsic::arm_mve_qdmulh_predicated:
7587 case Intrinsic::arm_mve_vqrdmulh:
7588 case Intrinsic::arm_mve_qrdmulh_predicated:
7589 case Intrinsic::arm_mve_vqdmull:
7590 case Intrinsic::arm_mve_vqdmull_predicated:
7592 case Intrinsic::arm_mve_sub_predicated:
7593 case Intrinsic::arm_mve_qsub_predicated:
7594 case Intrinsic::arm_mve_vhsub:
7595 case Intrinsic::arm_mve_hsub_predicated:
7596 return N->getOperand(2).getNode() == Op;
7611 EVT VT = Op.getValueType();
7619 APInt SplatBits, SplatUndef;
7620 unsigned SplatBitSize;
7622 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
7629 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) &&
7631 [BVN](const SDNode *U) { return IsQRMVEInstruction(U, BVN); })) {
7632 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7633 : SplatBitSize == 16 ? MVT::v8i16
7637 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7640 if ((ST->hasNEON() && SplatBitSize <= 64) ||
7641 (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
7646 SplatBitSize, DAG, dl, VmovVT, VT, VMOVModImm);
7650 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
7654 uint64_t NegatedImm = (~SplatBits).getZExtValue();
7656 NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT,
7660 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
7664 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
7668 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
7674 if (ST->hasMVEIntegerOps() &&
7675 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32)) {
7676 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7677 : SplatBitSize == 16 ? MVT::v8i16
7681 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7694 bool isOnlyLowElement = true;
7695 bool usesOnlyOneValue = true;
7696 bool hasDominantValue = false;
7701 DenseMap<SDValue, unsigned> ValueCounts;
7703 for (unsigned i = 0; i < NumElts; ++i) {
7708 isOnlyLowElement = false;
7712 unsigned &Count = ValueCounts[V];
7715 if (++Count > (NumElts / 2)) {
7716 hasDominantValue = true;
7720 if (ValueCounts.size() != 1)
7721 usesOnlyOneValue = false;
7722 if (!Value.getNode() && !ValueCounts.empty())
7725 if (ValueCounts.empty())
7737 if (hasDominantValue && EltSize <= 32) {
7746 ConstantSDNode *constIndex;
7753 if (VT != Value->getOperand(0).getValueType()) {
7756 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
7761 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
7766 if (!usesOnlyOneValue) {
7769 for (unsigned I = 0; I < NumElts; ++I) {
7774 Ops.push_back(Op.getOperand(I));
7784 assert(FVT == MVT::f32 || FVT == MVT::f16);
7785 MVT IVT = (FVT == MVT::f32) ? MVT::i32 : MVT::i16;
7786 for (unsigned i = 0; i < NumElts; ++i)
7791 Val = LowerBUILD_VECTOR(Val, DAG, ST);
7795 if (usesOnlyOneValue) {
7798 return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
7822 if (ST->hasNEON() && VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
7842 if (EltSize >= 32) {
7848 for (unsigned i = 0; i < NumElts; ++i)
7862 for (unsigned i = 0 ; i < NumElts; ++i) {
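// Model of the dominant-value scan at 7701-7716: count occurrences of each
// element; once one value covers more than half the lanes, a VDUP of that
// value plus individual lane inserts beats a full per-lane build. Template
// and names are an assumed simplification.
#include <map>

template <typename T>
static bool findDominantValue(const T *Elts, unsigned NumElts, T &Dominant) {
  std::map<T, unsigned> ValueCounts;
  for (unsigned i = 0; i < NumElts; ++i)
    if (++ValueCounts[Elts[i]] > NumElts / 2) {
      Dominant = Elts[i];
      return true; // majority value found early
    }
  return false;
}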
7881 EVT VT = Op.getValueType();
7884 struct ShuffleSourceInfo {
7886 unsigned MinElt = std::numeric_limits<unsigned>::max();
7887 unsigned MaxElt = 0;
7897 int WindowScale = 1;
7899 ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
7907 for (unsigned i = 0; i < NumElts; ++i) {
7922 SDValue SourceVec = V.getOperand(0);
7924 if (Source == Sources.end())
7928 unsigned EltNo = V.getConstantOperandVal(1);
7935 if (Sources.size() > 2)
7941 for (auto &Source : Sources) {
7942 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
7943 if (SrcEltTy.bitsLT(SmallestEltTy))
7944 SmallestEltTy = SrcEltTy;
7946 unsigned ResMultiplier =
7954 for (auto &Src : Sources) {
7955 EVT SrcVT = Src.ShuffleVec.getValueType();
7959 if (SrcVTSize == VTSize)
7968 if (SrcVTSize < VTSize) {
7969 if (2 * SrcVTSize != VTSize)
7975 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
7979 if (SrcVTSize != 2 * VTSize)
7982 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
7987 if (Src.MinElt >= NumSrcElts) {
7992 Src.WindowBase = -NumSrcElts;
7993 } else if (Src.MaxElt < NumSrcElts) {
8007 Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
8010 Src.WindowBase = -Src.MinElt;
8017 for (auto &Src : Sources) {
8018 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
8019 if (SrcEltTy == SmallestEltTy)
8022 Src.ShuffleVec = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, ShuffleVT, Src.ShuffleVec);
8024 Src.WindowBase *= Src.WindowScale;
8029 for (auto Src : Sources)
8030 assert(Src.ShuffleVec.getValueType() == ShuffleVT);
8038 if (Entry.isUndef())
8047 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
8050 int LanesDefined = BitsDefined / BitsPerShuffleLane;
8054 int *LaneMask = &Mask[i * ResMultiplier];
8056 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8057 ExtractBase += NumElts * (Src - Sources.begin());
8058 for (int j = 0; j < LanesDefined; ++j)
8059 LaneMask[j] = ExtractBase + j;
8065 assert(Sources.size() <= 2 && "Too many sources!");
8068 for (unsigned i = 0; i < Sources.size(); ++i)
8075 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Shuffle);
8097 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8117 unsigned PFIndexes[4];
8118 for (unsigned i = 0; i != 4; ++i) {
8122 PFIndexes[i] = M[i];
8126 unsigned PFTableIndex =
8127 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8129 unsigned Cost = (PFEntry >> 30);
8135 bool ReverseVEXT, isV_UNDEF;
8136 unsigned Imm, WhichResult;
8139 if (EltSize >= 32 ||
8146 else if (Subtarget->hasNEON() &&
8151 else if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8154 else if (Subtarget->hasMVEIntegerOps() &&
8158 else if (Subtarget->hasMVEIntegerOps() &&
8172 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8173 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8174 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
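// Hedged restatement of the perfect-shuffle encoding used at 8117-8127 and
// decoded at 8172-8174: the four mask indices are base-9 digits (0-7 pick a
// lane, 8 means undef), and each table entry packs a 2-bit cost, a 4-bit
// opcode and two 13-bit operand IDs. The field layout mirrors the shifts in
// the source; the struct itself is illustrative.
struct PerfectShuffleEntry {
  unsigned Cost, OpNum, LHSID, RHSID;
};

static PerfectShuffleEntry decodePFEntry(unsigned PFEntry) {
  return {PFEntry >> 30, (PFEntry >> 26) & 0x0F,
          (PFEntry >> 13) & ((1 << 13) - 1), PFEntry & ((1 << 13) - 1)};
}

static unsigned pfTableIndex(const unsigned PFIndexes[4]) {
  // Base-9 positional encoding of the four mask digits.
  return PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
         PFIndexes[2] * 9 + PFIndexes[3];
}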
8177 if (LHSID == (1*9+2)*9+3) return LHS;
8178 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8192 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
8195 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
8198 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
8203 return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8208 return DAG.getNode(ARMISD::VEXT, dl, VT,
8235 for (int I : ShuffleMask)
8239 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
8242 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
8248 EVT VT = Op.getValueType();
8250 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8251 "Expect an v8i16/v16i8 type");
8257 std::vector<int> NewMask;
8261 NewMask.push_back(i);
8291 AllZeroes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllZeroes);
8301 if (VT != MVT::v16i1)
8302 RecastV1 = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Pred);
8317 EVT VT = Op.getValueType();
8321 assert(ST->hasMVEIntegerOps() &&
8322 "No support for vector shuffle of boolean predicates");
8332 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, srl);
8348 "Expected identical vector type in expanded i1 shuffle!");
8352 PredAsVector2, ShuffleMask);
8357 if (VT == MVT::v2i1) {
8358 SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Shuffled);
8361 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
8363 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled,
8374 EVT VT = Op.getValueType();
8378 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8379 "Unexpected vector type");
8381 int QuarterSize = NumElts / 4;
8390 for (int i = 0; i < Length; i++) {
8391 if (ShuffleMask[Start + i] >= 0) {
8392 if (ShuffleMask[Start + i] % Length != i)
8394 MovIdx = ShuffleMask[Start + i] / Length;
8402 for (int i = 1; i < Length; i++) {
8403 if (ShuffleMask[Start + i] >= 0 &&
8404 (ShuffleMask[Start + i] / Length != MovIdx ||
8405 ShuffleMask[Start + i] % Length != i))
8411 for (int Part = 0; Part < 4; ++Part) {
8413 int Elt = getMovIdx(ShuffleMask, Part * QuarterSize, QuarterSize);
8427 if (!Parts[0] && !Parts[1] && !Parts[2] && !Parts[3])
8432 if (!Parts[0] || !Parts[1] || !Parts[2] || !Parts[3]) {
8434 for (int Part = 0; Part < 4; ++Part)
8435 for (int i = 0; i < QuarterSize; i++)
8437 Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize + i]);
8439 VT, dl, Op->getOperand(0), Op->getOperand(1), NewShuffleMask);
8442 for (int Part = 0; Part < 4; ++Part)
8458 EVT VT = Op.getValueType();
8470 for (int i = 0, NumMaskElts = Mask.size(); i < NumMaskElts; ++i) {
8474 if (Mask[i] != i + BaseOffset) {
8475 if (OffElement == -1)
8481 return NonUndef > 2 && OffElement != -1;
8485 if (isOneOffIdentityMask(ShuffleMask, VT, 0, OffElement))
8487 else if (isOneOffIdentityMask(ShuffleMask, VT, NumElts, OffElement))
8498 ShuffleMask[OffElement] < (int)NumElts ? V1 : V2,
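// The shape of the isOneOffIdentityMask test at 8470-8481, restated as a
// standalone predicate: the mask must be the identity (shifted by BaseOffset)
// in every defined lane except exactly one, which can then be patched with a
// single lane insert. An assumed simplification.
static bool isOneOffIdentity(const int *Mask, int NumMaskElts, int BaseOffset,
                             int &OffElement) {
  OffElement = -1;
  int NonUndef = 0;
  for (int i = 0; i < NumMaskElts; ++i) {
    if (Mask[i] < 0)
      continue; // undef lanes are free
    ++NonUndef;
    if (Mask[i] != i + BaseOffset) {
      if (OffElement != -1)
        return false; // a second mismatch disqualifies the mask
      OffElement = i;
    }
  }
  return NonUndef > 2 && OffElement != -1;
}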
8509 EVT VT = Op.getValueType();
8513 if (ST->hasMVEIntegerOps() && EltSize == 1)
8524 if (EltSize <= 32) {
8528 if (Lane == -1) Lane = 0;
8539 bool IsScalarToVector = true;
8542 IsScalarToVector = false;
8545 if (IsScalarToVector)
8548 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
8552 bool ReverseVEXT = false;
8554 if (ST->hasNEON() && isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
8557 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
8562 return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
8564 return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
8566 return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
8569 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
8578 unsigned WhichResult = 0;
8579 bool isV_UNDEF = false;
8580 if (ST->hasNEON()) {
8582 ShuffleMask, VT, WhichResult, isV_UNDEF)) {
8589 if (ST->hasMVEIntegerOps()) {
8591 return DAG.getNode(ARMISD::VMOVN, dl, VT, V2, V1,
8594 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V2,
8597 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V1,
8624 }) && "Unexpected shuffle index into UNDEF operand!");
8627 ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
8630 assert((WhichResult == 0) &&
8631 "In-place shuffle of concat can only have one result!");
8640 if (ST->hasMVEIntegerOps() && EltSize <= 32) {
8644 for (bool Top : {false, true}) {
8645 for (bool SingleSource : {false, true}) {
8646 if (isTruncMask(ShuffleMask, VT, Top, SingleSource)) {
8651 SingleSource ? V1 : V2);
8667 unsigned PFIndexes[4];
8668 for (unsigned i = 0; i != 4; ++i) {
8669 if (ShuffleMask[i] < 0)
8672 PFIndexes[i] = ShuffleMask[i];
8676 unsigned PFTableIndex =
8677 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8679 unsigned Cost = (PFEntry >> 30);
8685 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8686 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8696 if (EltSize >= 32) {
8704 for (unsigned i = 0; i < NumElts; ++i) {
8705 if (ShuffleMask[i] < 0)
8709 ShuffleMask[i] < (int)NumElts ? V1 : V2,
8717 if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8721 if (ST->hasNEON() && VT == MVT::v8i8)
8725 if (ST->hasMVEIntegerOps())
8734 EVT VecVT = Op.getOperand(0).getValueType();
8737 assert(ST->hasMVEIntegerOps() &&
8738 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
8741 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
8742 unsigned Lane = Op.getConstantOperandVal(2);
8743 unsigned LaneWidth =
8745 unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;
8750 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), BFI);
8763 if (Subtarget->hasMVEIntegerOps() &&
8764 Op.getValueType().getScalarSizeInBits() == 1)
8788 IVecIn, IElt, Lane);
8797 EVT VecVT = Op.getOperand(0).getValueType();
8800 assert(ST->hasMVEIntegerOps() &&
8801 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
8804 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
8805 unsigned Lane = Op.getConstantOperandVal(1);
8806 unsigned LaneWidth =
8828 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
8837 assert(Op.getValueType().getScalarSizeInBits() == 1 &&
8838 "Unexpected custom CONCAT_VECTORS lowering");
8840 "Unexpected custom CONCAT_VECTORS lowering");
8841 assert(ST->hasMVEIntegerOps() &&
8842 "CONCAT_VECTORS lowering only supported for MVE");
8846 EVT Op2VT = V2.getValueType();
8847 assert(Op1VT == Op2VT && "Operand types don't match!");
8848 assert((Op1VT == MVT::v2i1 || Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) &&
8849 "Unexpected i1 concat operations!");
8862 if (Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) {
8867 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
8876 auto ExtractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) {
8877 EVT NewVT = NewV.getValueType();
8878 EVT ConcatVT = ConVec.getValueType();
8879 unsigned ExtScale = 1;
8880 if (NewVT == MVT::v2f64) {
8881 NewV = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, NewV);
8894 ConVec = ExtractInto(NewV1, ConVec, j);
8895 ConVec = ExtractInto(NewV2, ConVec, j);
8899 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
8905 while (ConcatOps.size() > 1) {
8906 for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
8909 ConcatOps[I / 2] = ConcatPair(V1, V2);
8913 return ConcatOps[0];
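// Bit-level picture of the i1 INSERT_VECTOR_ELT path at 8741-8750: the
// predicate moves to a GPR, a LaneWidth-wide field at Lane * LaneWidth is
// overwritten (the BFI), and the value is cast back to a predicate.
// Standalone analogue; names are assumptions.
#include <cstdint>

static uint32_t insertPredicateLane(uint32_t Pred, unsigned NumElts,
                                    unsigned Lane, bool Value) {
  unsigned LaneWidth = 16 / NumElts; // the MVE predicate is 16 bits wide
  uint32_t Mask = ((1u << LaneWidth) - 1) << (Lane * LaneWidth);
  return Value ? (Pred | Mask) : (Pred & ~Mask);
}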
8918 EVT VT = Op->getValueType(0);
8924 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
8925 "unexpected CONCAT_VECTORS");
8946 EVT VT = Op.getValueType();
8952 "Unexpected custom EXTRACT_SUBVECTOR lowering");
8953 assert(ST->hasMVEIntegerOps() &&
8954 "EXTRACT_SUBVECTOR lowering only supported for MVE");
8964 EVT SubVT = MVT::v4i32;
8966 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j += 2) {
8976 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
8981 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) {
8990 return DAG.getNode(ARMISD::VCMPZ, dl, VT, SubVec,
8997 assert(ST->hasMVEIntegerOps() && "Expected MVE!");
8998 EVT VT = N->getValueType(0);
8999 assert((VT == MVT::v16i1 || VT == MVT::v8i1 || VT == MVT::v4i1) &&
9000 "Expected a vector i1 type!");
9002 EVT FromVT = Op.getValueType();
9013 if (!Subtarget->hasMVEIntegerOps())
9016 EVT ToVT = N->getValueType(0);
9059 if (ToVT != MVT::v8i16 && ToVT != MVT::v16i8)
9061 EVT FromVT = N->getOperand(0).getValueType();
9062 if (FromVT != MVT::v8i32 && FromVT != MVT::v16i16)
9073 if (!Subtarget->hasMVEIntegerOps())
9078 EVT ToVT = N->getValueType(0);
9079 if (ToVT != MVT::v16i32 && ToVT != MVT::v8i32 && ToVT != MVT::v16i16)
9082 EVT FromVT = Op.getValueType();
9083 if (FromVT != MVT::v8i16 && FromVT != MVT::v16i8)
9097 Ext = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext);
9098 Ext1 = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext1);
9110 EVT VT = N->getValueType(0);
9112 SDNode *BVN = N->getOperand(0).getNode();
9117 unsigned HiElt = 1 - LoElt;
9122 if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
9138 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
9139 SDNode *Elt = N->getOperand(i).getNode();
9142 unsigned HalfSize = EltSize / 2;
9144 if (!isIntN(HalfSize, C->getSExtValue()))
9147 if (!isUIntN(HalfSize, C->getZExtValue()))
9186 switch (OrigSimpleTy) {
9202 unsigned ExtOpcode) {
9225 if (ExtendedTy == LD->getMemoryVT())
9226 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
9227 LD->getBasePtr(), LD->getPointerInfo(), LD->getAlign(),
9228 LD->getMemOperand()->getFlags());
9234 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
9235 LD->getMemoryVT(), LD->getAlign(),
9236 LD->getMemOperand()->getFlags());
9249 N->getOperand(0)->getValueType(0),
9255 "Expected extending load");
9261 DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
9270 SDNode *BVN = N->getOperand(0).getNode();
9272 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
9280 EVT VT = N->getValueType(0);
9286 for (unsigned i = 0; i != NumElts; ++i) {
9287 const APInt &CInt = N->getConstantOperandAPInt(i);
9296 unsigned Opcode = N->getOpcode();
9298 SDNode *N0 = N->getOperand(0).getNode();
9299 SDNode *N1 = N->getOperand(1).getNode();
9307 unsigned Opcode = N->getOpcode();
9309 SDNode *N0 = N->getOperand(0).getNode();
9310 SDNode *N1 = N->getOperand(1).getNode();
9320 EVT VT = Op.getValueType();
9322 "unexpected type for custom-lowering ISD::MUL");
9323 SDNode *N0 = Op.getOperand(0).getNode();
9324 SDNode *N1 = Op.getOperand(1).getNode();
9325 unsigned NewOpc = 0;
9329 if (isN0SExt && isN1SExt)
9330 NewOpc = ARMISD::VMULLs;
9334 if (isN0ZExt && isN1ZExt)
9335 NewOpc = ARMISD::VMULLu;
9336 else if (isN1SExt || isN1ZExt) {
9340 NewOpc = ARMISD::VMULLs;
9343 NewOpc = ARMISD::VMULLu;
9347 NewOpc = ARMISD::VMULLu;
9353 if (VT == MVT::v2i64)
9370 "unexpected types for extended operands to VMULL");
9371 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
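// Condensed view of the opcode choice at 9329-9347: a mul of two
// sign-extended narrow vectors becomes VMULL.S, two zero-extended ones become
// VMULL.U; mixed extensions need the extra massaging the surrounding code
// performs. Enum and helper are illustrative stand-ins.
enum class WideningMul { None, Signed, Unsigned };

static WideningMul classifyVMULL(bool N0SExt, bool N1SExt, bool N0ZExt,
                                 bool N1ZExt) {
  if (N0SExt && N1SExt)
    return WideningMul::Signed;   // maps to ARMISD::VMULLs
  if (N0ZExt && N1ZExt)
    return WideningMul::Unsigned; // maps to ARMISD::VMULLu
  return WideningMul::None;
}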
9406 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9440 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9443 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9464 EVT VT = Op.getValueType();
9465 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9466 "unexpected type for custom-lowering ISD::SDIV");
9473 if (VT == MVT::v8i8) {
9501 EVT VT = Op.getValueType();
9502 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9503 "unexpected type for custom-lowering ISD::UDIV");
9510 if (VT == MVT::v8i8) {
9549 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9552 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9556 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
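// The vrecpe/vrecps pairs above implement Newton-Raphson refinement of a
// reciprocal estimate: x1 = x0 * (2 - d * x0), where VRECPE supplies the
// initial guess and VRECPS the (2 - d * x0) factor. A scalar sketch of one
// refinement step, with plain float arithmetic standing in for the NEON
// intrinsics:
static float refineReciprocal(float d, float x0) {
  float step = 2.0f - d * x0; // what VRECPS computes
  return x0 * step;           // refined estimate; precision roughly doubles
}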
9577 EVT VT = N->getValueType(0);
9590 Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
9591 Op.getOperand(1), Carry);
9604 Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
9605 Op.getOperand(1), Carry);
9622 EVT VT = Op.getValueType();
9623 assert((VT == MVT::i32 || VT == MVT::i64) &&
9624 "unexpected type for custom lowering DIV");
9630 LC = VT == MVT::i32 ? RTLIB::SDIVREM_I32 : RTLIB::SDIVREM_I64;
9632 LC = VT == MVT::i32 ? RTLIB::UDIVREM_I32 : RTLIB::UDIVREM_I64;
9639 for (auto AI : {1, 0}) {
9641 Args.emplace_back(Operand,
9648 ES, std::move(Args));
9658ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
9666 const bool MinSize = ST.hasMinSize();
9667 const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
9668 : ST.hasDivideInARMMode();
9672 if (N->getOperand(0).getValueType().isVector())
9677 if (!(MinSize && HasDivide))
9690 if (Divisor.sgt(128))
9698 assert(Op.getValueType() == MVT::i32 &&
9699 "unexpected type for custom lowering DIV");
9702 SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
9705 return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
9711 if (N->getValueType(0) == MVT::i32)
9712 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
9715 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
9719void ARMTargetLowering::ExpandDIV_Windows(
9724 assert(Op.getValueType() == MVT::i64 &&
9725 "unexpected type for custom lowering DIV");
9742 EVT MemVT = LD->getMemoryVT();
9743 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
9744 MemVT == MVT::v16i1) &&
9745 "Expected a predicate type!");
9746 assert(MemVT == Op.getValueType());
9748 "Expected a non-extending load");
9749 assert(LD->isUnindexed() && "Expected an unindexed load");
9763 ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),
9765 LD->getMemOperand());
9771 SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Val);
9772 if (MemVT != MVT::v16i1)
9781 EVT MemVT = LD->getMemoryVT();
9782 assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
9784 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
9785 !Subtarget->isThumb1Only() && LD->isVolatile() &&
9786 LD->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
9789 ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
9790 {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
9800 EVT MemVT = ST->getMemoryVT();
9801 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
9802 MemVT == MVT::v16i1) &&
9803 "Expected a predicate type!");
9804 assert(MemVT == ST->getValue().getValueType());
9805 assert(!ST->isTruncatingStore() && "Expected a non-extending store");
9806 assert(ST->isUnindexed() && "Expected an unindexed store");
9811 SDValue Build = ST->getValue();
9812 if (MemVT != MVT::v16i1) {
9825 SDValue GRP = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build);
9831 ST->getChain(), dl, GRP, ST->getBasePtr(),
9833 ST->getMemOperand());
9839 EVT MemVT = ST->getMemoryVT();
9840 assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
9842 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
9858 {ST->getChain(), Lo, Hi, ST->getBasePtr()},
9859 MemVT, ST->getMemOperand());
9860 } else if (Subtarget->hasMVEIntegerOps() &&
9861 ((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
9862 MemVT == MVT::v16i1))) {
9871 (N->getOpcode() == ARMISD::VMOVIMM &&
9877 MVT VT = Op.getSimpleValueType();
9879 SDValue PassThru = N->getPassThru();
9890 VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec,
9891 N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
9892 N->getExtensionType(), N->isExpandingLoad());
9895 PassThru.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
9897 if (!PassThru.isUndef() && !PassThruIsCastZero)
9904 if (!ST->hasMVEIntegerOps())
9908 unsigned BaseOpcode = 0;
9909 switch (Op->getOpcode()) {
9925 unsigned NumActiveLanes = NumElts;
9927 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
9928 NumActiveLanes == 2) &&
9929 "Only expected a power 2 vector size");
9933 while (NumActiveLanes > 4) {
9934 unsigned RevOpcode = NumActiveLanes == 16 ? ARMISD::VREV16 : ARMISD::VREV32;
9936 Op0 = DAG.getNode(BaseOpcode, dl, VT, Op0, Rev);
9937 NumActiveLanes /= 2;
9941 if (NumActiveLanes == 4) {
9951 SDValue Res0 = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
9952 SDValue Res1 = DAG.getNode(BaseOpcode, dl, EltVT, Ext2, Ext3, Op->getFlags());
9953 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res0, Res1, Op->getFlags());
9959 Res = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
9963 if (EltVT != Op->getValueType(0))
9970 if (!ST->hasMVEFloatOps())
9985 unsigned PairwiseIntrinsic = 0;
9986 switch (Op->getOpcode()) {
9990 PairwiseIntrinsic = Intrinsic::arm_neon_vpminu;
9993 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxu;
9996 PairwiseIntrinsic = Intrinsic::arm_neon_vpmins;
9999 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxs;
10005 unsigned NumActiveLanes = NumElts;
10007 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10008 NumActiveLanes == 2) &&
10009 "Only expected a power 2 vector size");
10015 VT = Lo.getValueType();
10017 NumActiveLanes /= 2;
10021 while (NumActiveLanes > 1) {
10023 NumActiveLanes /= 2;
10030 if (EltVT != Op.getValueType()) {
10031 unsigned Extend = 0;
10032 switch (Op->getOpcode()) {
10044 Res = DAG.getNode(Extend, dl, Op.getValueType(), Res);
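// Scalar model of the lane-halving strategy behind 9933-9937 and
// 10021-10023: combine each lane with a partner and halve the active lane
// count per step until one (or a few, handled by extracts) remains. The
// in-tree code picks partners via VREV16/VREV32 or vector splits; this sketch
// only shows the log2(N) shape, not the exact pairing.
#include <cstdint>

static uint32_t reduceAddModel(uint32_t *Lanes, unsigned NumLanes) {
  for (unsigned Active = NumLanes; Active > 1; Active /= 2)
    for (unsigned i = 0; i < Active / 2; ++i)
      Lanes[i] += Lanes[i + Active / 2]; // one vector op per halving step
  return Lanes[0];
}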
10089 const SDValue Ops[] = {RegClass, V0, SubReg0, V1, SubReg1};
10095 SDLoc dl(V.getNode());
10096 auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32);
10106 assert(N->getValueType(0) == MVT::i64 &&
10107 "AtomicCmpSwap on types less than 64 should be legal");
10116 ARM::CMP_SWAP_64, SDLoc(N),
10117 DAG.getVTList(MVT::Untyped, MVT::Untyped, MVT::Other), Ops);
10136 EVT VT = Op.getValueType();
10145 if (isUnsupportedFloatingType(LHS.getValueType())) {
10147 Chain, IsSignaling);
10148 if (!RHS.getNode()) {
10164 SDValue Result = getCMOV(dl, VT, False, True, ARMcc, Cmp, DAG);
10166 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
10167 Result = getCMOV(dl, VT, Result, True, ARMcc, Cmp, DAG);
10184 MVT SVT = Op.getOperand(0).getSimpleValueType();
10187 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
10200 if (!IsSigned && Subtarget->isThumb1Only()) {
10218 Sub1Result, Sub1Result, Flags1);
10233 if (Op.getValueType() != MVT::i32)
10247 unsigned Opcode = ARMISD::SUBC;
10256 bool CanUseAdd = false;
10272 Opcode = ARMISD::ADDC;
10296 SDValue Result1 = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, OpResult, One,
10297 GTCondValue, Flags);
10301 SDValue Result2 = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, Result1, MinusOne,
10302 LTCondValue, Flags);
10304 if (Op.getValueType() != MVT::i32)
10312 switch (Op.getOpcode()) {
10344 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget);
10348 case ISD::SREM: return LowerREM(Op.getNode(), DAG);
10349 case ISD::UREM: return LowerREM(Op.getNode(), DAG);
10371 return LowerSET_FPMODE(Op, DAG);
10373 return LowerRESET_FPMODE(Op, DAG);
10376 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10377 return LowerDIV_Windows(Op, DAG, true);
10380 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10381 return LowerDIV_Windows(Op, DAG, false);
10388 return LowerSignedALUO(Op, DAG);
10391 return LowerUnsignedALUO(Op, DAG);
10424 if (Subtarget->isTargetWindows())
10425 return LowerDYNAMIC_STACKALLOC(Op, DAG);
10434 return LowerSPONENTRY(Op, DAG);
10436 return LowerFP_TO_BF16(Op, DAG);
10437 case ARMISD::WIN__DBZCHK: return SDValue();
10440 return LowerCMP(Op, DAG);
10442 return LowerABS(Op, DAG);
10447 assert((Op.getOperand(1).getValueType() == MVT::f16 ||
10448 Op.getOperand(1).getValueType() == MVT::bf16) &&
10449 "Expected custom lowering of rounding operations only for f16");
10452 {Op.getOperand(0), Op.getOperand(1)});
10453 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
10454 {Ext.getValue(1), Ext.getValue(0)});
10461 unsigned IntNo = N->getConstantOperandVal(0);
10463 if (IntNo == Intrinsic::arm_smlald)
10464 Opc = ARMISD::SMLALD;
10465 else if (IntNo == Intrinsic::arm_smlaldx)
10466 Opc = ARMISD::SMLALDX;
10467 else if (IntNo == Intrinsic::arm_smlsld)
10468 Opc = ARMISD::SMLSLD;
10469 else if (IntNo == Intrinsic::arm_smlsldx)
10470 Opc = ARMISD::SMLSLDX;
10476 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(3), dl, MVT::i32, MVT::i32);
10480 N->getOperand(1), N->getOperand(2),
10492 switch (N->getOpcode()) {
10499 Res = ExpandBITCAST(N, DAG, Subtarget);
10508 Res = LowerREM(N, DAG);
10512 Res = LowerDivRem(SDValue(N, 0), DAG);
10528 assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
10566 "ROPI/RWPI not currently supported with SjLj");
10575 bool isThumb = Subtarget->isThumb();
10576 bool isThumb2 = Subtarget->isThumb2();
10579 unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
10585 : &ARM::GPRRegClass;
10603 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10609 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10615 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10633 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10638 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10643 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10648 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10654 Register NewVReg5 = MRI->createVirtualRegister(TRC);
10669 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10675 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10691 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10698 const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
10699 : &ARM::GPRnopcRegClass;
10703 DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad;
10704 unsigned MaxCSNum = 0;
10705 for (MachineBasicBlock &BB : *MF) {
10711 for (MachineInstr &II : BB) {
10712 if (!II.isEHLabel())
10715 MCSymbol *Sym = II.getOperand(0).getMCSymbol();
10716 if (!MF->hasCallSiteLandingPad(Sym)) continue;
10718 SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
10719 for (unsigned Idx : CallSiteIdxs) {
10720 CallSiteNumToLPad[Idx].push_back(&BB);
10721 MaxCSNum = std::max(MaxCSNum, Idx);
10728 std::vector<MachineBasicBlock*> LPadList;
10729 SmallPtrSet<MachineBasicBlock*, 32> InvokeBBs;
10730 LPadList.reserve(CallSiteNumToLPad.size());
10731 for (unsigned I = 1; I <= MaxCSNum; ++I) {
10732 SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
10733 for (MachineBasicBlock *MBB : MBBList) {
10734 LPadList.push_back(MBB);
10739 assert(!LPadList.empty() &&
10740 "No landing pad destinations for the dispatch jump table!");
10743 MachineJumpTableInfo *JTI =
10750 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
10753 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
10755 BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP));
10758 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
10762 MF->insert(MF->end(), DispatchBB);
10763 MF->insert(MF->end(), DispContBB);
10764 MF->insert(MF->end(), TrapBB);
10768 SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
10770 MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
10774 MachineInstrBuilder MIB;
10775 MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
10777 const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
10787 unsigned NumLPads = LPadList.size();
10788 if (Subtarget->isThumb2()) {
10789 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10790 BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
10796 if (NumLPads < 256) {
10797 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
10799 .addImm(LPadList.size())
10802 Register VReg1 = MRI->createVirtualRegister(TRC);
10803 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
10804 .addImm(NumLPads & 0xFFFF)
10807 unsigned VReg2 = VReg1;
10808 if ((NumLPads & 0xFFFF0000) != 0) {
10809 VReg2 = MRI->createVirtualRegister(TRC);
10810 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
10816 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
10822 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
10827 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10828 BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
10832 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10833 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
10840 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
10844 } else if (Subtarget->isThumb()) {
10845 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10846 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
10852 if (NumLPads < 256) {
10853 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
10858 MachineConstantPool *ConstantPool = MF->getConstantPool();
10863 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
10864 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
10866 Register VReg1 = MRI->createVirtualRegister(TRC);
10867 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
10871 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
10877 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
10882 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10883 BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
10889 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10890 BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
10894 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10895 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
10901 MachineMemOperand *JTMMOLd =
10905 Register NewVReg5 = MRI->createVirtualRegister(TRC);
10906 BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
10912 unsigned NewVReg6 = NewVReg5;
10913 if (IsPositionIndependent) {
10914 NewVReg6 = MRI->createVirtualRegister(TRC);
10915 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
10922 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
10926 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10927 BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
10933 if (NumLPads < 256) {
10934 BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
10938 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
10939 Register VReg1 = MRI->createVirtualRegister(TRC);
10940 BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
10941 .addImm(NumLPads & 0xFFFF)
10944 unsigned VReg2 = VReg1;
10945 if ((NumLPads & 0xFFFF0000) != 0) {
10946 VReg2 = MRI->createVirtualRegister(TRC);
10947 BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
10953 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
10958 MachineConstantPool *ConstantPool = MF->getConstantPool();
10963 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
10964 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
10966 Register VReg1 = MRI->createVirtualRegister(TRC);
10967 BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
10972 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
10983 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10984 BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
10989 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10990 BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
10994 MachineMemOperand *JTMMOLd =
10997 Register NewVReg5 = MRI->createVirtualRegister(TRC);
10998 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
11005 if (IsPositionIndependent) {
11006 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
11011 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
11018 SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
11019 for (MachineBasicBlock *CurMBB : LPadList) {
11020 if (SeenMBBs.insert(CurMBB).second)
11027 for (MachineBasicBlock *BB : InvokeBBs) {
11031 SmallVector<MachineBasicBlock*, 4> Successors(BB->successors());
11032 while (!Successors.empty()) {
11033 MachineBasicBlock *SMBB = Successors.pop_back_val();
11035 BB->removeSuccessor(SMBB);
11041 BB->normalizeSuccProbs();
11048 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
11049 if (!II->isCall()) continue;
11051 DenseSet<unsigned> DefRegs;
11053 OI = II->operands_begin(), OE = II->operands_end();
11055 if (!OI->isReg()) continue;
11056 DefRegs.insert(OI->getReg());
11059 MachineInstrBuilder MIB(*MF, &*II);
11061 for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
11062 unsigned Reg = SavedRegs[i];
11063 if (Subtarget->isThumb2() &&
11064 !ARM::tGPRRegClass.contains(Reg) &&
11065 !ARM::hGPRRegClass.contains(Reg))
11067 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
11069 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
11081 for (MachineBasicBlock *MBBLPad : MBBLPads)
11082 MBBLPad->setIsEHPad(false);
11085 MI.eraseFromParent();
11098static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
11100 return LdSize == 16 ? ARM::VLD1q32wb_fixed
11101 : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
11103 return LdSize == 4 ? ARM::tLDRi
11104 : LdSize == 2 ? ARM::tLDRHi
11105 : LdSize == 1 ? ARM::tLDRBi : 0;
11107 return LdSize == 4 ? ARM::t2LDR_POST
11108 : LdSize == 2 ? ARM::t2LDRH_POST
11109 : LdSize == 1 ? ARM::t2LDRB_POST : 0;
11110 return LdSize == 4 ? ARM::LDR_POST_IMM
11111 : LdSize == 2 ? ARM::LDRH_POST
11112 : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
11117static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
11119 return StSize == 16 ? ARM::VST1q32wb_fixed
11120 : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
11122 return StSize == 4 ? ARM::tSTRi
11123 : StSize == 2 ? ARM::tSTRHi
11124 : StSize == 1 ? ARM::tSTRBi : 0;
11126 return StSize == 4 ? ARM::t2STR_POST
11127 : StSize == 2 ? ARM::t2STRH_POST
11128 : StSize == 1 ? ARM::t2STRB_POST : 0;
11129 return StSize == 4 ? ARM::STR_POST_IMM
11130 : StSize == 2 ? ARM::STRH_POST
11131 : StSize == 1 ? ARM::STRB_POST_IMM : 0;
11138 unsigned LdSize, unsigned Data, unsigned AddrIn,
11139 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11140 unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
11141 assert(LdOpc != 0 && "Should have a load opcode");
11148 } else if (IsThumb1) {
11154 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11159 } else if (IsThumb2) {
11179 unsigned StSize, unsigned Data, unsigned AddrIn,
11180 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11181 unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
11182 assert(StOpc != 0 && "Should have a store opcode");
11184 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11189 } else if (IsThumb1) {
11196 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11201 } else if (IsThumb2) {
11202 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11208 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11223 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11229 unsigned SizeVal = MI.getOperand(2).getImm();
11230 unsigned Alignment = MI.getOperand(3).getImm();
11235 unsigned UnitSize = 0;
11236 const TargetRegisterClass *TRC = nullptr;
11237 const TargetRegisterClass *VecTRC = nullptr;
11239 bool IsThumb1 = Subtarget->isThumb1Only();
11240 bool IsThumb2 = Subtarget->isThumb2();
11241 bool IsThumb = Subtarget->isThumb();
11243 if (Alignment & 1) {
11245 } else if (Alignment & 2) {
11250 Subtarget->hasNEON()) {
11251 if ((Alignment % 16 == 0) && SizeVal >= 16)
11253 else if ((Alignment % 8 == 0) && SizeVal >= 8)
11262 bool IsNeon = UnitSize >= 8;
11263 TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
11265 VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
11266 : UnitSize == 8 ? &ARM::DPRRegClass
11269 unsigned BytesLeft = SizeVal % UnitSize;
11270 unsigned LoopSize = SizeVal - BytesLeft;
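// Standalone model of the unit-size choice at 11243-11270: take the widest
// copy unit the alignment and size permit (16- or 8-byte NEON units when
// available, else 4/2/1 bytes), then split the total into a main loop and a
// byte tail. An assumed simplification of the in-tree selection.
struct CopyPlan {
  unsigned UnitSize, LoopBytes, TailBytes;
};

static CopyPlan planByvalCopy(unsigned SizeVal, unsigned Alignment,
                              bool HasNEON) {
  unsigned UnitSize;
  if (Alignment & 1)
    UnitSize = 1;
  else if (Alignment & 2)
    UnitSize = 2;
  else if (HasNEON && Alignment % 16 == 0 && SizeVal >= 16)
    UnitSize = 16;
  else if (HasNEON && Alignment % 8 == 0 && SizeVal >= 8)
    UnitSize = 8;
  else
    UnitSize = 4;
  unsigned Tail = SizeVal % UnitSize;
  return {UnitSize, SizeVal - Tail, Tail};
}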
11272 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
11276 unsigned srcIn = src;
11277 unsigned destIn = dest;
11278 for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
11279 Register srcOut = MRI.createVirtualRegister(TRC);
11280 Register destOut = MRI.createVirtualRegister(TRC);
11281 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11283 IsThumb1, IsThumb2);
11285 IsThumb1, IsThumb2);
11293 for (unsigned i = 0; i < BytesLeft; i++) {
11294 Register srcOut = MRI.createVirtualRegister(TRC);
11295 Register destOut = MRI.createVirtualRegister(TRC);
11296 Register scratch = MRI.createVirtualRegister(TRC);
11298 IsThumb1, IsThumb2);
11300 IsThumb1, IsThumb2);
11304 MI.eraseFromParent();
11330 MF->insert(It, loopMBB);
11331 MF->insert(It, exitMBB);
11334 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
11344 Register varEnd = MRI.createVirtualRegister(TRC);
11345 if (Subtarget->useMovt()) {
11346 BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi32imm : ARM::MOVi32imm),
11349 } else if (Subtarget->genExecuteOnly()) {
11350 assert(IsThumb && "Non-thumb expected to have used movt");
11359 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11360 MachineMemOperand *CPMMO =
11384 MachineBasicBlock *entryBB = BB;
11386 Register varLoop = MRI.createVirtualRegister(TRC);
11387 Register varPhi = MRI.createVirtualRegister(TRC);
11388 Register srcLoop = MRI.createVirtualRegister(TRC);
11389 Register srcPhi = MRI.createVirtualRegister(TRC);
11390 Register destLoop = MRI.createVirtualRegister(TRC);
11391 Register destPhi = MRI.createVirtualRegister(TRC);
11399 BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
11405 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11407 IsThumb1, IsThumb2);
11409 IsThumb1, IsThumb2);
11413 BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
11419 MachineInstrBuilder MIB =
11421 TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
11430 TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
11439 auto StartOfExit = exitMBB->begin();
11443 unsigned srcIn = srcLoop;
11444 unsigned destIn = destLoop;
11445 for (unsigned i = 0; i < BytesLeft; i++) {
11446 Register srcOut = MRI.createVirtualRegister(TRC);
11447 Register destOut = MRI.createVirtualRegister(TRC);
11448 Register scratch = MRI.createVirtualRegister(TRC);
11449 emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
11450 IsThumb1, IsThumb2);
11451 emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
11452 IsThumb1, IsThumb2);
11457 MI.eraseFromParent();
11465 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
11468 assert(Subtarget->isTargetWindows() &&
11469 "__chkstk is only supported on Windows");
11470 assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
11490 switch (TM.getCodeModel()) {
11532 MI.eraseFromParent();
11541 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11556 .addReg(MI.getOperand(0).getReg())
11564 MI.eraseFromParent();
11588 if (miI == BB->end()) {
11590 if (Succ->isLiveIn(ARM::CPSR))
11596 SelectItr->addRegisterKilled(ARM::CPSR, TRI);
11608 Register AddDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11609 BuildMI(TpEntry, Dl, TII->get(ARM::t2ADDri), AddDestReg)
11615 Register LsrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11616 BuildMI(TpEntry, Dl, TII->get(ARM::t2LSRri), LsrDestReg)
11622 Register TotalIterationsReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11623 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopSetup), TotalIterationsReg)
11626 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopStart))
11627 .addUse(TotalIterationsReg)
11634 return TotalIterationsReg;
11645 Register TotalIterationsReg, bool IsMemcpy) {
11652 SrcPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11653 CurrSrcReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11654 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), SrcPhiReg)
11662 Register DestPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11663 Register CurrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11664 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), DestPhiReg)
11671 Register LoopCounterPhiReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11672 Register RemainingLoopIterationsReg =
11673 MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11674 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), LoopCounterPhiReg)
11675 .addUse(TotalIterationsReg)
11677 .addUse(RemainingLoopIterationsReg)
11681 Register PredCounterPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11682 Register RemainingElementsReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11683 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), PredCounterPhiReg)
11684 .addUse(ElementCountReg)
11686 .addUse(RemainingElementsReg)
11690 Register VccrReg = MRI.createVirtualRegister(&ARM::VCCRRegClass);
11691 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VCTP8), VccrReg)
11692 .addUse(PredCounterPhiReg)
11697 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2SUBri), RemainingElementsReg)
11698 .addUse(PredCounterPhiReg)
11706 SrcValueReg = MRI.createVirtualRegister(&ARM::MQPRRegClass);
11707 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VLDRBU8_post))
11716 SrcValueReg = OpSrcReg;
11718 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VSTRBU8_post))
11729 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopDec), RemainingLoopIterationsReg)
11730 .addUse(LoopCounterPhiReg)
11733 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopEnd))
11734 .addUse(RemainingLoopIterationsReg)
11752 "Invalid call instruction for a KCFI check");
11755 switch (MBBI->getOpcode()) {
11758 case ARM::BLX_pred:
11759 case ARM::BLX_noip:
11760 case ARM::BLX_pred_noip:
11762 TargetOp = &MBBI->getOperand(0);
11764 case ARM::TCRETURNri:
11765 case ARM::TCRETURNrinotr12:
11766 case ARM::TAILJMPr:
11767 case ARM::TAILJMPr4:
11768 TargetOp = &MBBI->getOperand(0);
11774 case ARM::tBLXr_noip:
11775 case ARM::tBX_CALL:
11776 TargetOp = &MBBI->getOperand(2);
11779 case ARM::tTAILJMPr:
11780 TargetOp = &MBBI->getOperand(0);
11786 assert(TargetOp && TargetOp->isReg() && "Invalid target operand");
11790 unsigned KCFICheckOpcode;
11791 if (Subtarget->isThumb()) {
11792 if (Subtarget->isThumb2()) {
11793 KCFICheckOpcode = ARM::KCFI_CHECK_Thumb2;
11795 KCFICheckOpcode = ARM::KCFI_CHECK_Thumb1;
11798 KCFICheckOpcode = ARM::KCFI_CHECK_ARM;
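// Restatement of the pseudo-opcode pick at 11790-11798: the KCFI check
// differs only by instruction-set state. The enum stands in for the
// ARM::KCFI_CHECK_* pseudos named in the source; this mapping helper is
// illustrative.
enum class KCFICheckKind { ARM, Thumb1, Thumb2 };

static KCFICheckKind kcfiCheckFor(bool IsThumb, bool IsThumb2) {
  if (!IsThumb)
    return KCFICheckKind::ARM;
  return IsThumb2 ? KCFICheckKind::Thumb2 : KCFICheckKind::Thumb1;
}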
11812 bool isThumb2 = Subtarget->isThumb2();
11813 switch (MI.getOpcode()) {
11820 case ARM::tLDR_postidx: {
11824 .add(MI.getOperand(2))
11825 .add(MI.getOperand(3))
11826 .add(MI.getOperand(4))
11827 .add(MI.getOperand(0))
11829 MI.eraseFromParent();
11833 case ARM::MVE_MEMCPYLOOPINST:
11834 case ARM::MVE_MEMSETLOOPINST: {
11864 Register OpDestReg = MI.getOperand(0).getReg();
11865 Register OpSrcReg = MI.getOperand(1).getReg();
11866 Register OpSizeReg = MI.getOperand(2).getReg();
11886 if (TpExit == BB) {
11888 "block containing memcpy/memset Pseudo");
11901 bool IsMemcpy = MI.getOpcode() == ARM::MVE_MEMCPYLOOPINST;
11903 OpDestReg, OpSizeReg, TotalIterationsReg, IsMemcpy);
11906 Properties.resetNoPHIs();
11918 MI.eraseFromParent();
11928 case ARM::t2STR_preidx:
11929 MI.setDesc(TII->get(ARM::t2STR_PRE));
11931 case ARM::t2STRB_preidx:
11932 MI.setDesc(TII->get(ARM::t2STRB_PRE));
11934 case ARM::t2STRH_preidx:
11935 MI.setDesc(TII->get(ARM::t2STRH_PRE));
11938 case ARM::STRi_preidx:
11939 case ARM::STRBi_preidx: {
11940 unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
11941 : ARM::STRB_PRE_IMM;
11943 unsigned Offset = MI.getOperand(4).getImm();
11951 .add(MI.getOperand(0))
11952 .add(MI.getOperand(1))
11953 .add(MI.getOperand(2))
11955 .add(MI.getOperand(5))
11956 .add(MI.getOperand(6))
11958 MI.eraseFromParent();
11961 case ARM::STRr_preidx:
11962 case ARM::STRBr_preidx:
11963 case ARM::STRH_preidx: {
11965 switch (MI.getOpcode()) {
11967 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
11968 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
11969 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
11974 MI.eraseFromParent();
11978 case ARM::tMOVCCr_pseudo: {
11996 F->insert(It, copy0MBB);
11997 F->insert(It, sinkMBB);
12000 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
12006 if (!MI.killsRegister(ARM::CPSR, nullptr) &&
12022 .addImm(MI.getOperand(3).getImm())
12023 .addReg(MI.getOperand(4).getReg());
12038 .addReg(MI.getOperand(1).getReg())
12040 .addReg(MI.getOperand(2).getReg())
12043 MI.eraseFromParent();
12048 case ARM::BCCZi64: {
12054 bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
12059 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12063 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12069 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12073 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12083 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
12092 MI.eraseFromParent();
12096 case ARM::Int_eh_sjlj_setjmp:
12097 case ARM::Int_eh_sjlj_setjmp_nofp:
12098 case ARM::tInt_eh_sjlj_setjmp:
12099 case ARM::t2Int_eh_sjlj_setjmp:
12100 case ARM::t2Int_eh_sjlj_setjmp_nofp:
12103 case ARM::Int_eh_sjlj_setup_dispatch:
12104 EmitSjLjDispatchBlock(MI, BB);
12106 case ARM::COPY_STRUCT_BYVAL_I32:
12108 return EmitStructByval(MI, BB);
12109 case ARM::WIN__CHKSTK:
12110 return EmitLowered__chkstk(MI, BB);
12111 case ARM::WIN__DBZCHK:
12112 return EmitLowered__dbzchk(MI, BB);
12128 if (!Node->hasAnyUseOfValue(0)) {
12129 MI.getOperand(0).setIsDead(true);
12131 if (!Node->hasAnyUseOfValue(1)) {
12132 MI.getOperand(1).setIsDead(true);
12136 for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
12137 Register TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
12138 : &ARM::GPRRegClass);
12145 if (MI.getOpcode() == ARM::MEMCPY) {
12166 MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
12167 && "converted opcode should be the same except for cc_out"
12168 " (and, on Thumb1, pred)");
12176 if (Subtarget->isThumb1Only()) {
12177 for (unsigned c = MCID->getNumOperands() - 4; c--;) {
12178 MI.addOperand(MI.getOperand(1));
12179 MI.removeOperand(1);
12183 for (unsigned i = MI.getNumOperands(); i--;) {
12185 if (op.isReg() && op.isUse()) {
12188 MI.tieOperands(DefIdx, i);
12196 ccOutIdx = MCID->getNumOperands() - 1;
12198 ccOutIdx = MCID->getNumOperands() - 1;
12202 if (!MI.hasOptionalDef() || !MCID->operands()[ccOutIdx].isOptionalDef()) {
12203 assert(!NewOpc && "Optional cc_out operand required");
12208 bool definesCPSR = false;
12209 bool deadCPSR = false;
12210 for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
12214 definesCPSR = true;
12217 MI.removeOperand(i);
12221 if (!definesCPSR) {
12222 assert(!NewOpc && "Optional cc_out operand required");
12225 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
12227 assert(!MI.getOperand(ccOutIdx).getReg() &&
12228 "expect uninitialized optional cc_out operand");
12230 if (!Subtarget->isThumb1Only())
12266 switch (N->getOpcode()) {
12267 default: return false;
12269 CC = N->getOperand(0);
12291 EVT VT = N->getValueType(0);
12292 CC = N->getOperand(0);
12339 EVT VT = N->getValueType(0);
12342 bool SwapSelectOps;
12344 NonConstantVal, DAG))
12350 OtherOp, NonConstantVal);
12356 CCOp, TrueVal, FalseVal);
12376 if (N->getOpcode() == ARMISD::VUZP)
12380 if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
12395 if (!N->getValueType(0).is64BitVector())
12403 EVT VT = N->getValueType(0);
12442 EVT VT = N->getValueType(0);
12448 Opcode = Intrinsic::arm_neon_vpaddls;
12450 Opcode = Intrinsic::arm_neon_vpaddlu;
12478 EVT VT = N->getValueType(0);
12493 unsigned nextIndex = 0;
12544 Ops.push_back(Vec);
12561 return DAG.getNode(ExtOp, dl, VT, tmp);
12592 if (SRA.getOpcode() != ISD::SRA) {
12599 if (Const->getZExtValue() != 31)
12604 if (SRA.getOperand(0) != Mul)
12608 SDLoc dl(AddcNode);
12609 unsigned Opcode = 0;
12614 Opcode = ARMISD::SMLALBB;
12615 Op0 = Mul.getOperand(0);
12616 Op1 = Mul.getOperand(1);
12618 Opcode = ARMISD::SMLALBT;
12619 Op0 = Mul.getOperand(0);
12620 Op1 = Mul.getOperand(1).getOperand(0);
12622 Opcode = ARMISD::SMLALTB;
12623 Op0 = Mul.getOperand(0).getOperand(0);
12624 Op1 = Mul.getOperand(1);
12626 Opcode = ARMISD::SMLALTT;
12627 Op0 = Mul->getOperand(0).getOperand(0);
12628 Op1 = Mul->getOperand(1).getOperand(0);
12644 SDValue resNode(AddcNode, 0);
12672 AddeSubeNode->getOpcode() == ARMISD::SUBE) &&
12673 "Expect an ADDE or SUBE");
12677 "ADDE node has the wrong inputs");
12681 if ((AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12682 AddcSubcNode->getOpcode() != ARMISD::ADDC) ||
12683 (AddeSubeNode->getOpcode() == ARMISD::SUBE &&
12684 AddcSubcNode->getOpcode() != ARMISD::SUBC))
12696 "Expect ADDC with two result values. First: i32");
12700 if (AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12716 bool IsLeftOperandMUL = false;
12721 IsLeftOperandMUL = true;
12732 SDValue *LowAddSub = nullptr;
12735 if ((AddeSubeOp0 != MULOp.getValue(1)) && (AddeSubeOp1 != MULOp.getValue(1)))
12738 if (IsLeftOperandMUL)
12739 HiAddSub = &AddeSubeOp1;
12741 HiAddSub = &AddeSubeOp0;
12746 if (AddcSubcOp0 == MULOp.getValue(0)) {
12747 LoMul = &AddcSubcOp0;
12748 LowAddSub = &AddcSubcOp1;
12750 if (AddcSubcOp1 == MULOp.getValue(0)) {
12751 LoMul = &AddcSubcOp1;
12752 LowAddSub = &AddcSubcOp0;
12760 if (AddcSubcNode == HiAddSub->getNode() ||
12776 if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&
12781 Ops.push_back(*HiAddSub);
12782 if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {
12783 FinalOpc = ARMISD::SMMLSR;
12785 FinalOpc = ARMISD::SMMLAR;
12790 return SDValue(AddeSubeNode, 0);
12791 } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)
12797 Ops.push_back(*LowAddSub);
12798 Ops.push_back(*HiAddSub);
12811 return SDValue(AddeSubeNode, 0);
12823 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
12828 if (AddcNode->getOpcode() != ARMISD::ADDC)
12832 SDNode *UmlalNode = nullptr;
12871 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
12876 SDNode* AddcNode = N->getOperand(2).getNode();
12877 SDNode* AddeNode = N->getOperand(3).getNode();
12878 if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
12879 (AddeNode->getOpcode() == ARMISD::ADDE) &&
12885 {N->getOperand(0), N->getOperand(1),
12886 AddcNode->getOperand(0), AddcNode->getOperand(1)});
12896 if (N->getOpcode() == ARMISD::SUBC && N->hasAnyUseOfValue(1)) {
12900 if (LHS->getOpcode() == ARMISD::ADDE &&
12910 int32_t imm = C->getSExtValue();
12911 if (imm < 0 && imm > std::numeric_limits<int>::min()) {
12914 unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
12916 return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
12931 int64_t imm = C->getSExtValue();
12940 unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
12942 return DAG.getNode(Opcode, DL, N->getVTList(),
12943 N->getOperand(0), RHS, N->getOperand(2));
12955 if (!Subtarget->hasMVEIntegerOps())
12968 SetCC =
N->getOperand(0);
12972 TrueVal =
N->getOperand(1);
12973 FalseVal =
N->getOperand(2);
12975 LHS =
N->getOperand(0);
12976 RHS =
N->getOperand(1);
12978 TrueVal =
N->getOperand(2);
12979 FalseVal =
N->getOperand(3);
12984 unsigned int Opcode = 0;
12988 Opcode = ARMISD::VMINVu;
12994 Opcode = ARMISD::VMINVs;
13000 Opcode = ARMISD::VMAXVu;
13006 Opcode = ARMISD::VMAXVs;
13013 switch (TrueVal->getOpcode()) {
13032 if (TrueVal !=
LHS || FalseVal !=
RHS)
13035 EVT LeftType =
LHS->getValueType(0);
13036 EVT RightType =
RHS->getValueType(0);
13039 if (LeftType != VectorScalarType || RightType != VectorScalarType)
13043 if (VectorScalarType != MVT::i32)
13051 if (VectorScalarType != MVT::i32)
13064 EVT VT = N->getValueType(0);
13072 Shft = N->getOperand(0);
13079 Cmp.getOperand(0) != N->getOperand(1) ||
13080 Cmp.getOperand(1) != N->getOperand(2))
13082 Shft = N->getOperand(1);
13094 ScalarType = MVT::i8;
13097 case (1 << 15) - 1:
13098 ScalarType = MVT::i16;
13101 case (1ULL << 31) - 1:
13102 ScalarType = MVT::i32;
13133 unsigned LegalLanes = 128 / (ShftAmt + 1);
13145 Inp0 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp0);
13146 Inp1 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp1);
13147 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13148 SDValue Trunc = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, ExtVecVT, VQDMULH);
13157 for (unsigned I = 0; I < NumParts; ++I) {
13164 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
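// --- Illustrative sketch (not part of the original file): VQDMULH is a
// saturating "doubling multiply returning high half". The combine above
// matches a widened multiply followed by an arithmetic shift right and
// replaces it with this lane-wise operation, whose reference semantics are:
//
#include <cstdint>
static int16_t vqdmulh_i16(int16_t a, int16_t b) {
  int64_t p = 2 * (int64_t)a * (int64_t)b; // doubled product, exact in 64 bits
  int64_t r = p >> 16;                     // keep the high half
  if (r > 32767)
    r = 32767;         // saturate; only reachable when a == b == INT16_MIN
  return (int16_t)r;
}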
13174 if (!Subtarget->hasMVEIntegerOps())
13189 if (N->getOperand(0).getOpcode() != ISD::XOR)
13199 if (!Const || !Const->isOne())
13217 EVT VT = N->getValueType(0);
13219 if (!Subtarget->hasMVEIntegerOps() ||
13248 Opc = Intrinsic::arm_mve_vctp64;
13251 Opc = Intrinsic::arm_mve_vctp32;
13254 Opc = Intrinsic::arm_mve_vctp16;
13257 Opc = Intrinsic::arm_mve_vctp8;
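// --- Illustrative sketch (not part of the original file): the vctp8/16/32/64
// intrinsics chosen above create a lane predicate whose first 'elts' lanes
// are active, the building block of MVE tail-predicated loops. Reference
// semantics as a bitmask:
//
#include <cstdint>
static uint16_t vctp_mask(unsigned lanes, unsigned elts) {
  if (elts >= lanes)
    return (uint16_t)((1u << lanes) - 1); // all lanes active
  return (uint16_t)((1u << elts) - 1);    // first 'elts' lanes active
}
// e.g. vctp_mask(8, 3) == 0b00000111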
13311 EVT VT = N->getValueType(0);
13317 switch (Op.getOpcode()) {
13319 case ARMISD::VADDVs:
13320 case ARMISD::VADDVu:
13321 case ARMISD::VMLAVs:
13322 case ARMISD::VMLAVu:
13342 unsigned N0RedOp = 0;
13349 unsigned N1RedOp = 0;
13363 if (SDValue R = DistrubuteAddAddVecReduce(N0, N1))
13365 if (SDValue R = DistrubuteAddAddVecReduce(N1, N0))
13372 auto DistrubuteVecReduceLoad = [&](SDValue N0, SDValue N1, bool IsForward) {
13396 if (!BaseLocDecomp0.getBase() ||
13397 BaseLocDecomp0.getBase() != BaseLocDecomp1.getBase() ||
13398 !BaseLocDecomp0.hasValidOffset() || !BaseLocDecomp1.hasValidOffset())
13400 if (BaseLocDecomp0.getOffset() < BaseLocDecomp1.getOffset())
13402 if (BaseLocDecomp0.getOffset() > BaseLocDecomp1.getOffset())
13412 if (IsBefore < 0) {
13415 } else if (IsBefore > 0) {
13428 } else if (IsForward && IsVecReduce(N0) && IsVecReduce(N1) &&
13438 if (!IsVecReduce(N0) || !IsVecReduce(N1))
13448 if (SDValue R = DistrubuteVecReduceLoad(N0, N1, true))
13450 if (SDValue R = DistrubuteVecReduceLoad(N1, N0, false))
13457 if (!Subtarget->hasMVEIntegerOps())
13463 EVT VT = N->getValueType(0);
13468 if (VT != MVT::i64)
13479 auto MakeVecReduce = [&](unsigned Opcode, unsigned OpcodeA, SDValue NA,
13499 unsigned S = VecRed->getOpcode() == OpcodeA ? 2 : 0;
13508 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N0, N1))
13510 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N0, N1))
13512 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N1, N0))
13514 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N1, N0))
13516 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N0, N1))
13518 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N0, N1))
13520 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N1, N0))
13522 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N1, N0))
13524 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N0, N1))
13526 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N0, N1))
13528 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N1, N0))
13530 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N1, N0))
13532 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N0, N1))
13534 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N0, N1))
13536 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N1, N0))
13538 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N1, N0))
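// --- Illustrative sketch (not part of the original file): each MakeVecReduce
// case above folds add(x, vecreduce(v)) into the accumulating form of the
// reduction (the VADDLVA / VMLALVA variants), which sum into an existing
// scalar instead of producing a fresh one:
//
#include <cstdint>
static int64_t vaddlva_s32(int64_t acc, const int32_t *v, unsigned n) {
  for (unsigned i = 0; i < n; ++i)
    acc += v[i];            // accumulate the whole vector into acc
  return acc;
}
// add(x, vaddlv(v)) and vaddlva(x, v) compute the same value, so the ADD
// node disappears into the reduction.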
13548 "Expected shift op");
13550 SDValue ShiftLHS = N->getOperand(0);
13564 if (Subtarget->isThumb1Only()) {
13575 if (Const->getAPIntValue().ult(256))
13578 Const->getAPIntValue().sgt(-256))
13594 (N->getOperand(0).getOpcode() == ISD::SHL ||
13595 N->getOperand(0).getOpcode() == ISD::SRL) &&
13596 "Expected XOR(SHIFT) pattern");
13601 if (XorC && ShiftC) {
13602 unsigned MaskIdx, MaskLen;
13603 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
13604 unsigned ShiftAmt = ShiftC->getZExtValue();
13605 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
13606 if (N->getOperand(0).getOpcode() == ISD::SHL)
13607 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
13608 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
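// --- Illustrative sketch (not part of the original file): after SHL by
// ShiftAmt the low ShiftAmt bits are always zero, so an XOR constant that is
// a shifted mask covering exactly bits [ShiftAmt, BitWidth) inverts every
// bit the shift can produce; for SRL the meaningful bits are
// [0, BitWidth - ShiftAmt). The mask shape being tested, for the SHL case:
//
#include <cstdint>
static uint32_t fullNotMaskAfterShl(unsigned shiftAmt) { // shiftAmt in [1, 31]
  return 0xFFFFFFFFu << shiftAmt; // MaskIdx == shiftAmt, MaskLen == 32 - shiftAmt
}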
13618 N->getOperand(0).getOpcode() == ISD::SRL) ||
13620 N->getOperand(0).getOpcode() == ISD::SHL)) &&
13621 "Expected shift-shift mask");
13623 if (!Subtarget->isThumb1Only())
13626 EVT VT = N->getValueType(0);
13634 unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,
13636 return Subtarget->hasMVEIntegerOps() && isTypeLegal(VT) &&
13641 if (!Subtarget->hasNEON()) {
13642 if (Subtarget->isThumb1Only())
13656 return Subtarget->hasVFP2Base();
13658 return Subtarget->hasVFP2Base();
13660 return Subtarget->hasFP64();
13663 return Subtarget->hasMVEFloatOps();
13692 if (ST->isThumb() && ST->isThumb1Only())
13696 for (auto *U : N->users()) {
13697 switch(U->getOpcode()) {
13715 if (U->getOperand(0).getOpcode() == ISD::SHL ||
13716 U->getOperand(1).getOpcode() == ISD::SHL)
13726 if (N->getOperand(0).getOpcode() != ISD::SHL)
13733 if (!C1ShlC2 || !C2)
13736 APInt C2Int = C2->getAPIntValue();
13737 APInt C1Int = C1ShlC2->getAPIntValue();
13739 if (C2Int.uge(C2Width))
13745 if ((C1Int & Mask) != C1Int)
13752 auto LargeImm = [](const APInt &Imm) {
13753 unsigned Zeros = Imm.countl_zero() + Imm.countr_zero();
13754 return Imm.getBitWidth() - Zeros > 8;
13757 if (LargeImm(C1Int) || LargeImm(C2Int))
13769 SHL.dump(); N->dump());
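// --- Illustrative sketch (not part of the original file): ARM
// data-processing immediates encode an 8-bit value plus a rotation, so the
// LargeImm lambda above treats a constant as expensive when more than 8
// significant bits remain between its leading and trailing zeros. The same
// test with plain integers (GCC/Clang builtins assumed):
//
#include <cstdint>
static bool largeImm(uint32_t imm) {
  if (imm == 0)
    return false;                               // trivially encodable
  unsigned zeros = __builtin_clz(imm) + __builtin_ctz(imm);
  return 32 - zeros > 8;                        // more than 8 significant bits
}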
13872 if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
13893 return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0), Negate);
13914 if (!Subtarget->hasVMLxForwarding())
13933 EVT VT = N->getValueType(0);
13944 EVT VT = N->getValueType(0);
13945 if (VT != MVT::v2i64)
13956 return Op->getOperand(0);
13970 And = And->getOperand(0);
13975 Mask = Mask->getOperand(0);
13978 Mask.getValueType() != MVT::v4i32)
13984 return And->getOperand(0);
13989 if (SDValue Op0 = IsSignExt(N0)) {
13990 if (SDValue Op1 = IsSignExt(N1)) {
13991 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
13992 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
13993 return DAG.getNode(ARMISD::VMULLs, dl, VT, New0a, New1a);
13996 if (SDValue Op0 = IsZeroExt(N0)) {
13997 if (SDValue Op1 = IsZeroExt(N1)) {
13998 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
13999 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14000 return DAG.getNode(ARMISD::VMULLu, dl, VT, New0a, New1a);
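// --- Illustrative sketch (not part of the original file): VMULLs/VMULLu are
// lane-widening multiplies, so once both operands are known to be sign or
// zero extensions the extend+mul pair collapses into a single node:
//
#include <cstdint>
static void vmulls_v2i64(const int32_t a[2], const int32_t b[2], int64_t out[2]) {
  for (int i = 0; i < 2; ++i)
    out[i] = (int64_t)a[i] * (int64_t)b[i]; // each 32x32 product kept in 64 bits
}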
14012 EVT VT = N->getValueType(0);
14013 if (Subtarget->hasMVEIntegerOps() && VT == MVT::v2i64)
14024 if (VT != MVT::i32)
14031 int64_t MulAmt = C->getSExtValue();
14034 ShiftAmt = ShiftAmt & (32 - 1);
14039 MulAmt >>= ShiftAmt;
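// --- Illustrative sketch (not part of the original file): after stripping
// the power-of-two factor, MulAmt is checked against 2^n +/- 1 so the
// multiply can be rewritten as a shift plus an add or subtract:
//
#include <cstdint>
static uint32_t mul_by_9(uint32_t x) { return (x << 3) + x; } // 9 == 2^3 + 1
static uint32_t mul_by_7(uint32_t x) { return (x << 3) - x; } // 7 == 2^3 - 1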
14100 if (N->getValueType(0) != MVT::i32)
14109 if (C1 == 255 || C1 == 65535)
14112 SDNode *N0 = N->getOperand(0).getNode();
14126 if (!C2 || C2 >= 32)
14170 if (Trailing == C2 && C2 + C3 < 32) {
14183 if (Leading == C2 && C2 + C3 < 32) {
14211 EVT VT = N->getValueType(0);
14215 VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1)
14218 APInt SplatBits, SplatUndef;
14219 unsigned SplatBitSize;
14221 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14222 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14223 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14224 SplatBitSize == 64) {
14231 DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VbicVT, N->getOperand(0));
14233 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vbic);
14258 if (!Subtarget->hasV6Ops() ||
14259 (Subtarget->isThumb() &&
14260 (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
14263 SDValue SRL = OR->getOperand(0);
14264 SDValue SHL = OR->getOperand(1);
14267 SRL = OR->getOperand(1);
14268 SHL = OR->getOperand(0);
14275 if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
14279 SDNode *SMULLOHI = SRL.getOperand(0).getNode();
14280 if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
14281 SHL.getOperand(0) != SDValue(SMULLOHI, 1))
14300 unsigned Opcode = 0;
14301 if (isS16(OpS16, DAG))
14302 Opcode = ARMISD::SMULWB;
14304 Opcode = ARMISD::SMULWT;
14319 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
14322 EVT VT = N->getValueType(0);
14337 if (VT != MVT::i32)
14350 if (Mask == 0xffff)
14357 if ((Val & ~Mask) != Val)
14363 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
14382 (Mask == ~Mask2)) {
14385 if (Subtarget->hasDSP() &&
14386 (Mask == 0xffff || Mask == 0xffff0000))
14392 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
14399 (~Mask == Mask2)) {
14402 if (Subtarget->hasDSP() &&
14403 (Mask2 == 0xffff || Mask2 == 0xffff0000))
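// --- Illustrative sketch (not part of the original file): BFI inserts the
// low 'width' bits of one register into another at position 'lsb'; the
// OR-of-masked-values patterns above are recognised as exactly this
// operation. Reference semantics:
//
#include <cstdint>
static uint32_t bfi(uint32_t dst, uint32_t src, unsigned lsb, unsigned width) {
  uint32_t mask = (width >= 32 ? 0xFFFFFFFFu : ((1u << width) - 1u)) << lsb;
  return (dst & ~mask) | ((src << lsb) & mask); // other dst bits untouched
}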
14459 if (N->getOpcode() == ARMISD::VCMP)
14461 else if (N->getOpcode() == ARMISD::VCMPZ)
14469 return isValidMVECond(CC, N->getOperand(0).getValueType().isFloatingPoint());
14476 EVT VT = N->getValueType(0);
14481 auto IsFreelyInvertable = [&](SDValue V) {
14482 if (V->getOpcode() == ARMISD::VCMP || V->getOpcode() == ARMISD::VCMPZ)
14488 if (!(IsFreelyInvertable(N0) || IsFreelyInvertable(N1)))
14504 EVT VT = N->getValueType(0);
14510 if (Subtarget->hasMVEIntegerOps() && (VT == MVT::v2i1 || VT == MVT::v4i1 ||
14511 VT == MVT::v8i1 || VT == MVT::v16i1))
14514 APInt SplatBits, SplatUndef;
14515 unsigned SplatBitSize;
14517 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14518 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14519 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14520 SplatBitSize == 64) {
14527 DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VorrVT, N->getOperand(0));
14529 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vorr);
14556 unsigned SplatBitSize;
14559 APInt SplatBits0, SplatBits1;
14563 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
14564 HasAnyUndefs) && !HasAnyUndefs) {
14565 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
14566 HasAnyUndefs) && !HasAnyUndefs) {
14571 SplatBits0 == ~SplatBits1) {
14579 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Result);
14601 EVT VT = N->getValueType(0);
14616 if (Subtarget->hasMVEIntegerOps()) {
14644 assert(N->getOpcode() == ARMISD::BFI);
14647 ToMask = ~N->getConstantOperandAPInt(2);
14667 unsigned LastActiveBitInA = A.countr_zero();
14668 unsigned FirstActiveBitInB = B.getBitWidth() - B.countl_zero() - 1;
14669 return LastActiveBitInA - 1 == FirstActiveBitInB;
14674 APInt ToMask, FromMask;
14679 if (V.getOpcode() != ARMISD::BFI)
14682 APInt NewToMask, NewFromMask;
14684 if (NewFrom != From)
14688 if ((NewToMask & ToMask).getBoolValue())
14713 unsigned InvMask = N->getConstantOperandVal(2);
14717 static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
14718 "undefined behavior");
14719 unsigned Mask = (1u << Width) - 1;
14721 if ((Mask & (~Mask2)) == 0)
14723 N->getOperand(0), N1.getOperand(0), N->getOperand(2));
14730 APInt ToMask1, FromMask1;
14733 APInt ToMask2, FromMask2;
14739 APInt NewFromMask = FromMask1 | FromMask2;
14740 APInt NewToMask = ToMask1 | ToMask2;
14742 EVT VT = N->getValueType(0);
14745 if (NewFromMask[0] == 0)
14748 return DAG.getNode(ARMISD::BFI, dl, VT, CombineBFI.getOperand(0), From1,
14756 if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
14757 APInt ToMask1 = ~N->getConstantOperandAPInt(2);
14758 APInt ToMask2 = ~N0.getConstantOperandAPInt(2);
14760 if (!N0.hasOneUse() || (ToMask1 & ToMask2) != 0 ||
14764 EVT VT = N->getValueType(0);
14767 N->getOperand(1), N->getOperand(2));
14779 if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
14781 SDValue CSInc = Cmp->getOperand(0);
14791 if (CSInc.getOpcode() == ARMISD::CSINC &&
14831 if (N->getConstantOperandVal(2) == ARMCC::EQ)
14832 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
14835 if (N->getConstantOperandVal(2) == ARMCC::NE)
14837 N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
14850 SDValue InDouble = N->getOperand(0);
14851 if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
14865 SDValue BasePtr = LD->getBasePtr();
14867 DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
14868 LD->getAlign(), LD->getMemOperand()->getFlags());
14874 LD->getPointerInfo().getWithOffset(4),
14876 LD->getMemOperand()->getFlags());
14895 BV.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
14909 if (!Subtarget->isLittle() && BVSwap)
14927 if (!Subtarget->isLittle() && BVSwap)
14946 if (Op0.getOpcode() == ARMISD::VMOVRRD &&
14959 if (Op0->getOpcode() == ARMISD::VMOVrh)
14972 if (Copy.getValueType() == MVT::f32 &&
14974 bool HasGlue = Copy->getNumOperands() == 3;
14975 SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1),
14976 HasGlue ? Copy->getOperand(2) : SDValue()};
14977 EVT OutTys[] = {N->getValueType(0), MVT::Other, MVT::Glue};
14996 if (LN0->hasOneUse() && LN0->isUnindexed() &&
14997 LN0->getMemoryVT() == MVT::i16) {
15000 LN0->getBasePtr(), LN0->getMemOperand());
15018 EVT VT = N->getValueType(0);
15052 unsigned NumElts = N->getValueType(0).getVectorNumElements();
15053 for (unsigned i = 0; i < NumElts; ++i) {
15054 SDNode *Elt = N->getOperand(i).getNode();
15071 if (N->getNumOperands() == 2)
15077 EVT VT = N->getValueType(0);
15083 for (unsigned i = 0; i < NumElts; ++i) {
15109 EVT VT = N->getValueType(0);
15117 assert(EltVT == MVT::f32 && "Unexpected type!");
15122 Use->getValueType(0).isFloatingPoint())
15130 unsigned NumOfBitCastedElts = 0;
15132 unsigned NumOfRelevantElts = NumElts;
15133 for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
15138 ++NumOfBitCastedElts;
15142 --NumOfRelevantElts;
15146 if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
15164 for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
15169 V->getOperand(0).getValueType() == MVT::i32)
15171 V = V.getOperand(0);
15188 EVT VT = N->getValueType(0);
15193 if (Op->getOpcode() == ARMISD::PREDICATE_CAST) {
15195 if (Op->getOperand(0).getValueType() == VT)
15196 return Op->getOperand(0);
15197 return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15204 DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15211 if (Op.getValueType() == MVT::i32) {
15222 EVT VT = N->getValueType(0);
15227 if (ST->isLittle())
15231 if (Op.getValueType() == VT)
15238 if (Op->getOpcode() == ARMISD::VECTOR_REG_CAST) {
15240 if (Op->getOperand(0).getValueType() == VT)
15241 return Op->getOperand(0);
15242 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Op->getOperand(0));
15250 if (!Subtarget->hasMVEIntegerOps())
15253 EVT VT = N->getValueType(0);
15261 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op0, N->getOperand(2));
15267 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op1,
15271 return DAG.getNode(ARMISD::VCMP, dl, VT, Op1, Op0,
15284 EVT VT = N->getValueType(0);
15285 SDNode *Elt = N->getOperand(1).getNode();
15300 Vec, V, N->getOperand(2));
15310 EVT VT = N->getValueType(0);
15338 return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15339 isa<ConstantSDNode>(V->getOperand(1)) &&
15340 V->getConstantOperandVal(1) == Lane + 1 &&
15341 V->getOperand(0).getResNo() == ResNo;
15343 if (OtherIt == Op0->users().end())
15348 SDValue OtherExt(*OtherIt, 0);
15360 DCI.DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v2f64, Op0),
15363 DCI.DAG.getNode(ARMISD::VMOVRRD, dl, {MVT::i32, MVT::i32}, F64);
15373 EVT VT = N->getValueType(0);
15377 if (Op0->getOpcode() == ARMISD::VDUP) {
15379 if (VT == MVT::f16 && X.getValueType() == MVT::i32)
15380 return DCI.DAG.getNode(ARMISD::VMOVhr, dl, VT, X);
15381 if (VT == MVT::i32 && X.getValueType() == MVT::f16)
15382 return DCI.DAG.getNode(ARMISD::VMOVrh, dl, VT, X);
15383 if (VT == MVT::f32 && X.getValueType() == MVT::i32)
15386 while (X.getValueType() != VT && X->getOpcode() == ISD::BITCAST)
15387 X = X->getOperand(0);
15388 if (X.getValueType() == VT)
15396 return Op0.getOperand(N->getConstantOperandVal(1));
15406 unsigned Offset = N->getConstantOperandVal(1);
15408 if (MOV.getOpcode() == ARMISD::VMOVDRR)
15418 unsigned Idx = N->getConstantOperandVal(1);
15432 EVT VT = N->getValueType(0);
15435 if (Op.getOpcode() == ARMISD::VGETLANEu &&
15437 Op.getOperand(0).getValueType().getScalarType())
15438 return DAG.getNode(ARMISD::VGETLANEs, SDLoc(N), VT, Op.getOperand(0),
15447 SDValue SubVec = N->getOperand(1);
15448 uint64_t IdxVal = N->getConstantOperandVal(2);
15459 if (IdxVal == 0 && Vec.isUndef())
15465 (IdxVal != 0 && IdxVal != NumSubElts))
15496 ARMISD::VMOVN, DL, VT,
15502 ARMISD::VMOVN, DL, VT,
15538 EVT VT = N->getValueType(0);
15549 unsigned HalfElts = NumElts/2;
15551 for (unsigned n = 0; n < NumElts; ++n) {
15554 if (MaskElt < (int)HalfElts)
15556 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
15557 NewElt = HalfElts + MaskElt - NumElts;
15600 bool SimpleConstIncOnly,
15608 bool isLoadOp = true;
15609 bool isLaneOp = false;
15612 bool hasAlignment = true;
15613 unsigned NewOpc = 0;
15614 unsigned NumVecs = 0;
15615 if (Target.isIntrinsic) {
15616 unsigned IntNo = N->getConstantOperandVal(1);
15620 case Intrinsic::arm_neon_vld1:
15624 case Intrinsic::arm_neon_vld2:
15628 case Intrinsic::arm_neon_vld3:
15632 case Intrinsic::arm_neon_vld4:
15636 case Intrinsic::arm_neon_vld1x2:
15639 hasAlignment = false;
15641 case Intrinsic::arm_neon_vld1x3:
15644 hasAlignment = false;
15646 case Intrinsic::arm_neon_vld1x4:
15649 hasAlignment = false;
15651 case Intrinsic::arm_neon_vld2dup:
15655 case Intrinsic::arm_neon_vld3dup:
15659 case Intrinsic::arm_neon_vld4dup:
15663 case Intrinsic::arm_neon_vld2lane:
15668 case Intrinsic::arm_neon_vld3lane:
15673 case Intrinsic::arm_neon_vld4lane:
15678 case Intrinsic::arm_neon_vst1:
15683 case Intrinsic::arm_neon_vst2:
15684 NewOpc = ARMISD::VST2_UPD;
15688 case Intrinsic::arm_neon_vst3:
15693 case Intrinsic::arm_neon_vst4:
15694 NewOpc = ARMISD::VST4_UPD;
15698 case Intrinsic::arm_neon_vst2lane:
15704 case Intrinsic::arm_neon_vst3lane:
15710 case Intrinsic::arm_neon_vst4lane:
15716 case Intrinsic::arm_neon_vst1x2:
15720 hasAlignment = false;
15722 case Intrinsic::arm_neon_vst1x3:
15726 hasAlignment = false;
15728 case Intrinsic::arm_neon_vst1x4:
15732 hasAlignment = false;
15737 switch (N->getOpcode()) {
15773 VecTy = N->getValueType(0);
15774 } else if (Target.isIntrinsic) {
15775 VecTy = N->getOperand(Target.AddrOpIdx + 1).getValueType();
15778 "Node has to be a load, a store, or an intrinsic!");
15779 VecTy = N->getOperand(1).getValueType();
15787 if (isLaneOp || isVLDDUPOp)
15790 if (NumBytes >= 3 * 16 && User.ConstInc != NumBytes) {
15796 if (SimpleConstIncOnly && User.ConstInc != NumBytes)
15805 EVT AlignedVecTy = VecTy;
15825 assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
15826 assert(!isLaneOp && "Unexpected generic load/store lane.");
15837 Alignment = Align(1);
15843 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
15845 for (n = 0; n < NumResultVecs; ++n)
15846 Tys[n] = AlignedVecTy;
15847 Tys[n++] = MVT::i32;
15848 Tys[n] = MVT::Other;
15853 Ops.push_back(N->getOperand(0));
15854 Ops.push_back(N->getOperand(Target.AddrOpIdx));
15859 Ops.push_back(StN->getValue());
15863 unsigned LastOperand =
15864 hasAlignment ? N->getNumOperands() - 1 : N->getNumOperands();
15865 for (unsigned i = Target.AddrOpIdx + 1; i < LastOperand; ++i)
15866 Ops.push_back(N->getOperand(i));
15874 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
15885 for (unsigned i = 0; i < NumResultVecs; ++i)
15890 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
15891 SDValue &LdVal = NewResults[0];
15927 switch (N->getOpcode()) {
15931 *Ptr = N->getOperand(0);
15932 *CInc = N->getOperand(1);
15939 *Ptr = N->getOperand(1);
15940 *CInc = N->getOperand(2);
15967 SDValue Addr = N->getOperand(AddrOpIdx);
15978 unsigned ConstInc =
15983 if (BaseUpdates.size() >= MaxBaseUpdates)
16003 unsigned UserOffset =
16006 if (!UserOffset || UserOffset <= Offset)
16009 unsigned NewConstInc = UserOffset - Offset;
16012 if (BaseUpdates.size() >= MaxBaseUpdates)
16019 unsigned NumValidUpd = BaseUpdates.size();
16020 for (unsigned I = 0; I < NumValidUpd; I++) {
16031 return LHS.ConstInc < RHS.ConstInc;
16060 unsigned IntNo = N->getConstantOperandVal(1);
16061 if (IntNo == Intrinsic::arm_mve_vst2q && N->getConstantOperandVal(5) != 1)
16063 if (IntNo == Intrinsic::arm_mve_vst4q && N->getConstantOperandVal(7) != 3)
16086 bool isLoadOp = true;
16087 unsigned NewOpc = 0;
16088 unsigned NumVecs = 0;
16092 case Intrinsic::arm_mve_vld2q:
16096 case Intrinsic::arm_mve_vld4q:
16100 case Intrinsic::arm_mve_vst2q:
16101 NewOpc = ARMISD::VST2_UPD;
16105 case Intrinsic::arm_mve_vst4q:
16106 NewOpc = ARMISD::VST4_UPD;
16115 VecTy = N->getValueType(0);
16117 VecTy = N->getOperand(3).getValueType();
16131 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16133 for (n = 0; n < NumResultVecs; ++n)
16135 Tys[n++] = MVT::i32;
16136 Tys[n] = MVT::Other;
16141 Ops.push_back(N->getOperand(0));
16142 Ops.push_back(N->getOperand(2));
16143 Ops.push_back(Inc);
16145 for (unsigned i = 3; i < N->getNumOperands(); ++i)
16146 Ops.push_back(N->getOperand(i));
16153 for (unsigned i = 0; i < NumResultVecs; ++i)
16172 EVT VT = N->getValueType(0);
16178 SDNode *VLD = N->getOperand(0).getNode();
16181 unsigned NumVecs = 0;
16182 unsigned NewOpc = 0;
16184 if (IntNo == Intrinsic::arm_neon_vld2lane) {
16187 } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
16190 } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
16202 if (Use.getResNo() == NumVecs)
16205 if (User->getOpcode() != ARMISD::VDUPLANE ||
16206 VLDLaneNo != User->getConstantOperandVal(1))
16213 for (n = 0; n < NumVecs; ++n)
16215 Tys[n] = MVT::Other;
16225 unsigned ResNo = Use.getResNo();
16227 if (ResNo == NumVecs)
16234 std::vector<SDValue> VLDDupResults;
16235 for (unsigned n = 0; n < NumVecs; ++n)
16249 EVT VT = N->getValueType(0);
16252 if (Subtarget->hasMVEIntegerOps()) {
16256 ExtractVT = MVT::i32;
16258 N->getOperand(0), N->getOperand(1));
16270 Op = Op.getOperand(0);
16271 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
16275 unsigned EltSize = Op.getScalarValueSizeInBits();
16277 unsigned Imm = Op.getConstantOperandVal(0);
16293 if (Subtarget->hasMVEIntegerOps()) {
16296 if (Op.getValueType() == MVT::f32)
16297 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16299 else if (Op.getValueType() == MVT::f16)
16300 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16301 DAG.getNode(ARMISD::VMOVrh, dl, MVT::i32, Op));
16304 if (!Subtarget->hasNEON())
16311 if (LD && Op.hasOneUse() && LD->isUnindexed() &&
16312 LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
16313 SDValue Ops[] = {LD->getOperand(0), LD->getOperand(1),
16318 LD->getMemoryVT(), LD->getMemOperand());
16329 EVT VT = N->getValueType(0);
16351 assert(StVT != VT && "Cannot truncate to the same type");
16361 if (0 != (NumElems * FromEltSz) % ToEltSz)
16364 unsigned SizeRatio = FromEltSz / ToEltSz;
16369 NumElems * SizeRatio);
16375 for (unsigned i = 0; i < NumElems; ++i)
16389 MVT StoreType = MVT::i8;
16391 if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
16411 for (unsigned I = 0; I < E; I++) {
16442 if (FromEltVT != MVT::f32 || ToEltVT != MVT::f16)
16445 unsigned NumElements = 4;
16462 unsigned Off0 = Rev ? NumElts : 0;
16463 unsigned Off1 = Rev ? 0 : NumElts;
16465 for (unsigned I = 0; I < NumElts; I += 2) {
16466 if (M[I] >= 0 && M[I] != (int)(Off0 + I / 2))
16468 if (M[I + 1] >= 0 && M[I + 1] != (int)(Off1 + I / 2))
16476 if (isVMOVNShuffle(Shuffle, false) || isVMOVNShuffle(Shuffle, true))
16496 unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8;
16507 Extract = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v4i32, FPTrunc);
16511 NewToVT, Alignment, MMOFlags, AAInfo);
16544 unsigned NewOffset =
16552 NewToVT, Alignment, MMOFlags, AAInfo);
16574 {Extract.getOperand(0), Extract.getOperand(1)});
16605 if (Subtarget->hasNEON())
16609 if (Subtarget->hasMVEFloatOps())
16613 if (Subtarget->hasMVEIntegerOps()) {
16688 if (!Subtarget->hasNEON())
16692 if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
16700 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
16702 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
16703 uint32_t IntBits = IntTy.getSizeInBits();
16704 unsigned NumLanes = Op.getValueType().getVectorNumElements();
16705 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
16716 if (C == -1 || C == 0 || C > 32)
16721 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
16722 Intrinsic::arm_neon_vcvtfp2fxu;
16725 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
16728 if (IntBits < FloatBits)
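// --- Illustrative sketch (not part of the original file): when a
// float-to-int conversion is fed by a multiply with 2^C, the scaling folds
// into the fixed-point form of VCVT (vcvtfp2fxs/fxu with #C fractional
// bits). Reference semantics of the combined operation:
//
#include <cstdint>
static int32_t cvt_to_fixed(float x, unsigned fbits) { // fbits in [1, 32]
  return (int32_t)(x * (float)(1ull << fbits));        // fptosi(x * 2^fbits)
}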
16736 if (!Subtarget->hasMVEFloatOps())
16744 EVT VT = N->getValueType(0);
16749 auto isIdentitySplat = [&](SDValue Op, bool NSZ) {
16751 Op.getOperand(0).getOpcode() != ARMISD::VMOVIMM)
16753 uint64_t ImmVal = Op.getOperand(0).getConstantOperandVal(0);
16754 if (VT == MVT::v4f32 && (ImmVal == 1664 || (ImmVal == 0 && NSZ)))
16756 if (VT == MVT::v8f16 && (ImmVal == 2688 || (ImmVal == 0 && NSZ)))
16769 if (!isIdentitySplat(Op1.getOperand(2), NSZ))
16780 EVT VT = N->getValueType(0);
16783 if (!N->getFlags().hasAllowReassociation())
16790 unsigned Opc = A.getConstantOperandVal(0);
16791 if (Opc != Intrinsic::arm_mve_vcmlaq)
16796 A.getOperand(3), A.getOperand(4));
16828 if (!Subtarget->hasNEON())
16832 unsigned OpOpcode = Op.getNode()->getOpcode();
16833 if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
16837 SDValue ConstVec = N->getOperand(1);
16841 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
16843 MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
16844 uint32_t IntBits = IntTy.getSizeInBits();
16845 unsigned NumLanes = Op.getValueType().getVectorNumElements();
16846 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
16866 int32_t C = IntVal.exactLogBase2();
16867 if (C == -1 || C == 0 || C > 32)
16873 if (IntBits < FloatBits)
16875 NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, ConvInput);
16877 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp
16878 : Intrinsic::arm_neon_vcvtfxu2fp;
16886 if (!ST->hasMVEIntegerOps())
16890 EVT ResVT = N->getValueType(0);
16918 EVT AVT = A.getValueType();
16924 auto ExtendIfNeeded = [&](SDValue A, unsigned ExtendCode) {
16925 EVT AVT = A.getValueType();
16933 auto IsVADDV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes) {
16934 if (ResVT != RetTy || N0->getOpcode() != ExtendCode)
16937 if (ExtTypeMatches(A, ExtTypes))
16938 return ExtendIfNeeded(A, ExtendCode);
16941 auto IsPredVADDV = [&](MVT RetTy, unsigned ExtendCode,
16951 if (ExtTypeMatches(A, ExtTypes))
16952 return ExtendIfNeeded(A, ExtendCode);
16955 auto IsVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
16965 if (ResVT != RetTy)
16968 if (Mul->getOpcode() == ExtendCode &&
16969 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
16971 Mul = Mul->getOperand(0);
16980 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
16981 A = ExtendIfNeeded(A, ExtendCode);
16982 B = ExtendIfNeeded(B, ExtendCode);
16987 auto IsPredVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
17000 if (Mul->getOpcode() == ExtendCode &&
17001 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17003 Mul = Mul->getOperand(0);
17012 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
17013 A = ExtendIfNeeded(A, ExtendCode);
17014 B = ExtendIfNeeded(B, ExtendCode);
17025 EVT VT = Ops[0].getValueType();
17026 if (VT == MVT::v16i8) {
17027 assert((Opcode == ARMISD::VMLALVs || Opcode == ARMISD::VMLALVu) &&
17028 "Unexpected illegal long reduction opcode");
17029 bool IsUnsigned = Opcode == ARMISD::VMLALVu;
17041 DAG.getNode(IsUnsigned ? ARMISD::VMLALVAu : ARMISD::VMLALVAs, dl,
17054 return DAG.getNode(ARMISD::VMLAVs, dl, ResVT, A, B);
17056 return DAG.getNode(ARMISD::VMLAVu, dl, ResVT, A, B);
17057 if (IsVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17059 return Create64bitNode(ARMISD::VMLALVs, {A, B});
17060 if (IsVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17062 return Create64bitNode(ARMISD::VMLALVu, {A, B});
17065 DAG.getNode(ARMISD::VMLAVs, dl, MVT::i32, A, B));
17068 DAG.getNode(ARMISD::VMLAVu, dl, MVT::i32, A, B));
17072 return DAG.getNode(ARMISD::VMLAVps, dl, ResVT, A, B, Mask);
17075 return DAG.getNode(ARMISD::VMLAVpu, dl, ResVT, A, B, Mask);
17078 return Create64bitNode(ARMISD::VMLALVps, {A, B, Mask});
17081 return Create64bitNode(ARMISD::VMLALVpu, {A, B, Mask});
17084 DAG.getNode(ARMISD::VMLAVps, dl, MVT::i32, A, B, Mask));
17087 DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask));
17090 return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
17092 return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
17094 return Create64bitNode(ARMISD::VADDLVs, {A});
17096 return Create64bitNode(ARMISD::VADDLVu, {A});
17099 DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
17102 DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
17105 return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
17107 return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
17109 return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
17111 return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
17114 DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
17117 DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
17124 Op = Op->getOperand(1);
17126 Op->getOperand(0)->getOpcode() == ISD::MUL) {
17128 if (Mul->getOperand(0) == Mul->getOperand(1) &&
17145 unsigned VecOp = N->getOperand(0).getValueType().isVector() ? 0 : 2;
17147 if (!Shuf || !Shuf->getOperand(1).isUndef())
17152 APInt SetElts(Mask.size(), 0);
17153 for (int E : Mask) {
17161 if (N->getNumOperands() != VecOp + 1) {
17163 if (!Shuf2 || !Shuf2->getOperand(1).isUndef() || Shuf2->getMask() != Mask)
17169 if (Op.getValueType().isVector())
17170 Ops.push_back(Op.getOperand(0));
17181 unsigned IsTop = N->getConstantOperandVal(2);
17188 if (Op0->isUndef() && !IsTop)
17193 if ((Op1->getOpcode() == ARMISD::VQMOVNs ||
17194 Op1->getOpcode() == ARMISD::VQMOVNu) &&
17202 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17204 APInt Op0DemandedElts =
17205 IsTop ? Op1DemandedElts
17220 unsigned IsTop = N->getConstantOperandVal(2);
17222 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17223 APInt Op0DemandedElts =
17235 EVT VT = N->getValueType(0);
17242 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
17243 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
17247 LHS.getOperand(0), RHS.getOperand(0));
17262 int ShiftAmt = C->getSExtValue();
17263 if (ShiftAmt == 0) {
17269 if (ShiftAmt >= -32 && ShiftAmt < 0) {
17270 unsigned NewOpcode =
17271 N->getOpcode() == ARMISD::LSLL ? ARMISD::LSRL : ARMISD::LSLL;
17286 unsigned IntNo = N->getConstantOperandVal(0);
17297 case Intrinsic::arm_neon_vshifts:
17298 case Intrinsic::arm_neon_vshiftu:
17299 case Intrinsic::arm_neon_vrshifts:
17300 case Intrinsic::arm_neon_vrshiftu:
17301 case Intrinsic::arm_neon_vrshiftn:
17302 case Intrinsic::arm_neon_vqshifts:
17303 case Intrinsic::arm_neon_vqshiftu:
17304 case Intrinsic::arm_neon_vqshiftsu:
17305 case Intrinsic::arm_neon_vqshiftns:
17306 case Intrinsic::arm_neon_vqshiftnu:
17307 case Intrinsic::arm_neon_vqshiftnsu:
17308 case Intrinsic::arm_neon_vqrshiftns:
17309 case Intrinsic::arm_neon_vqrshiftnu:
17310 case Intrinsic::arm_neon_vqrshiftnsu: {
17311 EVT VT = N->getOperand(1).getValueType();
17313 unsigned VShiftOpc = 0;
17316 case Intrinsic::arm_neon_vshifts:
17317 case Intrinsic::arm_neon_vshiftu:
17319 VShiftOpc = ARMISD::VSHLIMM;
17322 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
17323 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRsIMM
17324 : ARMISD::VSHRuIMM);
17329 case Intrinsic::arm_neon_vrshifts:
17330 case Intrinsic::arm_neon_vrshiftu:
17335 case Intrinsic::arm_neon_vqshifts:
17336 case Intrinsic::arm_neon_vqshiftu:
17341 case Intrinsic::arm_neon_vqshiftsu:
17346 case Intrinsic::arm_neon_vrshiftn:
17347 case Intrinsic::arm_neon_vqshiftns:
17348 case Intrinsic::arm_neon_vqshiftnu:
17349 case Intrinsic::arm_neon_vqshiftnsu:
17350 case Intrinsic::arm_neon_vqrshiftns:
17351 case Intrinsic::arm_neon_vqrshiftnu:
17352 case Intrinsic::arm_neon_vqrshiftnsu:
17364 case Intrinsic::arm_neon_vshifts:
17365 case Intrinsic::arm_neon_vshiftu:
17368 case Intrinsic::arm_neon_vrshifts:
17369 VShiftOpc = ARMISD::VRSHRsIMM;
17371 case Intrinsic::arm_neon_vrshiftu:
17372 VShiftOpc = ARMISD::VRSHRuIMM;
17374 case Intrinsic::arm_neon_vrshiftn:
17375 VShiftOpc = ARMISD::VRSHRNIMM;
17377 case Intrinsic::arm_neon_vqshifts:
17378 VShiftOpc = ARMISD::VQSHLsIMM;
17380 case Intrinsic::arm_neon_vqshiftu:
17381 VShiftOpc = ARMISD::VQSHLuIMM;
17383 case Intrinsic::arm_neon_vqshiftsu:
17384 VShiftOpc = ARMISD::VQSHLsuIMM;
17386 case Intrinsic::arm_neon_vqshiftns:
17387 VShiftOpc = ARMISD::VQSHRNsIMM;
17389 case Intrinsic::arm_neon_vqshiftnu:
17390 VShiftOpc = ARMISD::VQSHRNuIMM;
17392 case Intrinsic::arm_neon_vqshiftnsu:
17393 VShiftOpc = ARMISD::VQSHRNsuIMM;
17395 case Intrinsic::arm_neon_vqrshiftns:
17396 VShiftOpc = ARMISD::VQRSHRNsIMM;
17398 case Intrinsic::arm_neon_vqrshiftnu:
17399 VShiftOpc = ARMISD::VQRSHRNuIMM;
17401 case Intrinsic::arm_neon_vqrshiftnsu:
17402 VShiftOpc = ARMISD::VQRSHRNsuIMM;
17407 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17408 N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
17411 case Intrinsic::arm_neon_vshiftins: {
17412 EVT VT = N->getOperand(1).getValueType();
17414 unsigned VShiftOpc = 0;
17417 VShiftOpc = ARMISD::VSLIIMM;
17418 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
17419 VShiftOpc = ARMISD::VSRIIMM;
17425 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17426 N->getOperand(1), N->getOperand(2),
17431 case Intrinsic::arm_neon_vqrshiftu:
17435 case Intrinsic::arm_neon_vbsl: {
17437 return DAG.getNode(ARMISD::VBSP, dl, N->getValueType(0), N->getOperand(1),
17438 N->getOperand(2), N->getOperand(3));
17440 case Intrinsic::arm_mve_vqdmlah:
17441 case Intrinsic::arm_mve_vqdmlash:
17442 case Intrinsic::arm_mve_vqrdmlah:
17443 case Intrinsic::arm_mve_vqrdmlash:
17444 case Intrinsic::arm_mve_vmla_n_predicated:
17445 case Intrinsic::arm_mve_vmlas_n_predicated:
17446 case Intrinsic::arm_mve_vqdmlah_predicated:
17447 case Intrinsic::arm_mve_vqdmlash_predicated:
17448 case Intrinsic::arm_mve_vqrdmlah_predicated:
17449 case Intrinsic::arm_mve_vqrdmlash_predicated: {
17454 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
17461 case Intrinsic::arm_mve_minv:
17462 case Intrinsic::arm_mve_maxv:
17463 case Intrinsic::arm_mve_minav:
17464 case Intrinsic::arm_mve_maxav:
17465 case Intrinsic::arm_mve_minv_predicated:
17466 case Intrinsic::arm_mve_maxv_predicated:
17467 case Intrinsic::arm_mve_minav_predicated:
17468 case Intrinsic::arm_mve_maxav_predicated: {
17471 unsigned BitWidth = N->getOperand(2)->getValueType(0).getScalarSizeInBits();
17478 case Intrinsic::arm_mve_addv: {
17481 bool Unsigned = N->getConstantOperandVal(2);
17482 unsigned Opc = Unsigned ? ARMISD::VADDVu : ARMISD::VADDVs;
17486 case Intrinsic::arm_mve_addlv:
17487 case Intrinsic::arm_mve_addlv_predicated: {
17490 bool Unsigned = N->getConstantOperandVal(2);
17491 unsigned Opc = IntNo == Intrinsic::arm_mve_addlv ?
17492 (Unsigned ? ARMISD::VADDLVu : ARMISD::VADDLVs) :
17493 (Unsigned ? ARMISD::VADDLVpu : ARMISD::VADDLVps);
17496 for (unsigned i = 1, e = N->getNumOperands(); i < e; i++)
17498 Ops.push_back(N->getOperand(i));
17519 EVT VT = N->getValueType(0);
17521 if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
17522 N->getOperand(0)->getOpcode() == ISD::AND &&
17523 N->getOperand(0)->hasOneUse()) {
17540 if (AndMask == 255 || AndMask == 65535)
17544 if (MaskedBits > ShiftAmt) {
17559 if (ST->hasMVEIntegerOps())
17564 switch (N->getOpcode()) {
17570 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
17577 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
17578 unsigned VShiftOpc =
17579 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
17581 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
17597 if (!LD->isSimple() || !N0.hasOneUse() || LD->isIndexed() ||
17600 EVT FromVT = LD->getValueType(0);
17601 EVT ToVT = N->getValueType(0);
17608 unsigned NumElements = 0;
17609 if (ToEltVT == MVT::i32 && FromEltVT == MVT::i8)
17611 if (ToEltVT == MVT::f32 && FromEltVT == MVT::f16)
17613 if (NumElements == 0 ||
17623 SDValue BasePtr = LD->getBasePtr();
17624 Align Alignment = LD->getBaseAlign();
17645 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
17646 Alignment, MMOFlags, AAInfo);
17652 if (FromEltVT == MVT::f16) {
17655 for (unsigned i = 0; i < Loads.size(); i++) {
17657 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v8f16, Loads[i]);
17681 if ((ST->hasNEON() || ST->hasMVEIntegerOps()) &&
17685 EVT VT = N->getValueType(0);
17689 if (VT == MVT::i32 &&
17690 (EltVT == MVT::i8 || EltVT == MVT::i16) &&
17695 switch (N->getOpcode()) {
17698 Opc = ARMISD::VGETLANEs;
17702 Opc = ARMISD::VGETLANEu;
17709 if (ST->hasMVEIntegerOps())
17718 if (ST->hasMVEFloatOps())
17729 if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) &&
17733 EVT VT = Op.getValueType();
17736 if (VT != MVT::i32 ||
17749 APInt MaxC = Max.getConstantOperandAPInt(1);
17752 !(MinC + 1).isPowerOf2())
17770 EVT VT = N->getValueType(0);
17773 if (VT == MVT::i32)
17776 if (!ST->hasMVEIntegerOps())
17782 if (VT != MVT::v4i32 && VT != MVT::v8i16)
17785 auto IsSignedSaturate = [&](SDNode *Min, SDNode *Max) {
17793 if (VT == MVT::v4i32)
17794 SaturateC = APInt(32, (1 << 15) - 1, true);
17796 SaturateC = APInt(16, (1 << 7) - 1, true);
17803 MaxC != ~SaturateC)
17808 if (IsSignedSaturate(N, N0.getNode())) {
17811 if (VT == MVT::v4i32) {
17812 HalfVT = MVT::v8i16;
17813 ExtVT = MVT::v4i16;
17815 HalfVT = MVT::v16i8;
17830 auto IsUnsignedSaturate = [&](SDNode *Min) {
17836 if (VT == MVT::v4i32)
17837 SaturateC = APInt(32, (1 << 16) - 1, true);
17839 SaturateC = APInt(16, (1 << 8) - 1, true);
17848 if (IsUnsignedSaturate(N)) {
17852 if (VT == MVT::v4i32) {
17853 HalfVT = MVT::v8i16;
17854 ExtConst = 0x0000FFFF;
17856 HalfVT = MVT::v16i8;
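// --- Illustrative sketch (not part of the original file): the
// IsSignedSaturate/IsUnsignedSaturate helpers above recognise
// min(max(x, -2^(n-1)), 2^(n-1)-1) chains, which are exactly a saturating
// narrow, the lane-wise behaviour of VQMOVN:
//
#include <cstdint>
static int16_t ssat16(int32_t x) {
  if (x > 32767) return 32767;    // clamp to the signed 16-bit range
  if (x < -32768) return -32768;
  return (int16_t)x;
}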
17878 const APInt *CV = &C->getAPIntValue();
17896 SDValue Op0 = CMOV->getOperand(0);
17897 SDValue Op1 = CMOV->getOperand(1);
17898 auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue();
17899 SDValue CmpZ = CMOV->getOperand(3);
17935 unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
17942 if ((OrCI & Known.Zero) != OrCI)
17948 EVT VT = X.getValueType();
17949 unsigned BitInX = AndC->logBase2();
17957 for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
17958 BitInY < NumActiveBits; ++BitInY) {
17959 if (OrCI[BitInY] == 0)
17962 Mask.setBit(BitInY);
17963 V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
17979 switch (N->getOpcode()) {
17994 if (Const->isZero())
17996 else if (Const->isOne())
18004 unsigned IntOp = N.getConstantOperandVal(1);
18005 if (IntOp != Intrinsic::test_start_loop_iterations &&
18006 IntOp != Intrinsic::loop_decrement_reg)
18032 bool Negate = false;
18038 Cond = N->getOperand(1);
18039 Dest = N->getOperand(2);
18043 Cond = N->getOperand(2);
18044 Dest = N->getOperand(4);
18046 if (!Const->isOne() && !Const->isZero())
18048 Imm = Const->getZExtValue();
18076 assert((IsTrueIfZero(CC, Imm) || IsFalseIfZero(CC, Imm)) &&
18077 "unsupported condition");
18082 unsigned IntOp = Int->getConstantOperandVal(1);
18083 assert((N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BR) &&
18084 "expected single br user");
18085 SDNode *Br = *N->user_begin();
18095 if (IntOp == Intrinsic::test_start_loop_iterations) {
18097 SDValue Setup = DAG.getNode(ARMISD::WLSSETUP, dl, MVT::i32, Elements);
18099 if (IsTrueIfZero(CC, Imm)) {
18101 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18105 UpdateUncondBr(Br, Dest, DAG);
18107 SDValue Ops[] = {Chain, Setup, OtherTarget};
18108 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18120 DAG.getVTList(MVT::i32, MVT::Other), Args);
18124 SDValue Target = IsFalseIfZero(CC, Imm) ? Dest : OtherTarget;
18128 if (Target == OtherTarget)
18129 UpdateUncondBr(Br, Dest, DAG);
18135 return DAG.getNode(ARMISD::LE, dl, MVT::Other, EndArgs);
18144 if (Cmp.getOpcode() != ARMISD::CMPZ)
18149 SDValue LHS = Cmp.getOperand(0);
18150 SDValue RHS = Cmp.getOperand(1);
18159 LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
18160 LHS->getOperand(0)->hasOneUse() &&
18164 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, BB,
18176 if (Cmp.getOpcode() != ARMISD::CMPZ)
18180 EVT VT = N->getValueType(0);
18182 SDValue LHS = Cmp.getOperand(0);
18183 SDValue RHS = Cmp.getOperand(1);
18184 SDValue FalseVal = N->getOperand(0);
18185 SDValue TrueVal = N->getOperand(1);
18190 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
18214 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
18215 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, Cmp);
18216 } else if (CC == ARMCC::EQ && TrueVal == RHS) {
18219 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, NewCmp);
18224 if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse() &&
18227 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
18228 LHS->getOperand(2), LHS->getOperand(3));
18238 if (N->getConstantOperandVal(2) == ARMCC::EQ ||
18242 if (N->getConstantOperandVal(2) == ARMCC::NE)
18244 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
18253 if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) {
18286 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
18298 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
18318 const APInt *TrueConst;
18319 if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
18320 ((FalseVal.getOpcode() == ARMISD::SUBC && FalseVal.getOperand(0) == LHS &&
18321 FalseVal.getOperand(1) == RHS) ||
18325 unsigned ShiftAmount = TrueConst->logBase2();
18340 if (Known.Zero == 0xfffffffe)
18343 else if (Known.Zero == 0xffffff00)
18346 else if (Known.Zero == 0xffff0000)
18359 EVT DstVT = N->getValueType(0);
18362 if (ST->hasMVEIntegerOps() && Src.getOpcode() == ARMISD::VDUP) {
18363 EVT SrcVT = Src.getValueType();
18365 return DAG.getNode(ARMISD::VDUP, SDLoc(N), DstVT, Src.getOperand(0));
18370 if (Src.getOpcode() == ARMISD::VECTOR_REG_CAST &&
18371 Src.getOperand(0).getValueType().getScalarSizeInBits() <=
18372 Src.getValueType().getScalarSizeInBits())
18373 Src = Src.getOperand(0);
18377 EVT SrcVT = Src.getValueType();
18378 if ((Src.getOpcode() == ARMISD::VMOVIMM ||
18379 Src.getOpcode() == ARMISD::VMVNIMM ||
18380 Src.getOpcode() == ARMISD::VMOVFPIMM) &&
18383 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(N), DstVT, Src);
18397 EVT VT = N->getValueType(0);
18405 if (N->getNumOperands() == 2 &&
18409 N->getOperand(0).getOperand(1),
18410 N->getOperand(1).getOperand(0),
18411 N->getOperand(1).getOperand(1));
18414 if (N->getNumOperands() == 2 &&
18420 if (S0->getOperand(0) == S1->getOperand(0) &&
18421 S0->getOperand(1) == S1->getOperand(1)) {
18424 Mask.append(S1->getMask().begin(), S1->getMask().end());
18428 ARMISD::VMOVN, DL, VT,
18429 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18430 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18434 ARMISD::VMOVN, DL, VT,
18435 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18436 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18444 return Op.getOpcode() == ISD::BUILD_VECTOR ||
18445 Op.getOpcode() == ISD::VECTOR_SHUFFLE ||
18446 (Op.getOpcode() == ISD::BITCAST &&
18447 Op.getOperand(0).getOpcode() == ISD::BUILD_VECTOR);
18450 for (unsigned Op = 0; Op < N->getNumOperands(); Op++) {
18452 for (unsigned i = 0; i < O.getValueType().getVectorNumElements(); i++) {
18470 int NumIns = N->getNumOperands();
18471 assert((NumIns == 2 || NumIns == 4) &&
18472 "Expected 2 or 4 inputs to an MVETrunc");
18474 if (N->getNumOperands() == 4)
18478 for (int I = 0; I < NumIns; I++) {
18480 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18485 Ptr, MPI, StoreVT, Align(4));
18500 if (!LD || !LD->isSimple() || !N0.hasOneUse() || LD->isIndexed())
18503 EVT FromVT = LD->getMemoryVT();
18504 EVT ToVT = N->getValueType(0);
18511 unsigned NumElements = 0;
18512 if (ToEltVT == MVT::i32 && (FromEltVT == MVT::i16 || FromEltVT == MVT::i8))
18514 if (ToEltVT == MVT::i16 && FromEltVT == MVT::i8)
18516 assert(NumElements != 0);
18522 LD->getExtensionType() != NewExtType)
18529 SDValue BasePtr = LD->getBasePtr();
18530 Align Alignment = LD->getBaseAlign();
18549 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
18550 Alignment, MMOFlags, AAInfo);
18566 EVT VT = N->getValueType(0);
18568 assert(N->getNumValues() == 2 && "Expected MVEEXT with 2 elements");
18569 assert((VT == MVT::v4i32 || VT == MVT::v8i16) && "Unexpected MVEEXT type");
18571 EVT ExtVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18573 auto Extend = [&](SDValue V) {
18582 if (N->getOperand(0).getOpcode() == ARMISD::VDUP) {
18583 SDValue Ext = Extend(N->getOperand(0));
18591 assert(Mask.size() == SVN->getValueType(0).getVectorNumElements());
18592 unsigned Rev = VT == MVT::v4i32 ? ARMISD::VREV32 : ARMISD::VREV16;
18596 auto CheckInregMask = [&](int Start, int Offset) {
18598 if (Mask[Start + Idx] >= 0 && Mask[Start + Idx] != Idx * 2 + Offset)
18604 if (CheckInregMask(0, 0))
18606 else if (CheckInregMask(0, 1))
18607 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18608 else if (CheckInregMask(0, Mask.size()))
18610 else if (CheckInregMask(0, Mask.size() + 1))
18611 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18616 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18620 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18627 if (N->getOperand(0)->getOpcode() == ISD::LOAD)
18638 int NumOuts = N->getNumValues();
18639 assert((NumOuts == 2 || NumOuts == 4) &&
18640 "Expected 2 or 4 outputs to an MVEEXT");
18641 EVT LoadVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18643 if (N->getNumOperands() == 4)
18649 StackPtr, MPI, Align(4));
18652 for (int I = 0; I < NumOuts; I++) {
18654 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18655 DAG.getConstant(I * 16 / NumOuts, DL, StackPtr.getValueType()));
18660 VT, Chain, Ptr, MPI, LoadVT, Align(4));
18669 switch (N->getOpcode()) {
18729 case ARMISD::BRCOND:
18733 case ARMISD::CSINC:
18734 case ARMISD::CSINV:
18735 case ARMISD::CSNEG:
18748 case ARMISD::PREDICATE_CAST:
18750 case ARMISD::VECTOR_REG_CAST:
18761 case ARMISD::VADDVs:
18762 case ARMISD::VADDVu:
18763 case ARMISD::VADDLVs:
18764 case ARMISD::VADDLVu:
18765 case ARMISD::VADDLVAs:
18766 case ARMISD::VADDLVAu:
18767 case ARMISD::VMLAVs:
18768 case ARMISD::VMLAVu:
18769 case ARMISD::VMLALVs:
18770 case ARMISD::VMLALVu:
18771 case ARMISD::VMLALVAs:
18772 case ARMISD::VMLALVAu:
18774 case ARMISD::VMOVN:
18776 case ARMISD::VQMOVNs:
18777 case ARMISD::VQMOVNu:
18779 case ARMISD::VQDMULH:
18785 case ARMISD::SMULWB: {
18786 unsigned BitWidth = N->getValueType(0).getSizeInBits();
18792 case ARMISD::SMULWT: {
18793 unsigned BitWidth = N->getValueType(0).getSizeInBits();
18799 case ARMISD::SMLALBB:
18800 case ARMISD::QADD16b:
18801 case ARMISD::QSUB16b:
18802 case ARMISD::UQADD16b:
18803 case ARMISD::UQSUB16b: {
18804 unsigned BitWidth = N->getValueType(0).getSizeInBits();
18811 case ARMISD::SMLALBT: {
18812 unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
18814 unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
18821 case ARMISD::SMLALTB: {
18822 unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
18824 unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
18831 case ARMISD::SMLALTT: {
18832 unsigned BitWidth = N->getValueType(0).getSizeInBits();
18839 case ARMISD::QADD8b:
18840 case ARMISD::QSUB8b:
18841 case ARMISD::UQADD8b:
18842 case ARMISD::UQSUB8b: {
18843 unsigned BitWidth = N->getValueType(0).getSizeInBits();
18851 if (N->getOperand(1) == N->getOperand(2))
18852 return N->getOperand(1);
18856 switch (N->getConstantOperandVal(1)) {
18857 case Intrinsic::arm_neon_vld1:
18858 case Intrinsic::arm_neon_vld1x2:
18859 case Intrinsic::arm_neon_vld1x3:
18860 case Intrinsic::arm_neon_vld1x4:
18861 case Intrinsic::arm_neon_vld2:
18862 case Intrinsic::arm_neon_vld3:
18863 case Intrinsic::arm_neon_vld4:
18864 case Intrinsic::arm_neon_vld2lane:
18865 case Intrinsic::arm_neon_vld3lane:
18866 case Intrinsic::arm_neon_vld4lane:
18867 case Intrinsic::arm_neon_vld2dup:
18868 case Intrinsic::arm_neon_vld3dup:
18869 case Intrinsic::arm_neon_vld4dup:
18870 case Intrinsic::arm_neon_vst1:
18871 case Intrinsic::arm_neon_vst1x2:
18872 case Intrinsic::arm_neon_vst1x3:
18873 case Intrinsic::arm_neon_vst1x4:
18874 case Intrinsic::arm_neon_vst2:
18875 case Intrinsic::arm_neon_vst3:
18876 case Intrinsic::arm_neon_vst4:
18877 case Intrinsic::arm_neon_vst2lane:
18878 case Intrinsic::arm_neon_vst3lane:
18879 case Intrinsic::arm_neon_vst4lane:
18881 case Intrinsic::arm_mve_vld2q:
18882 case Intrinsic::arm_mve_vld4q:
18883 case Intrinsic::arm_mve_vst2q:
18884 case Intrinsic::arm_mve_vst4q:
18901 unsigned *Fast) const {
18907 bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
18910 if (Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32) {
18912 if (AllowsUnaligned) {
18914 *Fast = Subtarget->hasV7Ops();
18919 if (Ty == MVT::f64 || Ty == MVT::v2f64) {
18923 if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
18930 if (!Subtarget->hasMVEIntegerOps())
18934 if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1 ||
18935 Ty == MVT::v2i1)) {
18943 if ((Ty == MVT::v4i8 || Ty == MVT::v8i8 || Ty == MVT::v4i16) &&
18959 if (Ty == MVT::v16i8 || Ty == MVT::v8i16 || Ty == MVT::v8f16 ||
18960 Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 ||
18961 Ty == MVT::v2f64) {
18972 const AttributeList &FuncAttributes) const {
18974 if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
18975 !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
18977 if (Op.size() >= 16 &&
18983 } else if (Op.size() >= 8 &&
19000 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
19002 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
19004 return (SrcBits == 64 && DestBits == 32);
19013 return (SrcBits == 64 && DestBits == 32);
19049 return Subtarget->hasFullFP16();
19056 if (!Subtarget->hasMVEIntegerOps())
19075 if (Ld->isExpandingLoad())
19079 if (Subtarget->hasMVEIntegerOps())
19092 U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
19124bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
19126 if (Subtarget->useSoftFloat())
19135 return Subtarget->hasMVEFloatOps();
19153 unsigned Scale = 1;
19170 if ((V & (Scale - 1)) != 0)
19179 if (VT.isVector() && Subtarget->hasNEON())
19182 !Subtarget->hasMVEFloatOps())
19185 bool IsNeg = false;
19191 unsigned NumBytes = std::max((unsigned)VT.getSizeInBits() / 8, 1U);
19194 if (VT.isVector() && Subtarget->hasMVEIntegerOps()) {
19210 if (VT.isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())
19216 if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {
19246 default: return false;
19265 int Scale = AM.Scale;
19270 default: return false;
19278 Scale = Scale & ~1;
19279 return Scale == 2 || Scale == 4 || Scale == 8;
19296 if (Scale & 1) return false;
19303 const int Scale = AM.Scale;
19313 return (Scale == 1) || (!AM.HasBaseReg && Scale == 2);
19329 switch (AM.Scale) {
19340 if (Subtarget->isThumb1Only())
19343 if (Subtarget->isThumb2())
19346 int Scale = AM.Scale;
19348 default: return false;
19352 if (Scale < 0) Scale = -Scale;
19360 if (Scale == 1 || (AM.HasBaseReg && Scale == -1))
19373 if (Scale & 1) return false;
19386 if (!Subtarget->isThumb())
19389 if (Subtarget->isThumb2())
19393 return Imm >= 0 && Imm <= 255;
19403 if (!Subtarget->isThumb())
19405 if (Subtarget->isThumb2())
19408 return AbsImm <= 255;
19443 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
19447 int RHSC = (int)RHS->getZExtValue();
19448 if (RHSC < 0 && RHSC > -256) {
19458 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
19461 int RHSC = (int)RHS->getZExtValue();
19462 if (RHSC < 0 && RHSC > -0x1000) {
19504 int RHSC = (int)RHS->getZExtValue();
19505 if (RHSC < 0 && RHSC > -0x100) {
19510 } else if (RHSC > 0 && RHSC < 0x100) {
19521 bool isSEXTLoad, bool IsMasked, bool isLE,
19532 bool CanChangeType = isLE && !IsMasked;
19535 int RHSC = (int)RHS->getZExtValue();
19537 auto IsInRange = [&](int RHSC, int Limit, int Scale) {
19538 if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) {
19543 } else if (RHSC > 0 && RHSC < Limit * Scale && RHSC % Scale == 0) {
19554 if (VT == MVT::v4i16) {
19555 if (Alignment >= 2 && IsInRange(RHSC, 0x80, 2))
19557 } else if (VT == MVT::v4i8 || VT == MVT::v8i8) {
19558 if (IsInRange(RHSC, 0x80, 1))
19560 } else if (Alignment >= 4 &&
19561 (CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) &&
19562 IsInRange(RHSC, 0x80, 4))
19564 else if (Alignment >= 2 &&
19565 (CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) &&
19566 IsInRange(RHSC, 0x80, 2))
19568 else if ((CanChangeType || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1))
19581 if (Subtarget->isThumb1Only())
19587 bool isSEXTLoad = false;
19588 bool IsMasked = false;
19590 Ptr = LD->getBasePtr();
19591 VT = LD->getMemoryVT();
19592 Alignment = LD->getAlign();
19595 Ptr = ST->getBasePtr();
19596 VT = ST->getMemoryVT();
19597 Alignment = ST->getAlign();
19599 Ptr = LD->getBasePtr();
19600 VT = LD->getMemoryVT();
19601 Alignment = LD->getAlign();
19605 Ptr = ST->getBasePtr();
19606 VT = ST->getMemoryVT();
19607 Alignment = ST->getAlign();
19613 bool isLegal = false;
19615 isLegal = Subtarget->hasMVEIntegerOps() &&
19617 Ptr.getNode(), VT, Alignment, isSEXTLoad, IsMasked,
19618 Subtarget->isLittle(), Base, Offset, isInc, DAG);
19620 if (Subtarget->isThumb2())
19645 bool isSEXTLoad = false, isNonExt;
19646 bool IsMasked = false;
19648 VT = LD->getMemoryVT();
19649 Ptr = LD->getBasePtr();
19650 Alignment = LD->getAlign();
19654 VT = ST->getMemoryVT();
19655 Ptr = ST->getBasePtr();
19656 Alignment = ST->getAlign();
19657 isNonExt = !ST->isTruncatingStore();
19659 VT = LD->getMemoryVT();
19660 Ptr = LD->getBasePtr();
19661 Alignment = LD->getAlign();
19666 VT = ST->getMemoryVT();
19667 Ptr = ST->getBasePtr();
19668 Alignment = ST->getAlign();
19669 isNonExt = !ST->isTruncatingStore();
19674 if (Subtarget->isThumb1Only()) {
19677 assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
19678 if (Op->getOpcode() != ISD::ADD || !isNonExt)
19681 if (!RHS || RHS->getZExtValue() != 4)
19683 if (Alignment < Align(4))
19687 Base = Op->getOperand(0);
19693 bool isLegal = false;
19695 isLegal = Subtarget->hasMVEIntegerOps() &&
19700 if (Subtarget->isThumb2())
19714 !Subtarget->isThumb2())
19728 const APInt &DemandedElts,
19730 unsigned Depth) const {
19733 switch (Op.getOpcode()) {
19740 if (Op.getResNo() == 0) {
19751 case ARMISD::CMOV: {
19766 case Intrinsic::arm_ldaex:
19767 case Intrinsic::arm_ldrex: {
19775 case ARMISD::BFI: {
19782 const APInt &Mask = Op.getConstantOperandAPInt(2);
19783 Known.Zero &= Mask;
19787 case ARMISD::VGETLANEs:
19788 case ARMISD::VGETLANEu: {
19789 const SDValue &SrcSV =
Op.getOperand(0);
19795 "VGETLANE index out of bounds");
19800 EVT VT =
Op.getValueType();
19806 if (
Op.getOpcode() == ARMISD::VGETLANEs)
19807 Known = Known.
sext(DstSz);
19809 Known = Known.
zext(DstSz);
19814 case ARMISD::VMOVrh: {
19817 Known = KnownOp.
zext(32);
19820 case ARMISD::CSINC:
19821 case ARMISD::CSINV:
19822 case ARMISD::CSNEG: {
19830 if (
Op.getOpcode() == ARMISD::CSINC)
19833 else if (
Op.getOpcode() == ARMISD::CSINV)
19835 else if (
Op.getOpcode() == ARMISD::CSNEG)
19842 case ARMISD::VORRIMM:
19843 case ARMISD::VBICIMM: {
19844 unsigned Encoded =
Op.getConstantOperandVal(1);
19845 unsigned DecEltBits = 0;
19848 unsigned EltBits =
Op.getScalarValueSizeInBits();
19849 if (EltBits != DecEltBits) {
19858 bool IsVORR =
Op.getOpcode() == ARMISD::VORRIMM;
19859 APInt Imm(DecEltBits, DecodedVal);
19861 Known.
One = IsVORR ? (KnownLHS.
One | Imm) : (KnownLHS.
One & ~Imm);
19862 Known.
Zero = IsVORR ? (KnownLHS.
Zero & ~Imm) : (KnownLHS.
Zero | Imm);
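// Worked example (illustrative): for a VORRIMM whose encoded immediate
// decodes to 0x0000FF00 per element, bits 8-15 of every lane are known-one
// afterwards, and a bit can only remain known-zero if it was known-zero in
// the input and lies outside the immediate. VBICIMM is the dual: bits under
// the immediate become known-zero.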
bool ARMTargetLowering::targetShrinkDemandedConstant(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    TargetLoweringOpt &TLO) const {
  // Delay optimization, so we don't have to deal with illegal types, or block
  // optimizations.
  if (!TLO.LegalOps)
    return false;

  // Only optimize AND for now.
  if (Op.getOpcode() != ISD::AND)
    return false;

  EVT VT = Op.getValueType();

  // Ignore vectors.
  if (VT.isVector())
    return false;

  assert(VT == MVT::i32 && "Unexpected integer type");

  // Make sure the RHS really is a constant.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!C)
    return false;

  unsigned Mask = C->getZExtValue();
  unsigned Demanded = DemandedBits.getZExtValue();
  unsigned ShrunkMask = Mask & Demanded;
  unsigned ExpandedMask = Mask | ~Demanded;

  // If the mask is all zeros, let the target-independent code replace the
  // result with zero.
  if (ShrunkMask == 0)
    return false;

  // If the mask is all ones, erase the AND. (Currently, the target-independent
  // code won't do this, so we have to do it explicitly to avoid an infinite
  // loop in obscure cases.)
  if (ExpandedMask == ~0U)
    return TLO.CombineTo(Op, Op.getOperand(0));

  auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
    return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
  };
  auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
    if (NewMask == Mask)
      return true;
    SDLoc DL(Op);
    SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
    SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
    return TLO.CombineTo(Op, NewOp);
  };

  // Prefer uxtb mask.
  if (IsLegalMask(0xFF))
    return UseMask(0xFF);

  // Prefer uxth mask.
  if (IsLegalMask(0xFFFF))
    return UseMask(0xFFFF);

  // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
  if (ShrunkMask < 256)
    return UseMask(ShrunkMask);

  // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
  if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
    return UseMask(ExpandedMask);

  return false;
}
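// Worked example (illustrative): for (x & 0xFFF0) where only bits 0-7 are
// demanded, ShrunkMask == 0xF0 and 0xF0 < 256, so the AND is rewritten as
// (x & 0xF0): a movs+ands on Thumb1 and a legal modified-immediate on
// ARM/Thumb2, avoiding materializing 0xFFF0 in a register.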
bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known,
    TargetLoweringOpt &TLO, unsigned Depth) const {
  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  case ARMISD::ASRL:
  case ARMISD::LSRL: {
    // If this is result 0 and the other result is unused, see if the demanded
    // bits allow us to shrink the long shift into a standard small shift in
    // the opposite direction.
    if (Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) &&
        isa<ConstantSDNode>(Op->getOperand(2))) {
      unsigned ShAmt = Op->getConstantOperandVal(2);
      // ...
    }
    break;
  }
  case ARMISD::VBICIMM: {
    unsigned ModImm = Op.getConstantOperandVal(1);
    unsigned EltBits = 0;
    uint64_t Mask = ARM_AM::decodeVMOVModImm(ModImm, EltBits);
    // If none of the demanded bits are cleared by the immediate, the VBIC
    // is a no-op and its input can be used directly.
    if ((OriginalDemandedBits & Mask) == 0)
      return TLO.CombineTo(Op, Op.getOperand(0));
    break;
  }
  }

  return TargetLowering::SimplifyDemandedBitsForTargetNode(
      Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
}
const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
  // Lower an 'X' constraint to "w" when a VFP/NEON register can hold the
  // value; otherwise fall back to "r".
  if (!Subtarget->hasVFP2Base())
    return "r";
  // ...
  if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
      (ConstraintVT.getSizeInBits() == 64 ||
       ConstraintVT.getSizeInBits() == 128))
    return "w";
  return "r";
}
ARMTargetLowering::ConstraintType
ARMTargetLowering::getConstraintType(StringRef Constraint) const {
  unsigned S = Constraint.size();
  if (S == 1) {
    switch (Constraint[0]) {
    default:
      break;
    // ... ('l', 'h', 'w', 'x', 't' map to register classes; 'j' is a movw
    //      immediate)
    }
  } else if (S == 2) {
    switch (Constraint[0]) {
    default:
      break;
    // ... ('T*' maps to register classes; 'U*' are memory constraints)
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}
TargetLowering::ConstraintWeight
ARMTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match, but allow it at the
  // lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;
  case 'l':
    // ... (for integer operands)
    if (Subtarget->isThumb())
      weight = CW_SpecificReg;
    break;
  // ...
  }
  return weight;
}
static bool isIncompatibleReg(const MCPhysReg &PR, MVT VT) {
  if (PR == 0 || VT == MVT::Other)
    return false;
  if (ARM::SPRRegClass.contains(PR))
    return VT != MVT::f32 && VT != MVT::f16 && VT != MVT::i32;
  if (ARM::DPRRegClass.contains(PR))
    // Similarly reject types a D register cannot hold.
    return !VT.is64BitVector() && VT != MVT::f64;
  return false;
}
using RCPair = std::pair<unsigned, const TargetRegisterClass *>;

RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  switch (Constraint.size()) {
  case 1:
    // GCC ARM Constraint Letters
    switch (Constraint[0]) {
    case 'l': // Low regs or general regs.
      if (Subtarget->isThumb())
        return RCPair(0U, &ARM::tGPRRegClass);
      return RCPair(0U, &ARM::GPRRegClass);
    case 'h': // High regs or no regs.
      if (Subtarget->isThumb())
        return RCPair(0U, &ARM::hGPRRegClass);
      break;
    case 'r':
      if (Subtarget->isThumb1Only())
        return RCPair(0U, &ARM::tGPRRegClass);
      return RCPair(0U, &ARM::GPRRegClass);
    case 'w':
      if (VT == MVT::Other)
        break;
      if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
        return RCPair(0U, &ARM::SPRRegClass);
      if (VT.getSizeInBits() == 64)
        return RCPair(0U, &ARM::DPRRegClass);
      if (VT.getSizeInBits() == 128)
        return RCPair(0U, &ARM::QPRRegClass);
      break;
    case 'x':
      if (VT == MVT::Other)
        break;
      if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
        return RCPair(0U, &ARM::SPR_8RegClass);
      if (VT.getSizeInBits() == 64)
        return RCPair(0U, &ARM::DPR_8RegClass);
      if (VT.getSizeInBits() == 128)
        return RCPair(0U, &ARM::QPR_8RegClass);
      break;
    case 't':
      if (VT == MVT::Other)
        break;
      if (VT == MVT::f32 || VT == MVT::i32 || VT == MVT::f16 || VT == MVT::bf16)
        return RCPair(0U, &ARM::SPRRegClass);
      if (VT.getSizeInBits() == 64)
        return RCPair(0U, &ARM::DPR_VFP2RegClass);
      if (VT.getSizeInBits() == 128)
        return RCPair(0U, &ARM::QPR_VFP2RegClass);
      break;
    }
    break;

  case 2:
    if (Constraint[0] == 'T') {
      switch (Constraint[1]) {
      default:
        break;
      case 'e':
        return RCPair(0U, &ARM::tGPREvenRegClass);
      case 'o':
        return RCPair(0U, &ARM::tGPROddRegClass);
      }
    }
    break;

  default:
    break;
  }

  if (StringRef("{cc}").equals_insensitive(Constraint))
    return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);

  auto RCP = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
  if (isIncompatibleReg(RCP.first, VT))
    return {0, nullptr};
  return RCP;
}
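// Illustrative only: how these constraints appear in user code on a Thumb
// target with VFP ('l' selects r0-r7, 't' a single-precision S register):
//   int n;
//   float f;
//   __asm__("adds %0, %0, #1" : "+l"(n));
//   __asm__("vabs.f32 %0, %1" : "=t"(f) : "t"(f));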
void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     StringRef Constraint,
                                                     std::vector<SDValue> &Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;

  // Currently only support length 1 constraints.
  if (Constraint.size() != 1)
    return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'j':
  case 'I': case 'J': case 'K': case 'L':
  case 'M': case 'N': case 'O':
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
    if (!C)
      return;

    int64_t CVal64 = C->getSExtValue();
    int CVal = (int) CVal64;
    // None of these constraints allow values larger than 32 bits.  Check
    // that the value fits in an int.
    if (CVal != CVal64)
      return;

    switch (ConstraintLetter) {
      case 'j':
        // Constant suitable for movw, must be between 0 and 65535.
        if (Subtarget->hasV6T2Ops() || Subtarget->hasV8MBaselineOps())
          if (CVal >= 0 && CVal <= 65535)
            break;
        return;
      case 'I':
        if (Subtarget->isThumb1Only()) {
          // This must be a constant between 0 and 255, for ADD immediates.
          if (CVal >= 0 && CVal <= 255)
            break;
        } else if (Subtarget->isThumb2()) {
          // ... (any valid Thumb2 modified immediate)
        } else {
          // ... (any valid ARM modified immediate)
        }
        return;
      case 'J':
        if (Subtarget->isThumb1Only()) {
          // This must be a constant between -255 and -1, for negated ADD
          // immediates.
          if (CVal >= -255 && CVal <= -1)
            break;
        } else {
          // This must be a constant between -4095 and 4095, for compatibility
          // with GCC.
          if (CVal >= -4095 && CVal <= 4095)
            break;
        }
        return;
      case 'K':
        if (Subtarget->isThumb1Only()) {
          // ... (a value loadable with a single move/shift combination)
        } else if (Subtarget->isThumb2()) {
          // ...
        } else {
          // ...
        }
        return;
      case 'L':
        if (Subtarget->isThumb1Only()) {
          // This must be a constant between -7 and 7, for 3-operand ADD/SUB
          // immediates.
          if (CVal >= -7 && CVal < 7)
            break;
        } else if (Subtarget->isThumb2()) {
          // ...
        } else {
          // ...
        }
        return;
      case 'M':
        if (Subtarget->isThumb1Only()) {
          // This must be a multiple of 4 between 0 and 1020, for
          // ADD sp + immediate.
          if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
            break;
        } else {
          // A power of two or a constant between 0 and 32; used for shift
          // amounts.
          if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
            break;
        }
        return;
      case 'N':
        if (Subtarget->isThumb1Only()) {
          // This must be a constant between 0 and 31, for shift amounts.
          if (CVal >= 0 && CVal <= 31)
            break;
        }
        return;
      case 'O':
        if (Subtarget->isThumb1Only()) {
          // This must be a multiple of 4 between -508 and 508, for
          // ADD/SUB sp = sp + immediate.
          if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
            break;
        }
        return;
    }
    // ... (materialize the accepted CVal as a target constant in Result)
    break;
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
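// Illustrative only: the immediate constraints validated above reject
// out-of-range constants at compile time, e.g. on Thumb-1
//   __asm__("adds %0, %1" : "+l"(r) : "I"(255));   // OK: 0..255
// would be rejected with "I"(256), taking the 'return' paths above so no
// operand is added.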
20336 "Unhandled Opcode in getDivRemLibcall");
20342 case MVT::i8: LC =
isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8;
break;
20343 case MVT::i16: LC =
isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16;
break;
20344 case MVT::i32: LC =
isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
break;
20345 case MVT::i64: LC =
isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64;
break;
20354 "Unhandled Opcode in getDivRemArgList");
20358 for (
unsigned i = 0, e =
N->getNumOperands(); i != e; ++i) {
20359 EVT ArgVT =
N->getOperand(i).getValueType();
20364 Args.push_back(Entry);
SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
  assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
          Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
          Subtarget->isTargetFuchsia() || Subtarget->isTargetWindows()) &&
         "Register-based DivRem lowering only");
  unsigned Opcode = Op->getOpcode();
  assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
         "Invalid opcode for Div/Rem lowering");
  bool isSigned = (Opcode == ISD::SDIVREM);
  EVT VT = Op->getValueType(0);
  SDLoc dl(Op);
  // ...

  // If the target has hardware divide, use divide + multiply + subtract:
  //   div = a / b
  //   rem = a - b * div
  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
                                        : Subtarget->hasDivideInARMMode();
  if (hasDivide && Op->getValueType(0).isSimple() &&
      Op->getSimpleValueType(0) == MVT::i32) {
    unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    const SDValue Dividend = Op->getOperand(0);
    const SDValue Divisor = Op->getOperand(1);
    SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
    SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);

    SDValue Values[2] = {Div, Rem};
    return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
  }

  // Otherwise emit the divmod libcall.
  SDValue InChain = DAG.getEntryNode();
  // ...
  if (Subtarget->isTargetWindows())
    InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);

  TargetLowering::CallLoweringInfo CLI(DAG);
  // ...
  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
  return CallInfo.first;
}
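// Illustrative only: with hardware divide this produces
//   sdiv r2, r0, r1
//   mls  r3, r2, r1, r0    ; rem = a - (a / b) * b
// while EABI targets without divide call __aeabi_idivmod, which returns the
// quotient in r0 and the remainder in r1.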
// Lowers REM using divmod helpers; see RTABI section 4.2/4.3.
SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
  EVT VT = N->getValueType(0);

  // ... (fast paths may assemble the result directly from
  //      Result[0], Result[1])

  // Build the {div, rem} aggregate return type for the divmod libcall.
  std::vector<Type*> RetTyParams;
  Type *RetTyElement;
  // ...
  RetTyParams.push_back(RetTyElement);
  RetTyParams.push_back(RetTyElement);
  // ...
  if (Subtarget->isTargetWindows())
    InChain = WinDBZCheckDenominator(DAG, N, InChain);
  // ...
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // Return the second (rem) result operand; the first contains div.
  SDNode *ResNode = CallResult.first.getNode();
  return ResNode->getOperand(1);
}
SDValue ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                   SelectionDAG &DAG) const {
  assert(Subtarget->isTargetWindows() && "unsupported target platform");
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);

  if (DAG.getMachineFunction().getFunction().hasFnAttribute(
          "no-stack-arg-probe")) {
    // Without stack probing, adjust SP directly.
    SDValue SP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
    Chain = SP.getValue(1);
    SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size);
    // ... (apply any requested over-alignment)
    Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP);
    return DAG.getMergeValues({SP, Chain}, DL);
  }

  // Otherwise pass the allocation size, in words, to __chkstk in R4.
  SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
                              DAG.getConstant(2, DL, MVT::i32));
  SDValue Glue;
  Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Glue);
  Glue = Chain.getValue(1);

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Glue);

  // ... (read the updated SP back and return {SP, Chain})
}
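// Illustrative note: Windows on ARM expects the requested allocation in R4
// as a count of 4-byte words (hence the SRL-by-2 above). The WIN__CHKSTK
// pseudo expands to the __chkstk call plus the SP adjustment, after which
// the new SP is read back as the result.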
SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
  const unsigned DstSz = Op.getValueType().getSizeInBits();
  const unsigned SrcSz = SrcVal.getValueType().getSizeInBits();
  assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&
         "Unexpected type for custom-lowering FP_EXTEND");

  assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
         "With both FP DP and 16, any FP conversion is legal!");

  assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
         "With FP16, 16 to 32 conversion is legal!");

  // Converting from 32 -> 64 is valid if we have FP64.
  if (SrcSz == 32 && DstSz == 64 && Subtarget->hasFP64()) {
    // FIXME: Remove this when we have strict fp instruction selection patterns
    if (IsStrict) {
      SDLoc Loc(Op);
      SDValue Result =
          DAG.getNode(ISD::FP_EXTEND, Loc, Op.getValueType(), SrcVal);
      return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
    }
    return Op;
  }

  // Otherwise extend in one or two steps (16 -> 32 -> 64), using a native
  // instruction where a step is supported and a libcall where it is not.
  SDLoc Loc(Op);
  MakeLibCallOptions CallOptions;
  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
  for (unsigned Sz = SrcSz; Sz <= 32 && Sz < DstSz; Sz *= 2) {
    bool Supported = (Sz == 16 ? Subtarget->hasFP16() : Subtarget->hasFP64());
    MVT SrcVT = (Sz == 16 ? MVT::f16 : MVT::f32);
    MVT DstVT = (Sz == 16 ? MVT::f32 : MVT::f64);
    if (Supported) {
      if (IsStrict) {
        SrcVal = DAG.getNode(ISD::STRICT_FP_EXTEND, Loc,
                             {DstVT, MVT::Other}, {Chain, SrcVal});
        Chain = SrcVal.getValue(1);
      } else {
        SrcVal = DAG.getNode(ISD::FP_EXTEND, Loc, DstVT, SrcVal);
      }
    } else {
      RTLIB::Libcall LC = RTLIB::getFPEXT(SrcVT, DstVT);
      assert(LC != RTLIB::UNKNOWN_LIBCALL &&
             "Unexpected type for custom-lowering FP_EXTEND");
      std::tie(SrcVal, Chain) =
          makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions, Loc, Chain);
    }
  }

  return IsStrict ? DAG.getMergeValues({SrcVal, Chain}, Loc) : SrcVal;
}
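// Illustrative only: on a target with neither FP16 nor FP64 hardware, an
// f16 -> f64 extension takes two libcall steps, conceptually
//   f16 -> f32 (e.g. __aeabi_h2f), then f32 -> f64 (e.g. __aeabi_f2d);
// the exact symbols depend on the runtime ABI in use.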
SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
  bool IsStrict = Op->isStrictFPOpcode();

  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
  EVT SrcVT = SrcVal.getValueType();
  EVT DstVT = Op.getValueType();
  const unsigned DstSz = Op.getValueType().getSizeInBits();
  const unsigned SrcSz = SrcVT.getSizeInBits();
  assert(DstSz < SrcSz && SrcSz <= 64 && DstSz >= 16 &&
         "Unexpected type for custom-lowering FP_ROUND");

  assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
         "With both FP DP and 16, any FP conversion is legal!");

  SDLoc Loc(Op);

  // Instruction from 32 -> 16 if hasFP16 is valid.
  if (SrcSz == 32 && Subtarget->hasFP16())
    return Op;

  // Lib call from 32 -> 16 / 64 -> [32, 16].
  RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);
  assert(LC != RTLIB::UNKNOWN_LIBCALL &&
         "Unexpected type for custom-lowering FP_ROUND");
  MakeLibCallOptions CallOptions;
  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
  SDValue Result;
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions, Loc, Chain);
  return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
}
bool ARM::isBitFieldInvertedMask(unsigned v) {
  if (v == 0xffffffff)
    return false;

  // There can be 1's on either or both ends; all the "inside" bits must be 0.
  return isShiftedMask_32(~v);
}
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will materialize
/// the FP immediate as a load from a constant pool.
bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                     bool ForCodeSize) const {
  if (!Subtarget->hasVFP3Base())
    return false;
  if (VT == MVT::f16 && Subtarget->hasFullFP16())
    return ARM_AM::getFP16Imm(Imm) != -1;
  if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
      ARM_AM::getFP32FP16Imm(Imm) != -1)
    return true;
  if (VT == MVT::f32)
    return ARM_AM::getFP32Imm(Imm) != -1;
  if (VT == MVT::f64 && Subtarget->hasFP64())
    return ARM_AM::getFP64Imm(Imm) != -1;
  return false;
}
/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
/// MemIntrinsicNodes. The associated MachineMemOperands record the memory
/// specified in the intrinsic calls.
bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           MachineFunction &MF,
                                           unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vld2dup:
  case Intrinsic::arm_neon_vld3dup:
  case Intrinsic::arm_neon_vld4dup: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    // Conservatively set memVT to the entire set of vectors loaded.
    auto &DL = I.getDataLayout();
    uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
    Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
    Info.flags = MachineMemOperand::MOLoad;
    return true;
  }
  case Intrinsic::arm_neon_vld1x2:
  case Intrinsic::arm_neon_vld1x3:
  case Intrinsic::arm_neon_vld1x4: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    // Conservatively set memVT to the entire set of vectors loaded.
    auto &DL = I.getDataLayout();
    uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
    Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
    Info.offset = 0;
    Info.align = I.getParamAlign(I.arg_size() - 1).valueOrOne();
    Info.flags = MachineMemOperand::MOLoad;
    return true;
  }
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    Info.opc = ISD::INTRINSIC_VOID;
    // Conservatively set memVT to the entire set of vectors stored.
    auto &DL = I.getDataLayout();
    unsigned NumElts = 0;
    for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
      Type *ArgTy = I.getArgOperand(ArgI)->getType();
      if (!ArgTy->isVectorTy())
        break;
      NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
    }
    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
    Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
    Info.flags = MachineMemOperand::MOStore;
    return true;
  }
  case Intrinsic::arm_neon_vst1x2:
  case Intrinsic::arm_neon_vst1x3:
  case Intrinsic::arm_neon_vst1x4: {
    Info.opc = ISD::INTRINSIC_VOID;
    // Conservatively set memVT to the entire set of vectors stored.
    auto &DL = I.getDataLayout();
    unsigned NumElts = 0;
    for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
      Type *ArgTy = I.getArgOperand(ArgI)->getType();
      if (!ArgTy->isVectorTy())
        break;
      NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
    }
    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = I.getParamAlign(0).valueOrOne();
    Info.flags = MachineMemOperand::MOStore;
    return true;
  }
  case Intrinsic::arm_mve_vld2q:
  case Intrinsic::arm_mve_vld4q: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    // Conservatively set memVT to the entire set of vectors loaded.
    Type *VecTy = cast<StructType>(I.getType())->getElementType(1);
    unsigned Factor = Intrinsic == Intrinsic::arm_mve_vld2q ? 2 : 4;
    Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(VecTy->getScalarSizeInBits() / 8);
    Info.flags = MachineMemOperand::MOLoad;
    return true;
  }
  case Intrinsic::arm_mve_vst2q:
  case Intrinsic::arm_mve_vst4q: {
    Info.opc = ISD::INTRINSIC_VOID;
    // Conservatively set memVT to the entire set of vectors stored.
    Type *VecTy = I.getArgOperand(1)->getType();
    unsigned Factor = Intrinsic == Intrinsic::arm_mve_vst2q ? 2 : 4;
    Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(VecTy->getScalarSizeInBits() / 8);
    Info.flags = MachineMemOperand::MOStore;
    return true;
  }
  case Intrinsic::arm_mve_vldr_gather_base:
  case Intrinsic::arm_mve_vldr_gather_base_predicated: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.ptrVal = nullptr;
    Info.memVT = MVT::getVT(I.getType());
    Info.align = Align(1);
    Info.flags |= MachineMemOperand::MOLoad;
    return true;
  }
  case Intrinsic::arm_mve_vldr_gather_base_wb:
  case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.ptrVal = nullptr;
    Info.memVT = MVT::getVT(I.getType()->getContainedType(0));
    Info.align = Align(1);
    Info.flags |= MachineMemOperand::MOLoad;
    return true;
  }
  case Intrinsic::arm_mve_vldr_gather_offset:
  case Intrinsic::arm_mve_vldr_gather_offset_predicated: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.ptrVal = nullptr;
    // The memory element width is given by an explicit argument.
    MVT DataVT = MVT::getVT(I.getType());
    unsigned MemSize = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
    Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
                                  DataVT.getVectorNumElements());
    Info.align = Align(1);
    Info.flags |= MachineMemOperand::MOLoad;
    return true;
  }
  case Intrinsic::arm_mve_vstr_scatter_base:
  case Intrinsic::arm_mve_vstr_scatter_base_predicated: {
    Info.opc = ISD::INTRINSIC_VOID;
    Info.ptrVal = nullptr;
    Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
    Info.align = Align(1);
    Info.flags |= MachineMemOperand::MOStore;
    return true;
  }
  case Intrinsic::arm_mve_vstr_scatter_base_wb:
  case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.ptrVal = nullptr;
    Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
    Info.align = Align(1);
    Info.flags |= MachineMemOperand::MOStore;
    return true;
  }
  case Intrinsic::arm_mve_vstr_scatter_offset:
  case Intrinsic::arm_mve_vstr_scatter_offset_predicated: {
    Info.opc = ISD::INTRINSIC_VOID;
    Info.ptrVal = nullptr;
    MVT DataVT = MVT::getVT(I.getArgOperand(2)->getType());
    unsigned MemSize = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
    Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
                                  DataVT.getVectorNumElements());
    Info.align = Align(1);
    Info.flags |= MachineMemOperand::MOStore;
    return true;
  }
  case Intrinsic::arm_ldaex:
  case Intrinsic::arm_ldrex: {
    auto &DL = I.getDataLayout();
    Type *ValTy = I.getParamElementType(0);
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(ValTy);
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = DL.getABITypeAlign(ValTy);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
    return true;
  }
  case Intrinsic::arm_stlex:
  case Intrinsic::arm_strex: {
    auto &DL = I.getDataLayout();
    Type *ValTy = I.getParamElementType(1);
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(ValTy);
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = 0;
    Info.align = DL.getABITypeAlign(ValTy);
    Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
    return true;
  }
  case Intrinsic::arm_stlexd:
  case Intrinsic::arm_strexd:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i64;
    Info.ptrVal = I.getArgOperand(2);
    Info.offset = 0;
    Info.align = Align(8);
    Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
    return true;
  case Intrinsic::arm_ldaexd:
  case Intrinsic::arm_ldrexd:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i64;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(8);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
    return true;
  default:
    break;
  }
  return false;
}
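// Illustrative only: a NEON structured load such as
//   %r = call { <4 x i32>, <4 x i32> }
//            @llvm.arm.neon.vld2.v4i32.p0(ptr %p, i32 4)
// is described above as a single conservative 32-byte load at %p whose
// alignment (4) comes from the trailing argument, so later passes see the
// full memory footprint of both vectors.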
/// Returns true if it is beneficial to convert a load of a constant to just
/// the constant itself.
bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                          Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned Bits = Ty->getPrimitiveSizeInBits();
  if (Bits == 0 || Bits > 32)
    return false;
  return true;
}
bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                                unsigned Index) const {
  if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
    return false;
  // Extracting the bottom or top half of a vector is cheap.
  return (Index == 0 || Index == ResVT.getVectorNumElements());
}
Instruction *ARMTargetLowering::makeDMB(IRBuilderBase &Builder,
                                        ARM_MB::MemBOpt Domain) const {
  // First, if the target has no DMB, see what fallback we can use.
  if (!Subtarget->hasDataBarrier()) {
    // Some ARMv6 cpus can support data barriers with an mcr instruction.
    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
    // here.
    if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
      Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
                        Builder.getInt32(0), Builder.getInt32(7),
                        Builder.getInt32(10), Builder.getInt32(5)};
      return Builder.CreateIntrinsic(Intrinsic::arm_mcr, args);
    }
    llvm_unreachable("makeDMB on a target so old that it has no barriers");
  }
  // Only a full system barrier exists in the M-class architectures.
  Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
  Constant *CDomain = Builder.getInt32(Domain);
  return Builder.CreateIntrinsic(Intrinsic::arm_dmb, CDomain);
}
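// Note (illustrative): the six MCR operands above encode
//   mcr p15, 0, <Rt>, c7, c10, 5
// the ARMv6 CP15 "Data Memory Barrier" operation, i.e. the pre-v7
// equivalent of dmb ish.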
    // In emitLeadingFence: a release-ordered operation only needs to order
    // prior stores, so use the store-only barrier where it is preferred.
    if (Subtarget->preferISHSTBarriers())
      return makeDMB(Builder, ARM_MB::ISHST);
    else
      return makeDMB(Builder, ARM_MB::ISH);
TargetLowering::AtomicExpansionKind
ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
  // 64-bit atomic stores need ldrexd/strexd, which exist on ARMv6+ in ARM
  // mode and ARMv7+ in Thumb mode, and never on M-class cores.
  bool has64BitAtomicStore;
  if (Subtarget->isMClass())
    has64BitAtomicStore = false;
  else if (Subtarget->isThumb())
    has64BitAtomicStore = Subtarget->hasV7Ops();
  else
    has64BitAtomicStore = Subtarget->hasV6Ops();

  unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
  return Size == 64 && has64BitAtomicStore ? AtomicExpansionKind::Expand
                                           : AtomicExpansionKind::None;
}
TargetLowering::AtomicExpansionKind
ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
  bool has64BitAtomicLoad;
  if (Subtarget->isMClass())
    has64BitAtomicLoad = false;
  else if (Subtarget->isThumb())
    has64BitAtomicLoad = Subtarget->hasV7Ops();
  else
    has64BitAtomicLoad = Subtarget->hasV6Ops();

  // 64-bit loads use an ldrexd-based "load-linked only" expansion.
  unsigned Size = LI->getType()->getPrimitiveSizeInBits();
  return (Size == 64 && has64BitAtomicLoad) ? AtomicExpansionKind::LLOnly
                                            : AtomicExpansionKind::None;
}
  // In shouldExpandAtomicRMWInIR: LL/SC expansion requires the exclusive
  // monitor instructions for the access size.
  bool hasAtomicRMW;
  if (Subtarget->isMClass())
    hasAtomicRMW = Subtarget->hasV8MBaselineOps();
  else if (Subtarget->isThumb())
    hasAtomicRMW = Subtarget->hasV7Ops();
  else
    hasAtomicRMW = Subtarget->hasV6Ops();
  if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) {
    // At -O0, fast-regalloc cannot cope with the live vregs necessary to
    // implement atomicrmw without spilling, so a late-expanded pseudo is
    // used there and plain LL/SC expansion otherwise.
    // ...
  }
  // Similarly in shouldExpandAtomicCmpXchgInIR: cmpxchg is only expanded to
  // LL/SC when the exclusive monitor instructions exist.
  bool HasAtomicCmpXchg;
  if (Subtarget->isMClass())
    HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps();
  else if (Subtarget->isThumb())
    HasAtomicCmpXchg = Subtarget->hasV7Ops();
  else
    HasAtomicCmpXchg = Subtarget->hasV6Ops();
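// Illustrative only: when LL/SC expansion is chosen, an atomicrmw add on a
// v7 target ultimately becomes a loop of the shape
//   1: ldrex  r1, [r0]
//      add    r1, r1, r2
//      strex  r3, r1, [r0]
//      cmp    r3, #0
//      bne    1b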