#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64-isel"
#define PASS_NAME "AArch64 Instruction Selection"
  AArch64DAGToDAGISel() = delete;

                                    std::vector<SDValue> &OutOps) override;

  template <signed Low, signed High, signed Scale>

    return SelectShiftedRegister(N, false, Reg, Shift);
    return SelectShiftedRegister(N, true, Reg, Shift);
    return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
    return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
    return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
    return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
    return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
    return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
    return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
    return SelectAddrModeIndexed(N, 1, Base, OffImm);
    return SelectAddrModeIndexed(N, 2, Base, OffImm);
    return SelectAddrModeIndexed(N, 4, Base, OffImm);
    return SelectAddrModeIndexed(N, 8, Base, OffImm);
    return SelectAddrModeIndexed(N, 16, Base, OffImm);
    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
  template <unsigned Size, unsigned Max>

    bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
    if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
      int64_t C = CI->getSExtValue();
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);

    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
      N = N->getOperand(0);
        !isa<ConstantSDNode>(N->getOperand(1)))
    EVT VT = N->getValueType(0);
    EVT LVT = N->getOperand(0).getValueType();
    unsigned Index = N->getConstantOperandVal(1);
    Res = N->getOperand(0);

    EVT VT = Op.getValueType();
    unsigned ShtAmt = N->getConstantOperandVal(1);
              Op.getOperand(1).getConstantOperandVal(0)
                  << Op.getOperand(1).getConstantOperandVal(1));
             isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
          Op.getOperand(1).getConstantOperandVal(0));
    if (Imm != 1ULL << (ShtAmt - 1))
    Res1 = Op.getOperand(0);
    Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
  bool SelectDupZeroOrUndef(SDValue N) {
    switch(N->getOpcode()) {
      auto Opnd0 = N->getOperand(0);
    switch(N->getOpcode()) {
      auto Opnd0 = N->getOperand(0);

  bool SelectDupNegativeZero(SDValue N) {
    switch(N->getOpcode()) {
      return Const && Const->isZero() && Const->isNegative();

  template<MVT::SimpleValueType VT>
    return SelectSVEAddSubImm(N, VT, Imm, Shift);

  template <MVT::SimpleValueType VT>
    return SelectSVECpyDupImm(N, VT, Imm, Shift);

  template <MVT::SimpleValueType VT, bool Invert = false>
    return SelectSVELogicalImm(N, VT, Imm, Invert);

  template <MVT::SimpleValueType VT>
    return SelectSVEArithImm(N, VT, Imm);
  template <unsigned Low, unsigned High, bool AllowSaturation = false>
    return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);

    EVT EltVT = N->getValueType(0).getVectorElementType();
    return SelectSVEShiftImm(N->getOperand(0), 1,

  template<signed Min, signed Max, signed Scale, bool Shift>
    if (!isa<ConstantSDNode>(N))

    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
      MulImm = 1LL << MulImm;
    if ((MulImm % std::abs(Scale)) != 0)

    if ((MulImm >= Min) && (MulImm <= Max)) {
      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
  template <signed Max, signed Scale>
    if (!isa<ConstantSDNode>(N))

    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
    if (MulImm >= 0 && MulImm <= Max) {
      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);

    if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
      Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
                       const unsigned SubRegs[]);

  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);

  bool tryIndexedLoad(SDNode *N);

  bool trySelectStackSlotTagP(SDNode *N);

  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
                            unsigned Opc_rr, unsigned Opc_ri,
                            bool IsIntr = false);
  void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
                                       unsigned Scale, unsigned Opc_ri,
  void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
                                       bool IsZmMulti, unsigned Opcode,
                                       bool HasPred = false);
  void SelectPExtPair(SDNode *N, unsigned Opc);
  void SelectWhilePair(SDNode *N, unsigned Opc);
  void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
  void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
  void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
                                 bool IsTupleInput, unsigned Opc);
  void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);

  template <unsigned MaxIdx, unsigned Scale>
  void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,

  template <int64_t Min, int64_t Max>
  template <unsigned Scale>
    return SelectSVERegRegAddrMode(N, Scale, Base, Offset);

  template <unsigned MaxIdx, unsigned Scale>

  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
                             unsigned Opc_rr, unsigned Opc_ri);
  std::tuple<unsigned, SDValue, SDValue>
  findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,

  bool tryBitfieldExtractOp(SDNode *N);
  bool tryBitfieldExtractOpFromSExt(SDNode *N);
  bool tryBitfieldInsertOp(SDNode *N);
  bool tryBitfieldInsertInZeroOp(SDNode *N);
  bool tryShiftAmountMod(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool trySelectCastFixedLengthToScalableVector(SDNode *N);
  bool trySelectCastScalableToFixedLengthVector(SDNode *N);
#include "AArch64GenDAGISel.inc"
    return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);

  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,

  bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
  bool isWorthFoldingAddr(SDValue V) const;
  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,

  template<unsigned RegWidth>
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);

  template<unsigned RegWidth>
    return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);

  bool SelectCMP_SWAP(SDNode *N);

                          bool AllowSaturation, SDValue &Imm);

  bool SelectAllActivePredicate(SDValue N);
char AArch64DAGToDAGISel::ID = 0;
  Imm = C->getZExtValue();

  return N->getOpcode() == Opc &&

  return Imm == ImmExpected;

bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
    std::vector<SDValue> &OutOps) {
  switch(ConstraintID) {
  case InlineAsm::ConstraintCode::m:
  case InlineAsm::ConstraintCode::o:
  case InlineAsm::ConstraintCode::Q:
    SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
                                       Op.getValueType(),
    OutOps.push_back(NewOp);
  if (!isa<ConstantSDNode>(N.getNode()))

  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();

  if (Immed >> 12 == 0) {
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {

  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
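// SelectArithImmed accepts the AArch64 arithmetic-immediate form: a 12-bit
// unsigned value, optionally shifted left by 12. A value like 0x123000 is
// encoded as immediate 0x123 with an LSL #12 shift, while anything with bits
// set both below and above bit 12 (e.g. 0x1234567) is rejected.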
  if (!isa<ConstantSDNode>(N.getNode()))

  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();

  if (N.getValueType() == MVT::i32)
  Immed = ~Immed + 1ULL;
  if (Immed & 0xFFFFFFFFFF000000ULL)
  Immed &= 0xFFFFFFULL;
  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
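// SelectNegArithImmed handles the negated variant: the constant is truncated
// to 32 bits when the operand type is i32, two's-complement negated, and then
// fed back through SelectArithImmed, so that e.g. an ADD of -16 can be
// selected as a SUB of 16.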
  switch (N.getOpcode()) {

  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
  unsigned ShiftVal = CSD->getZExtValue();

    if (!isa<MemSDNode>(*UI))
      if (!isa<MemSDNode>(*UII))

bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V) const {
  if (CurDAG->shouldOptForSize() || V.hasOneUse())
  if (Subtarget->hasAddrLSLFast() && V.getOpcode() == ISD::SHL &&
  if (Subtarget->hasAddrLSLFast() && V.getOpcode() == ISD::ADD) {
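// Folding a shift or extend into the addressing mode is treated as profitable
// when optimizing for size or when the shifted value has a single use; on
// subtargets with the AddrLSLFast feature, an LSL (or an ADD built from
// shifts) feeding the address is accepted even with multiple uses.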
bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
  EVT VT = N.getValueType();
  if (VT != MVT::i32 && VT != MVT::i64)

  if (N->getOpcode() != ISD::AND || !N->hasOneUse())

  unsigned LHSOpcode = LHS->getOpcode();

  unsigned LowZBits, MaskLen;

  unsigned BitWidth = N.getValueSizeInBits();

    if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
    NewShiftC = LowZBits - ShiftAmtC;
    NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
    NewShiftC = LowZBits + ShiftAmtC;
      NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
      NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;

  SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
  Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
                                       NewShiftAmt, BitWidthMinus1),
  Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);

    SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
    SrcVT = N.getOperand(0).getValueType();
  if (!IsLoadStore && SrcVT == MVT::i8)
  else if (!IsLoadStore && SrcVT == MVT::i16)
  else if (SrcVT == MVT::i32)
  assert(SrcVT != MVT::i64 && "extend from 64-bits?");

  EVT SrcVT = N.getOperand(0).getValueType();
  if (!IsLoadStore && SrcVT == MVT::i8)
  else if (!IsLoadStore && SrcVT == MVT::i16)
  else if (SrcVT == MVT::i32)
  assert(SrcVT != MVT::i64 && "extend from 64-bits?");

bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
  if (CurDAG->shouldOptForSize() || V.hasOneUse())
  if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
      V.getConstantOperandVal(1) <= 4 &&

bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
  if (SelectShiftedRegisterFromAnd(N, Reg, Shift))

  unsigned BitSize = N.getValueSizeInBits();
  unsigned Val = RHS->getZExtValue() & (BitSize - 1);

  Reg = N.getOperand(0);
  Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
  return isWorthFoldingALU(N, true);
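// SelectShiftedRegister matches the "arithmetic shifted register" operand
// form: the shift amount is a constant masked to the register width
// (RHS & (BitSize - 1)) and is combined with the shift kind into the single
// ShVal shifter-immediate operand, subject to the isWorthFoldingALU
// profitability check.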
  if (N.getValueType() == MVT::i32)

template<signed Low, signed High, signed Scale>
  if (!isa<ConstantSDNode>(N))

  int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
  if ((MulImm % std::abs(Scale)) == 0) {
    int64_t RDVLImm = MulImm / Scale;
    if ((RDVLImm >= Low) && (RDVLImm <= High)) {
      Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);

bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
  unsigned ShiftVal = 0;

    Reg = N.getOperand(0).getOperand(0);
    Reg = N.getOperand(0);

    unsigned Opc = N.getOpcode();
    return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&

  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
  return isWorthFoldingALU(N);

bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
  unsigned ShiftVal = 0;

  Reg = N.getOperand(0);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
  return isWorthFoldingALU(N);

  for (auto *Use : N->uses()) {

bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
                                                        unsigned BW, unsigned Size,
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);

  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int64_t RHSC = RHS->getSExtValue();
      int64_t Range = 0x1LL << (BW - 1);
      if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
          RHSC < (Range << Scale)) {
        Base = N.getOperand(0);
          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
      if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
        Base = N.getOperand(0);
          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);

  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
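// SelectAddrModeIndexedBitWidth matches base + scaled-immediate addresses
// where the byte offset must be a multiple of Size and, after scaling, fit in
// a BW-bit signed or unsigned field. For the signed 7-bit form scaled by 16
// (the LDP/STP-style addressing used above) that admits offsets from -1024 to
// 1008 in steps of 16.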
bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);

        dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
    Base = N.getOperand(0);
    OffImm = N.getOperand(1);

  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int64_t RHSC = (int64_t)RHS->getZExtValue();
      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
        Base = N.getOperand(0);
          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);

  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))

  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);

bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
  if (!CurDAG->isBaseWithConstantOffset(N))
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int64_t RHSC = RHS->getSExtValue();
    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
    if (RHSC >= -256 && RHSC < 256) {
      Base = N.getOperand(0);
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
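// SelectAddrModeUnscaled is the fallback when the offset cannot use the
// scaled unsigned form: it accepts any 9-bit signed byte offset in
// [-256, 255], matching the LDUR/STUR family of instructions.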
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);

bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
  SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);

  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)

  return isWorthFoldingAddr(N);

bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))

    if (!isa<MemSDNode>(*UI))

  bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N);

  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);

  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);

  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);

  if (IsExtendedRegisterWorthFolding &&
    if (isWorthFoldingAddr(LHS))
  if (IsExtendedRegisterWorthFolding &&
    if (isWorthFoldingAddr(RHS))

  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
  return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
         (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;

bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
    if (!isa<MemSDNode>(*UI))

  if (isa<ConstantSDNode>(RHS)) {
    int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
    if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
          CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
      N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);

  bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N);

  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);

  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);

  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);

  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};

  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};

  static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
                                         AArch64::ZPR3RegClassID,
                                         AArch64::ZPR4RegClassID};
  static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
                                     AArch64::zsub2, AArch64::zsub3};

  static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
                                         AArch64::ZPR4Mul4RegClassID};
  static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
                                     AArch64::zsub2, AArch64::zsub3};

                                     const unsigned RegClassIDs[],
                                     const unsigned SubRegs[]) {
  if (Regs.size() == 1)

      CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));

  for (unsigned i = 0; i < Regs.size(); ++i) {
    Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));

      CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
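// createTuple (and the D/Q/Z-register helpers above that feed it) wraps a
// list of vector registers into a single REG_SEQUENCE node: the register
// class ID for the tuple size comes first, followed by each register paired
// with its sub-register index (dsub0.., qsub0.., zsub0..). A single register
// is returned unchanged.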
void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
  EVT VT = N->getValueType(0);

  unsigned ExtOff = isExt;
  unsigned Vec0Off = ExtOff + 1;
                               N->op_begin() + Vec0Off + NumVecs);

    Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));

bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
  if (LD->isUnindexed())
  EVT VT = LD->getMemoryVT();
  EVT DstVT = N->getValueType(0);

  unsigned Opcode = 0;
  bool InsertTo64 = false;
    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
  else if (VT == MVT::i32) {
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
  } else if (VT == MVT::i16) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertTo64 = DstVT == MVT::i64;
  } else if (VT == MVT::i8) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertTo64 = DstVT == MVT::i64;
  } else if (VT == MVT::f16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::bf16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::f32) {
    Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
    Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
    Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;

  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});

    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
        SDValue(CurDAG->getMachineNode(
                    AArch64::SUBREG_TO_REG, dl, MVT::i64,
                    CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,

  ReplaceUses(SDValue(N, 0), LoadedVal);
  CurDAG->RemoveDeadNode(N);
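// tryIndexedLoad selects pre/post-indexed loads directly: the memory type
// picks the LDR*pre/LDR*post opcode (with sign-extending variants when the
// destination is wider than the memory type), and a zero-extending 8/16/32-bit
// load that must produce i64 is widened afterwards with SUBREG_TO_REG into
// sub_32 before the original node's results are replaced.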
void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                                     unsigned SubRegIdx) {
  EVT VT = N->getValueType(0);

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  for (unsigned i = 0; i < NumVecs; ++i)
        CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));

  if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});

  CurDAG->RemoveDeadNode(N);

void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
                                         unsigned Opc, unsigned SubRegIdx) {
  EVT VT = N->getValueType(0);

  const EVT ResTys[] = {MVT::i64,
                        MVT::Untyped, MVT::Other};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    ReplaceUses(SDValue(N, 0), SuperReg);
    for (unsigned i = 0; i < NumVecs; ++i)
          CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));

  CurDAG->RemoveDeadNode(N);

std::tuple<unsigned, SDValue, SDValue>
AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
  SDValue NewOffset = OldOffset;

  const bool IsRegImm = SelectAddrModeIndexedSVE<-8, 7>(
      N, OldBase, NewBase, NewOffset);

  const bool IsRegReg =
      !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);

  return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
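// findAddrModeSVELoadStore picks between the reg+imm and reg+reg forms of an
// SVE load/store: the scaled-immediate form (immediates in [-8, 7], scaled by
// the transfer size) is tried first, then a reg+reg address; if neither
// matches, the original base with a zero offset is kept and the _ri opcode is
// used.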
template <SelectTypeKind Kind>
    if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
    if (EltVT != MVT::i1)
    if (EltVT != MVT::f16 && EltVT != MVT::f32 && EltVT != MVT::f64)

void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
  if (Imm->getZExtValue() > 1)

  EVT VT = N->getValueType(0);
  SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
  SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);

  for (unsigned I = 0; I < 2; ++I)
    ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
                                   AArch64::psub0 + I, DL, VT, SuperReg));

  CurDAG->RemoveDeadNode(N);

void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
  EVT VT = N->getValueType(0);

  SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
  SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);

  for (unsigned I = 0; I < 2; ++I)
    ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
                                   AArch64::psub0 + I, DL, VT, SuperReg));

  CurDAG->RemoveDeadNode(N);
void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
  EVT VT = N->getValueType(0);
  SDValue Ops = createZTuple(Regs);

  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + i, DL, VT, SuperReg));

  CurDAG->RemoveDeadNode(N);

void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
  assert(Opcode != 0 && "Unexpected opcode");

  EVT VT = N->getValueType(0);
  unsigned FirstVecIdx = HasPred ? 2 : 1;

  auto GetMultiVecOperand = [=](unsigned StartIdx) {
                                 N->op_begin() + StartIdx + NumVecs);
    return createZMulTuple(Regs);

  SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
    Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
    Zm = N->getOperand(NumVecs + FirstVecIdx);

    Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
                                       N->getOperand(1), Zdn, Zm);
    Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);

  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + i, DL, VT, SuperReg));

  CurDAG->RemoveDeadNode(N);
void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
                                               unsigned Scale, unsigned Opc_ri,
                                               unsigned Opc_rr, bool IsIntr) {
  assert(Scale < 4 && "Invalid scaling value.");
  EVT VT = N->getValueType(0);

  std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
      N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
      CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);

  SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1),

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
  SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);

  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + i, DL, VT, SuperReg));

  unsigned ChainIdx = NumVecs;
  CurDAG->RemoveDeadNode(N);
void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
  assert(Scale < 4 && "Invalid scaling value.");
  EVT VT = N->getValueType(0);

      findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
  SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);

  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + i, DL, VT, SuperReg));

  unsigned ChainIdx = NumVecs;
  CurDAG->RemoveDeadNode(N);
void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
  if (N->getValueType(0) != MVT::nxv4f32)
  SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);

void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
  EVT VT = N->getValueType(0);

  SDValue Zd = createZMulTuple(Regs);
  SDValue Zn = N->getOperand(1 + NumVecs);
  SDValue Zm = N->getOperand(2 + NumVecs);

  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + i, DL, VT, SuperReg));

  CurDAG->RemoveDeadNode(N);
template <unsigned MaxIdx, unsigned Scale>
void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
                                                unsigned BaseReg, unsigned Op) {
  unsigned TileNum = 0;
  if (BaseReg != AArch64::ZA)
    TileNum = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();

  if (BaseReg == AArch64::ZA)
    SliceBase = N->getOperand(2);
    SliceBase = N->getOperand(3);

  if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))

  SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);

  EVT VT = N->getValueType(0);
  for (unsigned I = 0; I < NumVecs; ++I)
        CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,

  unsigned ChainIdx = NumVecs;
  CurDAG->RemoveDeadNode(N);
void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
                                                    unsigned NumOutVecs,
  EVT VT = N->getValueType(0);
  unsigned NumInVecs = N->getNumOperands() - 1;

    assert((NumInVecs == 2 || NumInVecs == 4) &&
           "Don't know how to handle multi-register input!");
                                 N->op_begin() + 1 + NumInVecs);
    for (unsigned I = 0; I < NumInVecs; I++)

  SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);

  for (unsigned I = 0; I < NumOutVecs; I++)
    ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + I, DL, VT, SuperReg));
  CurDAG->RemoveDeadNode(N);
void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
  EVT VT = N->getOperand(2)->getValueType(0);

  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);

  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
                                                unsigned Scale, unsigned Opc_rr,
  SDValue RegSeq = createZTuple(Regs);

  std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
      N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
      CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);

  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2),
  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);

  if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
    int FI = FINode->getIndex();
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
  EVT VT = N->getOperand(2)->getValueType(0);
  const EVT ResTys[] = {MVT::i64,
                   N->getOperand(NumVecs + 1),
                   N->getOperand(NumVecs + 2),
  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
  EVT VT = N->getValueType(0);
                 WidenVector(*CurDAG));

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 3), N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
                                      AArch64::qsub2, AArch64::qsub3 };
    for (unsigned i = 0; i < NumVecs; ++i) {
      SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);

  CurDAG->RemoveDeadNode(N);
void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
  EVT VT = N->getValueType(0);
                 WidenVector(*CurDAG));

  const EVT ResTys[] = {MVT::i64,
      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
                   CurDAG->getTargetConstant(LaneNo, dl,
                   N->getOperand(NumVecs + 2),
                   N->getOperand(NumVecs + 3),
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
                                      AArch64::qsub2, AArch64::qsub3 };
    for (unsigned i = 0; i < NumVecs; ++i) {
      SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,

  CurDAG->RemoveDeadNode(N);
void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
  EVT VT = N->getOperand(2)->getValueType(0);
                 WidenVector(*CurDAG));

      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 3), N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);

  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
  EVT VT = N->getOperand(2)->getValueType(0);
                 WidenVector(*CurDAG));

  const EVT ResTys[] = {MVT::i64,
      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 2),
                   N->getOperand(NumVecs + 3),
  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
                                      unsigned &LSB, unsigned &MSB,
                                      unsigned NumberOfIgnoredLowBits,
                                      bool BiggerPattern) {
         "N must be a AND operation to call this function");

  EVT VT = N->getValueType(0);
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  const SDNode *Op0 = N->getOperand(0).getNode();

  AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);

  if (AndImm & (AndImm + 1))

  bool ClampMSB = false;
    ClampMSB = (VT == MVT::i32);
  } else if (BiggerPattern) {
    Opd0 = N->getOperand(0);

  if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
               << ": Found large shift immediate, this should not happen\n"));

        (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
                        : llvm::countr_one<uint64_t>(AndImm)) -

    MSB = MSB > 31 ? 31 : MSB;

  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
                                          SDValue &Opd0, unsigned &Immr,
  EVT VT = N->getValueType(0);
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

    Op = Op->getOperand(0);
    VT = Op->getValueType(0);

  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();

  Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
  Opd0 = Op.getOperand(0);
  Imms = ShiftImm + Width - 1;

  Opd0 = N->getOperand(0).getOperand(0);
  Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
                                       unsigned &Immr, unsigned &Imms,
                                       bool BiggerPattern) {
         "N must be a SHR/SRA operation to call this function");

  EVT VT = N->getValueType(0);
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

    Opd0 = N->getOperand(0).getOperand(0);
  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
    Opd0 = N->getOperand(0).getOperand(0);
    assert(VT == MVT::i64 && "the promoted type should be i64");
  } else if (BiggerPattern) {
    Opd0 = N->getOperand(0);

               << ": Found large shift immediate, this should not happen\n"));

         "bad amount in shift node!");
  int immr = SrlImm - ShlImm;

    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
  EVT VT = N->getValueType(0);
  EVT NarrowVT = N->getOperand(0)->getValueType(0);
  if (VT != MVT::i64 || NarrowVT != MVT::i32)

  unsigned Immr = ShiftImm;

  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
                   CurDAG->getTargetConstant(Imms, dl, VT)};
  CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);

                                SDValue &Opd0, unsigned &Immr, unsigned &Imms,
                                unsigned NumberOfIgnoredLowBits = 0,
                                bool BiggerPattern = false) {
  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)

  switch (N->getOpcode()) {
    if (!N->isMachineOpcode())
                                      NumberOfIgnoredLowBits, BiggerPattern);

  unsigned NOpc = N->getMachineOpcode();
  case AArch64::SBFMWri:
  case AArch64::UBFMWri:
  case AArch64::SBFMXri:
  case AArch64::UBFMXri:
    Opd0 = N->getOperand(0);
    Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
    Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
  unsigned Opc, Immr, Imms;

  EVT VT = N->getValueType(0);

  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
    SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
                       CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
    SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
    SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,

  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
                   CurDAG->getTargetConstant(Imms, dl, VT)};
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
                              unsigned NumberOfIgnoredHighBits, EVT VT) {
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "i32 or i64 mask type expected!");

  return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
         (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();

      cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();

  APInt OpUsefulBits(UsefulBits);
    OpUsefulBits <<= MSB - Imm + 1;
    OpUsefulBits <<= Imm;
    OpUsefulBits <<= MSB + 1;
    OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;

  UsefulBits &= OpUsefulBits;

      cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();

      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
  APInt Mask(UsefulBits);
  Mask.clearAllBits();
    Mask.lshrInPlace(ShiftAmt);
    Mask.lshrInPlace(ShiftAmt);

      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
      cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
  APInt OpUsefulBits(UsefulBits);

    OpUsefulBits <<= Width;
    if (Op.getOperand(1) == Orig) {
      Mask = ResultUsefulBits & OpUsefulBits;
    if (Op.getOperand(0) == Orig)
      Mask |= (ResultUsefulBits & ~OpUsefulBits);

    OpUsefulBits <<= Width;
    OpUsefulBits <<= LSB;
    if (Op.getOperand(1) == Orig) {
      Mask = ResultUsefulBits & OpUsefulBits;
      Mask.lshrInPlace(LSB);
    if (Op.getOperand(0) == Orig)
      Mask |= (ResultUsefulBits & ~OpUsefulBits);

  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::UBFMWri:
  case AArch64::UBFMXri:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::BFMWri:
  case AArch64::BFMXri:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:

  unsigned Bitwidth = Op.getScalarValueSizeInBits();
  UsefulBits = APInt(Bitwidth, 0);

    UsersUsefulBits |= UsefulBitsForUse;
  UsefulBits &= UsersUsefulBits;
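// getUsefulBits walks the users of a value and accumulates which of its bits
// are actually observed: BFM/UBFM, shifted ORR, BFI/BFXIL and the narrow
// store opcodes each mask or reposition the bits they consume. The union of
// every user's demands, intersected with the bits already considered useful,
// is what the bitfield-insert matching below relies on to prove that dead
// bits may be overwritten.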
  EVT VT = Op.getValueType();
  unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;

  if (ShlAmount > 0) {
        UBFMOpc, dl, VT, Op,
    assert(ShlAmount < 0 && "expected right shift");
    int ShrAmount = -ShlAmount;

                                 bool BiggerPattern, SDValue &Src,
                                 int &DstLSB, int &Width) {
  EVT VT = Op.getValueType();
  const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();

  switch (Op.getOpcode()) {
                                          NonZeroBits, Src, DstLSB, Width);
                                          NonZeroBits, Src, DstLSB, Width);

  EVT VT = Op.getValueType();
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Caller guarantees VT is one of i32 or i64");

  assert((~AndImm & NonZeroBits) == 0 &&
         "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");

  if (!BiggerPattern && !AndOp0.hasOneUse())

        << "Found large Width in bit-field-positioning -- this indicates no "
           "proper combining / constant folding was performed\n");

  if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)

         "Op.getNode() should be a SHL node to call this function");
         "Op.getNode() should shift ShlImm to call this function");

  const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);

  EVT VT = Op.getValueType();
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Caller guarantees that type is i32 or i64");

  if (!BiggerPattern && !Op.hasOneUse())

  if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)

  assert(VT == MVT::i32 || VT == MVT::i64);

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32 && VT != MVT::i64)

  if (!And.hasOneUse() ||

  uint64_t NotKnownZero = (~Known.Zero).getZExtValue();

  if ((OrImm & NotKnownZero) != 0) {

  unsigned ImmS = Width - 1;

  bool IsBFI = LSB != 0;

  unsigned OrChunks = 0, BFIChunks = 0;
  for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
    if (((OrImm >> Shift) & 0xFFFF) != 0)
    if (((BFIImm >> Shift) & 0xFFFF) != 0)
  if (BFIChunks > OrChunks)

  unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;

  if (!Dst.hasOneUse())

  EVT VT = Dst.getValueType();
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Caller should guarantee that VT is one of i32 or i64");

  SDValue DstOp0 = Dst.getOperand(0);

    if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
      unsigned MaskWidth =
          (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
              SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
      ShiftedOperand = SDValue(UBFMNode, 0);

      ShiftedOperand = Dst.getOperand(0);
      ShiftedOperand = Dst.getOperand(0);
                           const bool BiggerPattern) {
  EVT VT = N->getValueType(0);
  assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
  assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
          (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
         "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Expect result type to be i32 or i64 since N is combinable to BFM");

  const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;

  if (BiggerPattern) {
      SDValue Ops[] = {OrOpd0, ShiftedOperand,

  assert((!BiggerPattern) && "BiggerPattern should be handled above");

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32 && VT != MVT::i64)

  unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
  unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();

  for (int I = 0; I < 4; ++I) {
    unsigned ImmR, ImmS;
    bool BiggerPattern = I / 2;
    SDValue OrOpd0Val = N->getOperand(I % 2);
    SDValue OrOpd1Val = N->getOperand((I + 1) % 2);

                             NumberOfIgnoredLowBits, BiggerPattern)) {
      if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
          (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))

      Width = ImmS - ImmR + 1;

                                       Src, DstLSB, Width)) {

    assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");

    APInt BitsToBeInserted =
    if ((BitsToBeInserted & ~Known.Zero) != 0)

    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
    unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
    if (Src->hasOneUse() &&
      Src = Src->getOperand(0);

    unsigned ImmS = Width - 1;
    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
    CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));

bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
  EVT VT = N->getValueType(0);
  if (VT != MVT::i32 && VT != MVT::i64)

                                Op0, DstLSB, Width))

  unsigned ImmS = Width - 1;

  SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
                   CurDAG->getTargetConstant(ImmS, DL, VT)};
  unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
  EVT VT = N->getValueType(0);

  switch (N->getOpcode()) {
    Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
    Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
    Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
    Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;

  if (VT == MVT::i32) {
  } else if (VT == MVT::i64) {

  SDValue ShiftAmt = N->getOperand(1);

        (Add0Imm % Size == 0)) {

      if (SubVT == MVT::i32) {
        NegOpc = AArch64::SUBWrr;
        ZeroReg = AArch64::WZR;
        assert(SubVT == MVT::i64);
        NegOpc = AArch64::SUBXrr;
        ZeroReg = AArch64::XZR;
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
          CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
      NewShiftAmt = SDValue(Neg, 0);

      if (SubVT == MVT::i32) {
        NotOpc = AArch64::ORNWrr;
        ZeroReg = AArch64::WZR;
        assert(SubVT == MVT::i64);
        NotOpc = AArch64::ORNXrr;
        ZeroReg = AArch64::XZR;
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
          CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
      NewShiftAmt = SDValue(Not, 0);

  else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
        AArch64::SUBREG_TO_REG, DL, VT,
        CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
    NewShiftAmt = SDValue(Ext, 0);

  SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
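// tryShiftAmountMod relies on the variable shift/rotate instructions
// (LSLV/LSRV/ASRV/RORV) only reading the shift amount modulo the register
// width: an AND mask, a negation (SUB from WZR/XZR) or a bitwise NOT (ORN
// with the zero register) applied to the amount can be folded away, and a
// 32-bit amount is widened with SUBREG_TO_REG when the shifted value is
// 64 bits.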
                                      bool isReciprocal) {
    FVal = CN->getValueAPF();
  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
        !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
        dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
    FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();

  if (!IsExact || !IntVal.isPowerOf2())
  unsigned FBits = IntVal.logBase2();
  if (FBits == 0 || FBits > RegWidth)
    return false;
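// A floating-point constant (possibly loaded from a constant pool) can be
// folded into a fixed-point convert only when it is an exact power of two:
// FBits is its base-2 logarithm and must lie within the register width. The
// reciprocal variant applies the same test to the inverse of the constant.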
bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
                                                   unsigned RegWidth) {

bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
                                                        unsigned RegWidth) {

  RegString.split(Fields, ':');

  if (Fields.size() == 1)

         && "Invalid number of fields in read register string");

  bool AllIntFields = true;
    AllIntFields &= !Field.getAsInteger(10, IntField);
           "Unexpected non-integer value in special register string.");

  return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
         (Ops[3] << 3) | (Ops[4]);
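// The five "op0:op1:CRn:CRm:op2" fields of a system-register string are
// packed into the single MRS/MSR immediate as
//   (op0 << 14) | (op1 << 11) | (CRn << 7) | (CRm << 3) | op2.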
bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));

  unsigned Opcode64Bit = AArch64::MRS;

  const auto *TheReg =
  if (TheReg && TheReg->Readable &&
      TheReg->haveFeatures(Subtarget->getFeatureBits()))
    Imm = TheReg->Encoding;

  if (!ReadIs128Bit && RegString->getString() == "pc") {
    Opcode64Bit = AArch64::ADR;

  SDValue InChain = N->getOperand(0);
  SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
  if (!ReadIs128Bit) {
    CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other,
                         {SysRegImm, InChain});
                         {MVT::Untyped, MVT::Other},
                         {SysRegImm, InChain});
    SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
    SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
    ReplaceUses(SDValue(N, 2), OutChain);
bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));

  if (!WriteIs128Bit) {
    auto trySelectPState = [&](auto PMapper, unsigned State) {
        assert(isa<ConstantSDNode>(N->getOperand(2)) &&
               "Expected a constant integer expression.");
        unsigned Reg = PMapper->Encoding;
        uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
        CurDAG->SelectNodeTo(
            N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
            CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));

    if (trySelectPState(
            AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
            AArch64::MSRpstateImm4))
    if (trySelectPState(
            AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
            AArch64::MSRpstateImm1))

  if (TheReg && TheReg->Writeable &&
      TheReg->haveFeatures(Subtarget->getFeatureBits()))
    Imm = TheReg->Encoding;

  SDValue InChain = N->getOperand(0);
  if (!WriteIs128Bit) {
    CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
                         CurDAG->getTargetConstant(Imm, DL, MVT::i32),
                         N->getOperand(2), InChain);
    SDNode *Pair = CurDAG->getMachineNode(
        TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped,
        {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
         CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
         CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
    CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
                         CurDAG->getTargetConstant(Imm, DL, MVT::i32),
bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();

  if (Subtarget->hasLSE())
    return false;

  if (MemTy == MVT::i8)
    Opcode = AArch64::CMP_SWAP_8;
  else if (MemTy == MVT::i16)
    Opcode = AArch64::CMP_SWAP_16;
  else if (MemTy == MVT::i32)
    Opcode = AArch64::CMP_SWAP_32;
  else if (MemTy == MVT::i64)
    Opcode = AArch64::CMP_SWAP_64;

  MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
  SDNode *CmpSwap = CurDAG->getMachineNode(
      CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);

  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});

  CurDAG->RemoveDeadNode(N);
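// SelectCMP_SWAP only fires when the subtarget lacks LSE atomics: the memory
// width picks one of the CMP_SWAP_8/16/32/64 pseudos, which are later
// expanded into a load-exclusive/store-exclusive retry loop.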
  if (!isa<ConstantSDNode>(N))

    Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);

    Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);

    if (Val <= 65280 && Val % 256 == 0) {
      Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
      Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
  if (!isa<ConstantSDNode>(N))

  int64_t Val = cast<ConstantSDNode>(N)

    Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);

    if (Val >= -128 && Val <= 127) {
      Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);

    if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
      Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
      Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);

bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
    int64_t ImmVal = CNode->getSExtValue();

    if (ImmVal >= -128 && ImmVal < 128) {
      Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);

  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
    uint64_t ImmVal = CNode->getZExtValue();
      ImmVal &= 0xFFFFFFFF;
    Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);

  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
    uint64_t ImmVal = CNode->getZExtValue();
      ImmVal |= ImmVal << 8;
      ImmVal |= ImmVal << 16;
      ImmVal |= ImmVal << 32;
      ImmVal |= ImmVal << 16;
      ImmVal |= ImmVal << 32;
      ImmVal &= 0xFFFFFFFF;
      ImmVal |= ImmVal << 32;
    Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);

  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
    uint64_t ImmVal = CN->getZExtValue();
    if (ImmVal > High) {
      if (!AllowSaturation)
    Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
  if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {

      cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() !=
          Intrinsic::aarch64_irg_sp) {

  int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
  SDValue FiOp = CurDAG->getTargetFrameIndex(
  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();

  SDNode *Out = CurDAG->getMachineNode(
      AArch64::TAGPstack, DL, MVT::i64,
      {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
  ReplaceNode(N, Out);

void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
  assert(isa<ConstantSDNode>(N->getOperand(3)) &&
         "llvm.aarch64.tagp third argument must be an immediate");
  if (trySelectStackSlotTagP(N))

  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
  SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
                                      {N->getOperand(1), N->getOperand(2)});
  SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
                                      {SDValue(N1, 0), N->getOperand(2)});
  SDNode *N3 = CurDAG->getMachineNode(
      AArch64::ADDG, DL, MVT::i64,
      {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
  if (cast<ConstantSDNode>(N->getOperand(2))->getZExtValue() != 0)
  if (!N->getOperand(0).isUndef())

  EVT VT = N->getValueType(0);
  EVT InVT = N->getOperand(1).getValueType();
         "Expected to insert into a packed scalable vector!");

  auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
                                        N->getOperand(1), RC));

bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
  if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 0)

  EVT VT = N->getValueType(0);
  EVT InVT = N->getOperand(0).getValueType();
         "Expected to extract from a packed scalable vector!");

  auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
                                        N->getOperand(0), RC));
void AArch64DAGToDAGISel::Select(SDNode *Node) {
  if (Node->isMachineOpcode()) {
    Node->setNodeId(-1);

  EVT VT = Node->getValueType(0);

  switch (Node->getOpcode()) {
    if (SelectCMP_SWAP(Node))
    if (tryReadRegister(Node))
    if (tryWriteRegister(Node))
    if (tryIndexedLoad(Node))
    if (tryBitfieldExtractOp(Node))
    if (tryBitfieldInsertInZeroOp(Node))
    if (tryShiftAmountMod(Node))
    if (tryBitfieldExtractOpFromSExt(Node))
    if (tryBitfieldInsertOp(Node))
    if (trySelectCastScalableToFixedLengthVector(Node))
    if (trySelectCastFixedLengthToScalableVector(Node))

    if (ConstNode->isZero()) {
      if (VT == MVT::i32) {
            CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
        ReplaceNode(Node, New.getNode());
      } else if (VT == MVT::i64) {
            CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
        ReplaceNode(Node, New.getNode());

    int FI = cast<FrameIndexSDNode>(Node)->getIndex();
    SDValue TFI = CurDAG->getTargetFrameIndex(
    SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
                      CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
    CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);

    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
    case Intrinsic::aarch64_ldaxp:
    case Intrinsic::aarch64_ldxp: {
          IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
      SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
                                          MVT::Other, MemAddr, Chain);
          cast<MemIntrinsicSDNode>(Node)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
      ReplaceNode(Node, Ld);
    case Intrinsic::aarch64_stlxp:
    case Intrinsic::aarch64_stxp: {
          IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
      SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
      SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
          cast<MemIntrinsicSDNode>(Node)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
      ReplaceNode(Node, St);
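    // The NEON structured-load intrinsics below are dispatched purely on the
    // result vector type: 64-bit vectors (v8i8, v4i16, v2i32, v1i64, ...)
    // select the D-register opcodes and extract results starting at dsub0,
    // while 128-bit vectors select the Q-register forms and extract from
    // qsub0.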
4391 case Intrinsic::aarch64_neon_ld1x2:
4392 if (VT == MVT::v8i8) {
4393 SelectLoad(
Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4395 }
else if (VT == MVT::v16i8) {
4396 SelectLoad(
Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4398 }
else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4399 SelectLoad(
Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4401 }
else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4402 SelectLoad(
Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4404 }
else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4405 SelectLoad(
Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4407 }
else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4408 SelectLoad(
Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4410 }
else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4411 SelectLoad(
Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4413 }
else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4414 SelectLoad(
Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
    case Intrinsic::aarch64_neon_ld1x3:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld1x4:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
        return;
      }
      break;
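    // Note (added commentary): ld1x2/ld1x3/ld1x4 above select to the LD1
    // multi-register forms (no de-interleaving), while ld2/ld3/ld4 below
    // select to the interleaving LD2/LD3/LD4 instructions. The v1i64/v1f64
    // cases have no interleaved encoding and fall back to the LD1
    // multi-register opcodes.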
    case Intrinsic::aarch64_neon_ld2:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld3:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld4:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
        return;
      }
      break;
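    // Note (added commentary): ld2r/ld3r/ld4r are the load-and-replicate
    // forms (LDNR): each loads one element per destination vector and
    // broadcasts it to all lanes, which is why a one-element 64-bit vector
    // still has a dedicated LDNRv1d opcode below.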
    case Intrinsic::aarch64_neon_ld2r:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld3r:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld4r:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
        return;
      }
      break;
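    // Note (added commentary): the lane variants depend only on the element
    // size, not the full vector type, so each ldNlane intrinsic needs just
    // four cases (i8/i16/i32/i64); SelectLoadLane also threads the lane
    // index through to the LDNi instruction.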
    case Intrinsic::aarch64_neon_ld2lane:
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectLoadLane(Node, 2, AArch64::LD2i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectLoadLane(Node, 2, AArch64::LD2i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectLoadLane(Node, 2, AArch64::LD2i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectLoadLane(Node, 2, AArch64::LD2i64);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld3lane:
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectLoadLane(Node, 3, AArch64::LD3i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectLoadLane(Node, 3, AArch64::LD3i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectLoadLane(Node, 3, AArch64::LD3i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectLoadLane(Node, 3, AArch64::LD3i64);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld4lane:
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectLoadLane(Node, 4, AArch64::LD4i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectLoadLane(Node, 4, AArch64::LD4i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectLoadLane(Node, 4, AArch64::LD4i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectLoadLane(Node, 4, AArch64::LD4i64);
        return;
      }
      break;
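    // Note (added commentary): aarch64_ld64b is the FEAT_LS64 64-byte load.
    // A single LD64B fills eight consecutive X registers, modelled here as an
    // 8-result load whose values are extracted through the x8sub_0..x8sub_7
    // sub-register indices.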
    case Intrinsic::aarch64_ld64b:
      SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
      return;
    case Intrinsic::aarch64_sve_ld2_sret: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
                             true);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
                             true);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
                             true);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
                             true);
        return;
      }
      break;
    }
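    // Note (added commentary): the predicate-as-counter loads below
    // (ld1/ldnt1 _pn_x2/_pn_x4) are gated on the subtarget. With SME2 they
    // select to pseudo instructions that are expanded after register
    // allocation to pick the strided or contiguous multi-vector form, while
    // plain SVE2p1 selects the real LD1/LDNT1 multi-vector instructions
    // directly; the _IMM opcode is the immediate-offset addressing form and
    // the other the register-offset form.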
    case Intrinsic::aarch64_sve_ld1_pn_x2: {
      if (VT == MVT::nxv16i8) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
                                          AArch64::LD1B_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
                                          AArch64::LD1H_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
                                          AArch64::LD1W_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
                                          AArch64::LD1D_2Z);
        else
          break;
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ld1_pn_x4: {
      if (VT == MVT::nxv16i8) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
                                          AArch64::LD1B_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
                                          AArch64::LD1H_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
                                          AArch64::LD1W_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
                                          AArch64::LD1D_4Z);
        else
          break;
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
      if (VT == MVT::nxv16i8) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 2, 0,
                                          AArch64::LDNT1B_2Z_IMM_PSEUDO,
                                          AArch64::LDNT1B_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
                                          AArch64::LDNT1B_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 2, 1,
                                          AArch64::LDNT1H_2Z_IMM_PSEUDO,
                                          AArch64::LDNT1H_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
                                          AArch64::LDNT1H_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 2, 2,
                                          AArch64::LDNT1W_2Z_IMM_PSEUDO,
                                          AArch64::LDNT1W_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
                                          AArch64::LDNT1W_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 2, 3,
                                          AArch64::LDNT1D_2Z_IMM_PSEUDO,
                                          AArch64::LDNT1D_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
                                          AArch64::LDNT1D_2Z);
        else
          break;
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
      if (VT == MVT::nxv16i8) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 4, 0,
                                          AArch64::LDNT1B_4Z_IMM_PSEUDO,
                                          AArch64::LDNT1B_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
                                          AArch64::LDNT1B_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 4, 1,
                                          AArch64::LDNT1H_4Z_IMM_PSEUDO,
                                          AArch64::LDNT1H_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
                                          AArch64::LDNT1H_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 4, 2,
                                          AArch64::LDNT1W_4Z_IMM_PSEUDO,
                                          AArch64::LDNT1W_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
                                          AArch64::LDNT1W_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 4, 3,
                                          AArch64::LDNT1D_4Z_IMM_PSEUDO,
                                          AArch64::LDNT1D_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
                                          AArch64::LDNT1D_4Z);
        else
          break;
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ld3_sret: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
                             true);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
                             true);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
                             true);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
                             true);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ld4_sret: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
                             true);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
                             true);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
                             true);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
                             true);
        return;
      }
      break;
    }