21 #include "llvm/IR/IntrinsicsAArch64.h"
30 #define DEBUG_TYPE "aarch64-isel"
50 return "AArch64 Instruction Selection";
62 bool SelectInlineAsmMemoryOperand(
const SDValue &
Op,
63 unsigned ConstraintID,
64 std::vector<SDValue> &OutOps)
override;
66 template <
signed Low,
signed High,
signed Scale>
69 bool tryMLAV64LaneV128(
SDNode *
N);
70 bool tryMULLV64LaneV128(
unsigned IntNo,
SDNode *
N);
75 return SelectShiftedRegister(
N,
false,
Reg,
Shift);
78 return SelectShiftedRegister(
N,
true,
Reg,
Shift);
81 return SelectAddrModeIndexed7S(
N, 1,
Base, OffImm);
84 return SelectAddrModeIndexed7S(
N, 2,
Base, OffImm);
87 return SelectAddrModeIndexed7S(
N, 4,
Base, OffImm);
90 return SelectAddrModeIndexed7S(
N, 8,
Base, OffImm);
93 return SelectAddrModeIndexed7S(
N, 16,
Base, OffImm);
96 return SelectAddrModeIndexedBitWidth(
N,
true, 9, 16,
Base, OffImm);
99 return SelectAddrModeIndexedBitWidth(
N,
false, 6, 16,
Base, OffImm);
102 return SelectAddrModeIndexed(
N, 1,
Base, OffImm);
105 return SelectAddrModeIndexed(
N, 2,
Base, OffImm);
108 return SelectAddrModeIndexed(
N, 4,
Base, OffImm);
111 return SelectAddrModeIndexed(
N, 8,
Base, OffImm);
114 return SelectAddrModeIndexed(
N, 16,
Base, OffImm);
117 return SelectAddrModeUnscaled(
N, 1,
Base, OffImm);
120 return SelectAddrModeUnscaled(
N, 2,
Base, OffImm);
123 return SelectAddrModeUnscaled(
N, 4,
Base, OffImm);
126 return SelectAddrModeUnscaled(
N, 8,
Base, OffImm);
129 return SelectAddrModeUnscaled(
N, 16,
Base, OffImm);
131 template <
unsigned Size,
unsigned Max>
135 bool Found = SelectAddrModeIndexed(
N, Size,
Base, OffImm);
137 if (
auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
138 int64_t
C = CI->getSExtValue();
153 return SelectAddrModeWRO(
N,
Width / 8,
Base, Offset, SignExtend, DoShift);
159 return SelectAddrModeXRO(
N,
Width / 8,
Base, Offset, SignExtend, DoShift);
162 bool SelectDupZeroOrUndef(
SDValue N) {
163 switch(
N->getOpcode()) {
168 auto Opnd0 =
N->getOperand(0);
169 if (
auto CN = dyn_cast<ConstantSDNode>(Opnd0))
172 if (
auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
185 switch(
N->getOpcode()) {
188 auto Opnd0 =
N->getOperand(0);
189 if (
auto CN = dyn_cast<ConstantSDNode>(Opnd0))
192 if (
auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
202 template<MVT::SimpleValueType VT>
204 return SelectSVEAddSubImm(
N, VT, Imm,
Shift);
207 template <MVT::SimpleValueType VT>
209 return SelectSVECpyDupImm(
N, VT, Imm,
Shift);
212 template <MVT::SimpleValueType VT,
bool Invert = false>
214 return SelectSVELogicalImm(
N, VT, Imm, Invert);
217 template <MVT::SimpleValueType VT>
219 return SelectSVEArithImm(
N, VT, Imm);
222 template <
unsigned Low,
unsigned High,
bool AllowSaturation = false>
224 return SelectSVEShiftImm(
N,
Low,
High, AllowSaturation, Imm);
228 template<
signed Min,
signed Max,
signed Scale,
bool Shift>
230 if (!isa<ConstantSDNode>(
N))
233 int64_t MulImm = cast<ConstantSDNode>(
N)->getSExtValue();
235 MulImm = 1LL << MulImm;
237 if ((MulImm %
std::abs(Scale)) != 0)
241 if ((MulImm >= Min) && (MulImm <= Max)) {
249 template <
signed Max,
signed Scale>
251 if (!isa<ConstantSDNode>(
N))
254 int64_t MulImm = cast<ConstantSDNode>(
N)->getSExtValue();
256 if (MulImm >= 0 && MulImm <= Max) {
278 const unsigned SubRegs[]);
280 void SelectTable(
SDNode *
N,
unsigned NumVecs,
unsigned Opc,
bool isExt);
282 bool tryIndexedLoad(
SDNode *
N);
284 bool trySelectStackSlotTagP(
SDNode *
N);
287 void SelectLoad(
SDNode *
N,
unsigned NumVecs,
unsigned Opc,
289 void SelectPostLoad(
SDNode *
N,
unsigned NumVecs,
unsigned Opc,
291 void SelectLoadLane(
SDNode *
N,
unsigned NumVecs,
unsigned Opc);
292 void SelectPostLoadLane(
SDNode *
N,
unsigned NumVecs,
unsigned Opc);
293 void SelectPredicatedLoad(
SDNode *
N,
unsigned NumVecs,
unsigned Scale,
294 unsigned Opc_rr,
unsigned Opc_ri,
295 bool IsIntr =
false);
299 template <
int64_t Min,
int64_t Max>
303 template <
unsigned Scale>
305 return SelectSVERegRegAddrMode(
N, Scale,
Base, Offset);
308 void SelectStore(
SDNode *
N,
unsigned NumVecs,
unsigned Opc);
309 void SelectPostStore(
SDNode *
N,
unsigned NumVecs,
unsigned Opc);
310 void SelectStoreLane(
SDNode *
N,
unsigned NumVecs,
unsigned Opc);
311 void SelectPostStoreLane(
SDNode *
N,
unsigned NumVecs,
unsigned Opc);
312 void SelectPredicatedStore(
SDNode *
N,
unsigned NumVecs,
unsigned Scale,
313 unsigned Opc_rr,
unsigned Opc_ri);
314 std::tuple<unsigned, SDValue, SDValue>
315 findAddrModeSVELoadStore(
SDNode *
N,
unsigned Opc_rr,
unsigned Opc_ri,
319 bool tryBitfieldExtractOp(
SDNode *
N);
320 bool tryBitfieldExtractOpFromSExt(
SDNode *
N);
321 bool tryBitfieldInsertOp(
SDNode *
N);
322 bool tryBitfieldInsertInZeroOp(
SDNode *
N);
323 bool tryShiftAmountMod(
SDNode *
N);
326 bool tryReadRegister(
SDNode *
N);
327 bool tryWriteRegister(
SDNode *
N);
330 #include "AArch64GenDAGISel.inc"
337 return SelectAddrModeIndexedBitWidth(
N,
true, 7, Size,
Base, OffImm);
339 bool SelectAddrModeIndexedBitWidth(
SDValue N,
bool IsSignedImm,
unsigned BW,
352 bool isWorthFolding(
SDValue V)
const;
353 bool SelectExtendedSHL(
SDValue N,
unsigned Size,
bool WantExtend,
356 template<
unsigned RegW
idth>
358 return SelectCVTFixedPosOperand(
N, FixedPos, RegWidth);
363 bool SelectCMP_SWAP(
SDNode *
N);
371 bool AllowSaturation,
SDValue &Imm);
377 bool SelectAllActivePredicate(
SDValue N);
385 Imm =
C->getZExtValue();
402 return N->getOpcode() == Opc &&
406 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
407 const SDValue &
Op,
unsigned ConstraintID, std::vector<SDValue> &OutOps) {
408 switch(ConstraintID) {
421 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
422 dl,
Op.getValueType(),
424 OutOps.push_back(NewOp);
440 if (!isa<ConstantSDNode>(
N.getNode()))
443 uint64_t Immed = cast<ConstantSDNode>(
N.getNode())->getZExtValue();
446 if (Immed >> 12 == 0) {
448 }
else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
456 Val = CurDAG->getTargetConstant(Immed, dl,
MVT::i32);
470 if (!isa<ConstantSDNode>(
N.getNode()))
474 uint64_t Immed = cast<ConstantSDNode>(
N.getNode())->getZExtValue();
485 Immed = ~Immed + 1ULL;
486 if (Immed & 0xFFFFFFFFFF000000ULL)
489 Immed &= 0xFFFFFFULL;
490 return SelectArithImmed(CurDAG->getConstant(Immed,
SDLoc(
N),
MVT::i32), Val,
497 switch (
N.getOpcode()) {
516 auto *CSD = dyn_cast<ConstantSDNode>(V.
getOperand(1));
519 unsigned ShiftVal = CSD->getZExtValue();
527 for (
SDNode *UI : Node->uses())
528 if (!isa<MemSDNode>(*UI))
530 if (!isa<MemSDNode>(*UII))
536 bool AArch64DAGToDAGISel::isWorthFolding(
SDValue V)
const {
539 if (CurDAG->shouldOptForSize() || V.
hasOneUse())
564 bool AArch64DAGToDAGISel::SelectShiftedRegister(
SDValue N,
bool AllowROR,
573 unsigned BitSize =
N.getValueSizeInBits();
574 unsigned Val =
RHS->getZExtValue() & (BitSize - 1);
577 Reg =
N.getOperand(0);
579 return isWorthFolding(
N);
593 SrcVT = cast<VTSDNode>(
N.getOperand(1))->getVT();
595 SrcVT =
N.getOperand(0).getValueType();
597 if (!IsLoadStore && SrcVT ==
MVT::i8)
599 else if (!IsLoadStore && SrcVT ==
MVT::i16)
608 EVT SrcVT =
N.getOperand(0).getValueType();
609 if (!IsLoadStore && SrcVT ==
MVT::i8)
611 else if (!IsLoadStore && SrcVT ==
MVT::i16)
664 SDValue &LaneOp,
int &LaneIdx) {
678 bool AArch64DAGToDAGISel::tryMLAV64LaneV128(
SDNode *
N) {
698 SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
700 unsigned MLAOpc = ~0U;
702 switch (
N->getSimpleValueType(0).SimpleTy) {
706 MLAOpc = AArch64::MLAv4i16_indexed;
709 MLAOpc = AArch64::MLAv8i16_indexed;
712 MLAOpc = AArch64::MLAv2i32_indexed;
715 MLAOpc = AArch64::MLAv4i32_indexed;
719 ReplaceNode(
N, CurDAG->getMachineNode(MLAOpc, dl,
N->getValueType(0), Ops));
723 bool AArch64DAGToDAGISel::tryMULLV64LaneV128(
unsigned IntNo,
SDNode *
N) {
735 SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
737 unsigned SMULLOpc = ~0U;
739 if (IntNo == Intrinsic::aarch64_neon_smull) {
740 switch (
N->getSimpleValueType(0).SimpleTy) {
744 SMULLOpc = AArch64::SMULLv4i16_indexed;
747 SMULLOpc = AArch64::SMULLv2i32_indexed;
750 }
else if (IntNo == Intrinsic::aarch64_neon_umull) {
751 switch (
N->getSimpleValueType(0).SimpleTy) {
755 SMULLOpc = AArch64::UMULLv4i16_indexed;
758 SMULLOpc = AArch64::UMULLv2i32_indexed;
764 ReplaceNode(
N, CurDAG->getMachineNode(SMULLOpc, dl,
N->getValueType(0), Ops));
784 template<
signed Low,
signed High,
signed Scale>
786 if (!isa<ConstantSDNode>(
N))
789 int64_t MulImm = cast<ConstantSDNode>(
N)->getSExtValue();
790 if ((MulImm %
std::abs(Scale)) == 0) {
791 int64_t RDVLImm = MulImm / Scale;
792 if ((RDVLImm >= Low) && (RDVLImm <=
High)) {
805 unsigned ShiftVal = 0;
820 Reg =
N.getOperand(0).getOperand(0);
826 Reg =
N.getOperand(0);
830 Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*
Reg.getNode()))
843 return isWorthFolding(
N);
852 for (
auto Use :
N->uses()) {
870 bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(
SDValue N,
bool IsSignedImm,
871 unsigned BW,
unsigned Size,
878 int FI = cast<FrameIndexSDNode>(
N)->getIndex();
880 OffImm = CurDAG->getTargetConstant(0, dl,
MVT::i64);
886 if (CurDAG->isBaseWithConstantOffset(
N)) {
889 int64_t RHSC =
RHS->getSExtValue();
890 unsigned Scale =
Log2_32(Size);
891 int64_t Range = 0x1LL << (BW - 1);
893 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
894 RHSC < (Range << Scale)) {
895 Base =
N.getOperand(0);
897 int FI = cast<FrameIndexSDNode>(
Base)->getIndex();
900 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl,
MVT::i64);
906 unsigned Scale =
Log2_32(Size);
909 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
910 Base =
N.getOperand(0);
912 int FI = cast<FrameIndexSDNode>(
Base)->getIndex();
915 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl,
MVT::i64);
926 OffImm = CurDAG->getTargetConstant(0, dl,
MVT::i64);
933 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(
SDValue N,
unsigned Size,
939 int FI = cast<FrameIndexSDNode>(
N)->getIndex();
941 OffImm = CurDAG->getTargetConstant(0, dl,
MVT::i64);
947 dyn_cast<GlobalAddressSDNode>(
N.getOperand(1).getNode());
948 Base =
N.getOperand(0);
949 OffImm =
N.getOperand(1);
958 if (CurDAG->isBaseWithConstantOffset(
N)) {
960 int64_t RHSC = (int64_t)
RHS->getZExtValue();
961 unsigned Scale =
Log2_32(Size);
962 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
963 Base =
N.getOperand(0);
965 int FI = cast<FrameIndexSDNode>(
Base)->getIndex();
968 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl,
MVT::i64);
976 if (SelectAddrModeUnscaled(
N, Size,
Base, OffImm))
984 OffImm = CurDAG->getTargetConstant(0, dl,
MVT::i64);
993 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(
SDValue N,
unsigned Size,
996 if (!CurDAG->isBaseWithConstantOffset(
N))
999 int64_t RHSC =
RHS->getSExtValue();
1001 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
1002 RHSC < (0x1000 <<
Log2_32(Size)))
1004 if (RHSC >= -256 && RHSC < 256) {
1005 Base =
N.getOperand(0);
1007 int FI = cast<FrameIndexSDNode>(
Base)->getIndex();
1009 Base = CurDAG->getTargetFrameIndex(
1031 bool AArch64DAGToDAGISel::SelectExtendedSHL(
SDValue N,
unsigned Size,
1032 bool WantExtend,
SDValue &Offset,
1054 unsigned LegalShiftVal =
Log2_32(Size);
1057 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1060 return isWorthFolding(
N);
1063 bool AArch64DAGToDAGISel::SelectAddrModeWRO(
SDValue N,
unsigned Size,
1075 if (isa<ConstantSDNode>(
LHS) || isa<ConstantSDNode>(
RHS))
1081 const SDNode *Node =
N.getNode();
1082 for (
SDNode *UI : Node->uses()) {
1083 if (!isa<MemSDNode>(*UI))
1088 bool IsExtendedRegisterWorthFolding = isWorthFolding(
N);
1091 if (IsExtendedRegisterWorthFolding &&
RHS.getOpcode() ==
ISD::SHL &&
1092 SelectExtendedSHL(
RHS, Size,
true, Offset, SignExtend)) {
1094 DoShift = CurDAG->getTargetConstant(
true, dl,
MVT::i32);
1099 if (IsExtendedRegisterWorthFolding &&
LHS.getOpcode() ==
ISD::SHL &&
1100 SelectExtendedSHL(
LHS, Size,
true, Offset, SignExtend)) {
1102 DoShift = CurDAG->getTargetConstant(
true, dl,
MVT::i32);
1107 DoShift = CurDAG->getTargetConstant(
false, dl,
MVT::i32);
1111 if (IsExtendedRegisterWorthFolding &&
1118 if (isWorthFolding(
LHS))
1123 if (IsExtendedRegisterWorthFolding &&
1130 if (isWorthFolding(
RHS))
1142 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1145 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1147 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1148 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1152 bool AArch64DAGToDAGISel::SelectAddrModeXRO(
SDValue N,
unsigned Size,
1165 const SDNode *Node =
N.getNode();
1166 for (
SDNode *UI : Node->uses()) {
1167 if (!isa<MemSDNode>(*UI))
1182 if (isa<ConstantSDNode>(
RHS)) {
1183 int64_t ImmOff = (int64_t)cast<ConstantSDNode>(
RHS)->getZExtValue();
1184 unsigned Scale =
Log2_32(Size);
1188 if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
1194 CurDAG->getMachineNode(AArch64::MOVi64imm,
DL,
MVT::i64, Ops);
1201 bool IsExtendedRegisterWorthFolding = isWorthFolding(
N);
1204 if (IsExtendedRegisterWorthFolding &&
RHS.getOpcode() ==
ISD::SHL &&
1205 SelectExtendedSHL(
RHS, Size,
false, Offset, SignExtend)) {
1207 DoShift = CurDAG->getTargetConstant(
true,
DL,
MVT::i32);
1212 if (IsExtendedRegisterWorthFolding &&
LHS.getOpcode() ==
ISD::SHL &&
1213 SelectExtendedSHL(
LHS, Size,
false, Offset, SignExtend)) {
1215 DoShift = CurDAG->getTargetConstant(
true,
DL,
MVT::i32);
1223 DoShift = CurDAG->getTargetConstant(
false,
DL,
MVT::i32);
1229 static const unsigned RegClassIDs[] = {
1230 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1231 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1232 AArch64::dsub2, AArch64::dsub3};
1238 static const unsigned RegClassIDs[] = {
1239 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1240 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1241 AArch64::qsub2, AArch64::qsub3};
1247 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1248 AArch64::ZPR3RegClassID,
1249 AArch64::ZPR4RegClassID};
1250 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1251 AArch64::zsub2, AArch64::zsub3};
1257 const unsigned RegClassIDs[],
1258 const unsigned SubRegs[]) {
1261 if (Regs.
size() == 1)
1272 CurDAG->getTargetConstant(RegClassIDs[Regs.
size() - 2],
DL,
MVT::i32));
1275 for (
unsigned i = 0;
i < Regs.
size(); ++
i) {
1276 Ops.push_back(Regs[
i]);
1277 Ops.push_back(CurDAG->getTargetConstant(SubRegs[
i],
DL,
MVT::i32));
1281 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
DL,
MVT::Untyped, Ops);
1285 void AArch64DAGToDAGISel::SelectTable(
SDNode *
N,
unsigned NumVecs,
unsigned Opc,
1288 EVT VT =
N->getValueType(0);
1290 unsigned ExtOff = isExt;
1293 unsigned Vec0Off = ExtOff + 1;
1295 N->op_begin() + Vec0Off + NumVecs);
1300 Ops.push_back(
N->getOperand(1));
1301 Ops.push_back(RegSeq);
1302 Ops.push_back(
N->getOperand(NumVecs + ExtOff + 1));
1303 ReplaceNode(
N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1306 bool AArch64DAGToDAGISel::tryIndexedLoad(
SDNode *
N) {
1308 if (
LD->isUnindexed())
1310 EVT VT =
LD->getMemoryVT();
1311 EVT DstVT =
N->getValueType(0);
1318 unsigned Opcode = 0;
1321 bool InsertTo64 =
false;
1323 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1326 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1328 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1330 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1339 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1341 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1343 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1352 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1354 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1356 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1363 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1365 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1367 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1369 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1371 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1381 SDNode *Res = CurDAG->getMachineNode(Opcode, dl,
MVT::i64, DstVT,
1386 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {
MemOp});
1393 SDValue(CurDAG->getMachineNode(
1394 AArch64::SUBREG_TO_REG, dl,
MVT::i64,
1395 CurDAG->getTargetConstant(0, dl,
MVT::i64), LoadedVal,
1400 ReplaceUses(
SDValue(
N, 0), LoadedVal);
1403 CurDAG->RemoveDeadNode(
N);
1407 void AArch64DAGToDAGISel::SelectLoad(
SDNode *
N,
unsigned NumVecs,
unsigned Opc,
1408 unsigned SubRegIdx) {
1410 EVT VT =
N->getValueType(0);
1418 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1420 for (
unsigned i = 0;
i < NumVecs; ++
i)
1422 CurDAG->getTargetExtractSubreg(SubRegIdx +
i, dl, VT, SuperReg));
1428 if (
auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(
N)) {
1430 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {
MemOp});
1433 CurDAG->RemoveDeadNode(
N);
1436 void AArch64DAGToDAGISel::SelectPostLoad(
SDNode *
N,
unsigned NumVecs,
1437 unsigned Opc,
unsigned SubRegIdx) {
1439 EVT VT =
N->getValueType(0);
1449 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1457 ReplaceUses(
SDValue(
N, 0), SuperReg);
1459 for (
unsigned i = 0;
i < NumVecs; ++
i)
1461 CurDAG->getTargetExtractSubreg(SubRegIdx +
i, dl, VT, SuperReg));
1465 CurDAG->RemoveDeadNode(
N);
1471 std::tuple<unsigned, SDValue, SDValue>
1472 AArch64DAGToDAGISel::findAddrModeSVELoadStore(
SDNode *
N,
unsigned Opc_rr,
1478 SDValue NewOffset = OldOffset;
1480 const bool IsRegImm = SelectAddrModeIndexedSVE<-8, 7>(
1481 N, OldBase, NewBase, NewOffset);
1485 const bool IsRegReg =
1486 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1489 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1492 void AArch64DAGToDAGISel::SelectPredicatedLoad(
SDNode *
N,
unsigned NumVecs,
1493 unsigned Scale,
unsigned Opc_ri,
1494 unsigned Opc_rr,
bool IsIntr) {
1495 assert(Scale < 4 &&
"Invalid scaling value.");
1497 EVT VT =
N->getValueType(0);
1503 std::tie(Opc,
Base, Offset) = findAddrModeSVELoadStore(
1504 N, Opc_rr, Opc_ri,
N->getOperand(IsIntr ? 3 : 2),
1505 CurDAG->getTargetConstant(0,
DL,
MVT::i64), Scale);
1507 SDValue Ops[] = {
N->getOperand(IsIntr ? 2 : 1),
1513 SDNode *
Load = CurDAG->getMachineNode(Opc,
DL, ResTys, Ops);
1515 for (
unsigned i = 0;
i < NumVecs; ++
i)
1516 ReplaceUses(
SDValue(
N,
i), CurDAG->getTargetExtractSubreg(
1517 AArch64::zsub0 +
i,
DL, VT, SuperReg));
1520 unsigned ChainIdx = NumVecs;
1522 CurDAG->RemoveDeadNode(
N);
1525 void AArch64DAGToDAGISel::SelectStore(
SDNode *
N,
unsigned NumVecs,
1528 EVT VT =
N->getOperand(2)->getValueType(0);
1535 SDValue Ops[] = {RegSeq,
N->getOperand(NumVecs + 2),
N->getOperand(0)};
1536 SDNode *St = CurDAG->getMachineNode(Opc, dl,
N->getValueType(0), Ops);
1540 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {
MemOp});
1545 void AArch64DAGToDAGISel::SelectPredicatedStore(
SDNode *
N,
unsigned NumVecs,
1546 unsigned Scale,
unsigned Opc_rr,
1552 SDValue RegSeq = createZTuple(Regs);
1557 std::tie(Opc,
Base, Offset) = findAddrModeSVELoadStore(
1558 N, Opc_rr, Opc_ri,
N->getOperand(NumVecs + 3),
1559 CurDAG->getTargetConstant(0, dl,
MVT::i64), Scale);
1561 SDValue Ops[] = {RegSeq,
N->getOperand(NumVecs + 2),
1565 SDNode *St = CurDAG->getMachineNode(Opc, dl,
N->getValueType(0), Ops);
1577 if (
auto FINode = dyn_cast<FrameIndexSDNode>(
N)) {
1578 int FI = FINode->getIndex();
1580 OffImm = CurDAG->getTargetConstant(0, dl,
MVT::i64);
1587 void AArch64DAGToDAGISel::SelectPostStore(
SDNode *
N,
unsigned NumVecs,
1590 EVT VT =
N->getOperand(2)->getValueType(0);
1600 N->getOperand(NumVecs + 1),
1601 N->getOperand(NumVecs + 2),
1603 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1643 void AArch64DAGToDAGISel::SelectLoadLane(
SDNode *
N,
unsigned NumVecs,
1646 EVT VT =
N->getValueType(0);
1661 cast<ConstantSDNode>(
N->getOperand(NumVecs + 2))->getZExtValue();
1663 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl,
MVT::i64),
1664 N->getOperand(NumVecs + 3),
N->getOperand(0)};
1665 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1669 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1670 AArch64::qsub2, AArch64::qsub3 };
1671 for (
unsigned i = 0;
i < NumVecs; ++
i) {
1672 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[
i], dl, WideVT, SuperReg);
1679 CurDAG->RemoveDeadNode(
N);
1682 void AArch64DAGToDAGISel::SelectPostLoadLane(
SDNode *
N,
unsigned NumVecs,
1685 EVT VT =
N->getValueType(0);
1701 cast<ConstantSDNode>(
N->getOperand(NumVecs + 1))->getZExtValue();
1704 CurDAG->getTargetConstant(LaneNo, dl,
1706 N->getOperand(NumVecs + 2),
1707 N->getOperand(NumVecs + 3),
1709 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1721 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1722 AArch64::qsub2, AArch64::qsub3 };
1723 for (
unsigned i = 0;
i < NumVecs; ++
i) {
1724 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[
i], dl, WideVT,
1734 CurDAG->RemoveDeadNode(
N);
1737 void AArch64DAGToDAGISel::SelectStoreLane(
SDNode *
N,
unsigned NumVecs,
1740 EVT VT =
N->getOperand(2)->getValueType(0);
1753 cast<ConstantSDNode>(
N->getOperand(NumVecs + 2))->getZExtValue();
1755 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl,
MVT::i64),
1756 N->getOperand(NumVecs + 3),
N->getOperand(0)};
1761 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {
MemOp});
1766 void AArch64DAGToDAGISel::SelectPostStoreLane(
SDNode *
N,
unsigned NumVecs,
1769 EVT VT =
N->getOperand(2)->getValueType(0);
1785 cast<ConstantSDNode>(
N->getOperand(NumVecs + 1))->getZExtValue();
1787 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl,
MVT::i64),
1788 N->getOperand(NumVecs + 2),
1789 N->getOperand(NumVecs + 3),
1791 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1795 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {
MemOp});
1802 unsigned &LSB,
unsigned &MSB,
1803 unsigned NumberOfIgnoredLowBits,
1804 bool BiggerPattern) {
1806 "N must be a AND operation to call this function");
1808 EVT VT =
N->getValueType(0);
1814 "Type checking must have been done before calling this function");
1828 const SDNode *Op0 =
N->getOperand(0).getNode();
1832 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
1835 if (AndImm & (AndImm + 1))
1838 bool ClampMSB =
false;
1859 }
else if (BiggerPattern) {
1865 Opd0 =
N->getOperand(0);
1871 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.
getSizeInBits())) {
1874 <<
": Found large shift immediate, this should not happen\n"));
1879 MSB = SrlImm + (VT ==
MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
1880 : countTrailingOnes<uint64_t>(AndImm)) -
1887 MSB = MSB > 31 ? 31 : MSB;
1889 Opc = VT ==
MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1894 SDValue &Opd0,
unsigned &Immr,
1898 EVT VT =
N->getValueType(0);
1901 "Type checking must have been done before calling this function");
1905 Op =
Op->getOperand(0);
1906 VT =
Op->getValueType(0);
1915 unsigned Width = cast<VTSDNode>(
N->getOperand(1))->getVT().getSizeInBits();
1919 Opc = (VT ==
MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
1920 Opd0 =
Op.getOperand(0);
1922 Imms = ShiftImm +
Width - 1;
1950 Opd0 =
N->getOperand(0).getOperand(0);
1958 if (BitWide &&
isMask_64(AndMask >> SrlImm)) {
1960 Opc = AArch64::UBFMWri;
1962 Opc = AArch64::UBFMXri;
1965 MSB = BitWide + SrlImm - 1;
1973 unsigned &Immr,
unsigned &Imms,
1974 bool BiggerPattern) {
1976 "N must be a SHR/SRA operation to call this function");
1978 EVT VT =
N->getValueType(0);
1984 "Type checking must have been done before calling this function");
1994 Opd0 =
N->getOperand(0).getOperand(0);
2001 Opd0 =
N->getOperand(0).getOperand(0);
2005 }
else if (BiggerPattern) {
2009 Opd0 =
N->getOperand(0);
2018 <<
": Found large shift immediate, this should not happen\n"));
2027 "bad amount in shift node!");
2028 int immr = SrlImm - ShlImm;
2033 Opc =
N->getOpcode() ==
ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2035 Opc =
N->getOpcode() ==
ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2039 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(
SDNode *
N) {
2042 EVT VT =
N->getValueType(0);
2043 EVT NarrowVT =
N->getOperand(0)->getValueType(0);
2055 unsigned Immr = ShiftImm;
2057 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2058 CurDAG->getTargetConstant(Imms, dl, VT)};
2059 CurDAG->SelectNodeTo(
N, AArch64::SBFMXri, VT, Ops);
2065 bool AArch64DAGToDAGISel::tryHighFPExt(
SDNode *
N) {
2069 SDValue Extract =
N->getOperand(0);
2070 EVT VT =
N->getValueType(0);
2087 auto Opcode = VT ==
MVT::v2f64 ? AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16;
2088 CurDAG->SelectNodeTo(
N, Opcode, VT, Extract.
getOperand(0));
2093 SDValue &Opd0,
unsigned &Immr,
unsigned &Imms,
2094 unsigned NumberOfIgnoredLowBits = 0,
2095 bool BiggerPattern =
false) {
2099 switch (
N->getOpcode()) {
2101 if (!
N->isMachineOpcode())
2106 NumberOfIgnoredLowBits, BiggerPattern);
2115 unsigned NOpc =
N->getMachineOpcode();
2119 case AArch64::SBFMWri:
2120 case AArch64::UBFMWri:
2121 case AArch64::SBFMXri:
2122 case AArch64::UBFMXri:
2124 Opd0 =
N->getOperand(0);
2125 Immr = cast<ConstantSDNode>(
N->getOperand(1).getNode())->getZExtValue();
2126 Imms = cast<ConstantSDNode>(
N->getOperand(2).getNode())->getZExtValue();
2133 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(
SDNode *
N) {
2134 unsigned Opc, Immr, Imms;
2139 EVT VT =
N->getValueType(0);
2144 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT ==
MVT::i32) {
2145 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl,
MVT::i64),
2146 CurDAG->getTargetConstant(Imms, dl,
MVT::i64)};
2150 ReplaceNode(
N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
2155 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2156 CurDAG->getTargetConstant(Imms, dl, VT)};
2157 CurDAG->SelectNodeTo(
N, Opc, VT, Ops);
2166 unsigned NumberOfIgnoredHighBits,
EVT VT) {
2168 "i32 or i64 mask type expected!");
2174 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2175 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2198 cast<const ConstantSDNode>(
Op.getOperand(1).getNode())->getZExtValue();
2208 APInt OpUsefulBits(UsefulBits);
2212 OpUsefulBits <<= MSB - Imm + 1;
2217 OpUsefulBits <<= Imm;
2219 OpUsefulBits <<= MSB + 1;
2222 OpUsefulBits <<= OpUsefulBits.
getBitWidth() - Imm;
2228 UsefulBits &= OpUsefulBits;
2234 cast<const ConstantSDNode>(
Op.getOperand(1).getNode())->getZExtValue();
2236 cast<const ConstantSDNode>(
Op.getOperand(2).getNode())->getZExtValue();
2244 cast<const ConstantSDNode>(
Op.getOperand(2).getNode())->getZExtValue();
2246 Mask.clearAllBits();
2254 Mask.lshrInPlace(ShiftAmt);
2260 Mask.lshrInPlace(ShiftAmt);
2272 cast<const ConstantSDNode>(
Op.getOperand(2).getNode())->getZExtValue();
2274 cast<const ConstantSDNode>(
Op.getOperand(3).getNode())->getZExtValue();
2276 APInt OpUsefulBits(UsefulBits);
2290 OpUsefulBits <<=
Width;
2293 if (
Op.getOperand(1) == Orig) {
2295 Mask = ResultUsefulBits & OpUsefulBits;
2299 if (
Op.getOperand(0) == Orig)
2301 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2307 OpUsefulBits <<=
Width;
2309 OpUsefulBits <<= LSB;
2311 if (
Op.getOperand(1) == Orig) {
2313 Mask = ResultUsefulBits & OpUsefulBits;
2314 Mask.lshrInPlace(LSB);
2317 if (
Op.getOperand(0) == Orig)
2318 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2335 case AArch64::ANDSWri:
2336 case AArch64::ANDSXri:
2337 case AArch64::ANDWri:
2338 case AArch64::ANDXri:
2342 case AArch64::UBFMWri:
2343 case AArch64::UBFMXri:
2346 case AArch64::ORRWrs:
2347 case AArch64::ORRXrs:
2352 case AArch64::BFMWri:
2353 case AArch64::BFMXri:
2356 case AArch64::STRBBui:
2357 case AArch64::STURBBi:
2363 case AArch64::STRHHui:
2364 case AArch64::STURHHi:
2377 unsigned Bitwidth =
Op.getScalarValueSizeInBits();
2379 UsefulBits =
APInt(Bitwidth, 0);
2384 for (
SDNode *Node :
Op.getNode()->uses()) {
2388 UsersUsefulBits |= UsefulBitsForUse;
2393 UsefulBits &= UsersUsefulBits;
2403 EVT VT =
Op.getValueType();
2406 unsigned UBFMOpc =
BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2409 if (ShlAmount > 0) {
2412 UBFMOpc, dl, VT,
Op,
2417 assert(ShlAmount < 0 &&
"expected right shift");
2418 int ShrAmount = -ShlAmount;
2431 SDValue &Src,
int &ShiftAmount,
2433 EVT VT =
Op.getValueType();
2449 Op =
Op.getOperand(0);
2454 if (!BiggerPattern && !
Op.hasOneUse())
2460 Op =
Op.getOperand(0);
2473 if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
2492 EVT VT =
N->getValueType(0);
2511 if (!
And.hasOneUse() ||
2528 if ((OrImm & NotKnownZero) != 0) {
2540 unsigned ImmS =
Width - 1;
2546 bool IsBFI = LSB != 0;
2551 unsigned OrChunks = 0, BFIChunks = 0;
2553 if (((OrImm >>
Shift) & 0xFFFF) != 0)
2555 if (((BFIImm >>
Shift) & 0xFFFF) != 0)
2558 if (BFIChunks > OrChunks)
2564 unsigned MOVIOpc = VT ==
MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
2572 unsigned Opc = (VT ==
MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2581 EVT VT =
N->getValueType(0);
2611 for (
int I = 0;
I < 4; ++
I) {
2614 unsigned ImmR, ImmS;
2615 bool BiggerPattern =
I / 2;
2616 SDValue OrOpd0Val =
N->getOperand(
I % 2);
2618 SDValue OrOpd1Val =
N->getOperand((
I + 1) % 2);
2624 NumberOfIgnoredLowBits, BiggerPattern)) {
2627 if ((BFXOpc != AArch64::UBFMXri && VT ==
MVT::i64) ||
2628 (BFXOpc != AArch64::UBFMWri && VT ==
MVT::i32))
2633 Width = ImmS - ImmR + 1;
2644 Src, DstLSB,
Width)) {
2662 APInt BitsToBeInserted =
2665 if ((BitsToBeInserted & ~Known.
Zero) != 0)
2683 unsigned Opc = (VT ==
MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2716 unsigned ShiftOpc = (VT ==
MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2718 if (Src->hasOneUse() &&
2721 Src = Src->getOperand(0);
2731 unsigned ImmS =
Width - 1;
2737 unsigned Opc = (VT ==
MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2745 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(
SDNode *
N) {
2754 CurDAG->SelectNodeTo(
N, TargetOpcode::IMPLICIT_DEF,
N->getValueType(0));
2767 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(
SDNode *
N) {
2771 EVT VT =
N->getValueType(0);
2778 Op0, DstLSB,
Width))
2784 unsigned ImmS =
Width - 1;
2787 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR,
DL, VT),
2788 CurDAG->getTargetConstant(ImmS,
DL, VT)};
2789 unsigned Opc = (VT ==
MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2790 CurDAG->SelectNodeTo(
N, Opc, VT, Ops);
2796 bool AArch64DAGToDAGISel::tryShiftAmountMod(
SDNode *
N) {
2797 EVT VT =
N->getValueType(0);
2800 switch (
N->getOpcode()) {
2802 Opc = (VT ==
MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
2805 Opc = (VT ==
MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
2808 Opc = (VT ==
MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
2811 Opc = (VT ==
MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
2828 SDValue ShiftAmt =
N->getOperand(1);
2848 (Add0Imm % Size == 0)) {
2855 NegOpc = AArch64::SUBWrr;
2856 ZeroReg = AArch64::WZR;
2859 NegOpc = AArch64::SUBXrr;
2860 ZeroReg = AArch64::XZR;
2863 CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
DL, ZeroReg, SubVT);
2865 CurDAG->getMachineNode(NegOpc,
DL, SubVT, Zero, Add1);
2866 NewShiftAmt =
SDValue(Neg, 0);
2875 NotOpc = AArch64::ORNWrr;
2876 ZeroReg = AArch64::WZR;
2879 NotOpc = AArch64::ORNXrr;
2880 ZeroReg = AArch64::XZR;
2883 CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
DL, ZeroReg, SubVT);
2885 CurDAG->getMachineNode(NotOpc,
DL, SubVT, Zero, Add1);
2886 NewShiftAmt =
SDValue(Not, 0);
2910 AArch64::SUBREG_TO_REG,
DL, VT,
2915 SDValue Ops[] = {
N->getOperand(0), NewShiftAmt};
2916 CurDAG->SelectNodeTo(
N, Opc, VT, Ops);
2921 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(
SDValue N,
SDValue &FixedPos,
2922 unsigned RegWidth) {
2925 FVal = CN->getValueAPF();
2926 else if (
LoadSDNode *LN = dyn_cast<LoadSDNode>(
N)) {
2929 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
2933 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
2934 FVal = cast<ConstantFP>(CN->
getConstVal())->getValueAPF();
2953 if (!IsExact || !
IntVal.isPowerOf2())
return false;
2954 unsigned FBits =
IntVal.logBase2();
2958 if (FBits == 0 || FBits > RegWidth)
return false;
2969 RegString.
split(Fields,
':');
2971 if (Fields.size() == 1)
2974 assert(Fields.size() == 5
2975 &&
"Invalid number of fields in read register string");
2978 bool AllIntFields =
true;
2982 AllIntFields &= !
Field.getAsInteger(10, IntField);
2983 Ops.push_back(IntField);
2987 "Unexpected non-integer value in special register string.");
2992 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
2993 (Ops[3] << 3) | (Ops[4]);
3000 bool AArch64DAGToDAGISel::tryReadRegister(
SDNode *
N) {
3001 const auto *MD = cast<MDNodeSDNode>(
N->getOperand(1));
3002 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3007 ReplaceNode(
N, CurDAG->getMachineNode(
3017 if (TheReg && TheReg->Readable &&
3018 TheReg->haveFeatures(Subtarget->getFeatureBits()))
3019 Reg = TheReg->Encoding;
3024 ReplaceNode(
N, CurDAG->getMachineNode(
3031 if (RegString->getString() ==
"pc") {
3032 ReplaceNode(
N, CurDAG->getMachineNode(
3046 bool AArch64DAGToDAGISel::tryWriteRegister(
SDNode *
N) {
3047 const auto *MD = cast<MDNodeSDNode>(
N->getOperand(1));
3048 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3056 N->getOperand(2),
N->getOperand(0)));
3065 auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());
3067 assert (isa<ConstantSDNode>(
N->getOperand(2))
3068 &&
"Expected a constant integer expression.");
3069 unsigned Reg = PMapper->Encoding;
3070 uint64_t Immed = cast<ConstantSDNode>(
N->getOperand(2))->getZExtValue();
3072 if (
Reg == AArch64PState::PAN ||
Reg == AArch64PState::UAO ||
Reg == AArch64PState::SSBS) {
3073 assert(Immed < 2 &&
"Bad imm");
3074 State = AArch64::MSRpstateImm1;
3076 assert(Immed < 16 &&
"Bad imm");
3077 State = AArch64::MSRpstateImm4;
3079 ReplaceNode(
N, CurDAG->getMachineNode(
3082 CurDAG->getTargetConstant(Immed,
DL,
MVT::i16),
3091 if (TheReg && TheReg->Writeable &&
3092 TheReg->haveFeatures(Subtarget->getFeatureBits()))
3093 Reg = TheReg->Encoding;
3097 ReplaceNode(
N, CurDAG->getMachineNode(
3100 N->getOperand(2),
N->getOperand(0)));
3108 bool AArch64DAGToDAGISel::SelectCMP_SWAP(
SDNode *
N) {
3110 EVT MemTy = cast<MemSDNode>(
N)->getMemoryVT();
3113 if (Subtarget->hasLSE())
return false;
3116 Opcode = AArch64::CMP_SWAP_8;
3118 Opcode = AArch64::CMP_SWAP_16;
3120 Opcode = AArch64::CMP_SWAP_32;
3122 Opcode = AArch64::CMP_SWAP_64;
3127 SDValue Ops[] = {
N->getOperand(1),
N->getOperand(2),
N->getOperand(3),
3129 SDNode *CmpSwap = CurDAG->getMachineNode(
3134 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {
MemOp});
3138 CurDAG->RemoveDeadNode(
N);
3145 if (!isa<ConstantSDNode>(
N))
3158 Imm = CurDAG->getTargetConstant(Val,
DL,
MVT::i32);
3166 Imm = CurDAG->getTargetConstant(Val,
DL,
MVT::i32);
3170 if (Val <= 65280 && Val % 256 == 0) {
3172 Imm = CurDAG->getTargetConstant(Val >> 8,
DL,
MVT::i32);
3185 if (!isa<ConstantSDNode>(
N))
3189 int64_t Val = cast<ConstantSDNode>(
N)
3198 Imm = CurDAG->getTargetConstant(Val & 0xFF,
DL,
MVT::i32);
3204 if (Val >= -128 && Val <= 127) {
3206 Imm = CurDAG->getTargetConstant(Val & 0xFF,
DL,
MVT::i32);
3210 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
3212 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF,
DL,
MVT::i32);
3223 bool AArch64DAGToDAGISel::SelectSVESignedArithImm(
SDValue N,
SDValue &Imm) {
3224 if (
auto CNode = dyn_cast<ConstantSDNode>(
N)) {
3225 int64_t ImmVal = CNode->getSExtValue();
3227 if (ImmVal >= -128 && ImmVal < 128) {
3228 Imm = CurDAG->getTargetConstant(ImmVal,
DL,
MVT::i32);
3236 if (
auto CNode = dyn_cast<ConstantSDNode>(
N)) {
3237 uint64_t ImmVal = CNode->getZExtValue();
3247 ImmVal &= 0xFFFFFFFF;
3265 if (
auto CNode = dyn_cast<ConstantSDNode>(
N)) {
3266 uint64_t ImmVal = CNode->getZExtValue();
3276 ImmVal |= ImmVal << 8;
3277 ImmVal |= ImmVal << 16;
3278 ImmVal |= ImmVal << 32;
3282 ImmVal |= ImmVal << 16;
3283 ImmVal |= ImmVal << 32;
3286 ImmVal &= 0xFFFFFFFF;
3287 ImmVal |= ImmVal << 32;
3297 Imm = CurDAG->getTargetConstant(encoding,
DL,
MVT::i64);
3312 if (
auto *CN = dyn_cast<ConstantSDNode>(
N)) {
3313 uint64_t ImmVal = CN->getZExtValue();
3320 if (ImmVal >
High) {
3321 if (!AllowSaturation)
3333 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(
SDNode *
N) {
3337 if (!(isa<FrameIndexSDNode>(
N->getOperand(1)))) {
3343 cast<ConstantSDNode>(IRG_SP->
getOperand(1))->getZExtValue() !=
3344 Intrinsic::aarch64_irg_sp) {
3350 int FI = cast<FrameIndexSDNode>(
N->getOperand(1))->getIndex();
3351 SDValue FiOp = CurDAG->getTargetFrameIndex(
3353 int TagOffset = cast<ConstantSDNode>(
N->getOperand(3))->getZExtValue();
3355 SDNode *Out = CurDAG->getMachineNode(
3357 {FiOp, CurDAG->getTargetConstant(0,
DL,
MVT::i64),
N->getOperand(2),
3358 CurDAG->getTargetConstant(TagOffset,
DL,
MVT::i64)});
3359 ReplaceNode(
N, Out);
3363 void AArch64DAGToDAGISel::SelectTagP(
SDNode *
N) {
3364 assert(isa<ConstantSDNode>(
N->getOperand(3)) &&
3365 "llvm.aarch64.tagp third argument must be an immediate");
3366 if (trySelectStackSlotTagP(
N))
3373 int TagOffset = cast<ConstantSDNode>(
N->getOperand(3))->getZExtValue();
3375 {
N->getOperand(1),
N->getOperand(2)});
3377 {
SDValue(N1, 0),
N->getOperand(2)});
3378 SDNode *N3 = CurDAG->getMachineNode(
3381 CurDAG->getTargetConstant(TagOffset,
DL,
MVT::i64)});
3391 "Expected to extract from a packed scalable vector!");
3393 "Expected to extract a fixed length vector!");
3417 "Expected to insert into a packed scalable vector!");
3419 "Expected to insert a fixed length vector!");
3444 if (Node->isMachineOpcode()) {
3446 Node->setNodeId(-1);
3451 EVT VT = Node->getValueType(0);
3453 switch (Node->getOpcode()) {
3458 if (SelectCMP_SWAP(Node))
3463 if (tryReadRegister(Node))
3468 if (tryWriteRegister(Node))
3473 if (tryMLAV64LaneV128(Node))
3480 if (tryIndexedLoad(Node))
3489 if (tryBitfieldExtractOp(Node))
3491 if (tryBitfieldInsertInZeroOp(Node))
3496 if (tryShiftAmountMod(Node))
3501 if (tryBitfieldExtractOpFromSExt(Node))
3506 if (tryHighFPExt(Node))
3511 if (tryBitfieldInsertOp(Node))
3517 if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0)
3521 EVT InVT = Node->getOperand(0).getValueType();
3533 ReplaceNode(Node,
extractSubReg(CurDAG, VT, Node->getOperand(0)));
3539 if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0)
3541 if (!Node->getOperand(0).isUndef())
3545 EVT InVT = Node->getOperand(1).getValueType();
3557 ReplaceNode(Node,
insertSubReg(CurDAG, VT, Node->getOperand(1)));
3565 if (ConstNode->
isZero()) {
3568 CurDAG->getEntryNode(),
SDLoc(Node), AArch64::WZR,
MVT::i32);
3569 ReplaceNode(Node,
New.getNode());
3573 CurDAG->getEntryNode(),
SDLoc(Node), AArch64::XZR,
MVT::i64);
3574 ReplaceNode(Node,
New.getNode());
3583 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
3586 SDValue TFI = CurDAG->getTargetFrameIndex(
3590 CurDAG->getTargetConstant(Shifter,
DL,
MVT::i32) };
3591 CurDAG->SelectNodeTo(Node, AArch64::ADDXri,
MVT::i64, Ops);
3595 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
3599 case Intrinsic::aarch64_ldaxp:
3600 case Intrinsic::aarch64_ldxp: {
3602 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
3603 SDValue MemAddr = Node->getOperand(2);
3605 SDValue Chain = Node->getOperand(0);
3612 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
3613 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {
MemOp});
3614 ReplaceNode(Node, Ld);
3617 case Intrinsic::aarch64_stlxp:
3618 case Intrinsic::aarch64_stxp: {
3620 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
3622 SDValue Chain = Node->getOperand(0);
3623 SDValue ValLo = Node->getOperand(2);
3624 SDValue ValHi = Node->getOperand(3);
3625 SDValue MemAddr = Node->getOperand(4);
3628 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
3633 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
3634 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {
MemOp});
3636 ReplaceNode(Node, St);
3639 case Intrinsic::aarch64_neon_ld1x2:
3641 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
3644 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
3647 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
3650 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
3653 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
3656 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
3659 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
3662 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
3666 case Intrinsic::aarch64_neon_ld1x3:
3668 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
3671 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
3674 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
3677 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
3680 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
3683 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
3686 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3689 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
3693 case Intrinsic::aarch64_neon_ld1x4:
3695 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
3698 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
3701 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
3704 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
3707 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
3710 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
3713 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3716 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
3720 case Intrinsic::aarch64_neon_ld2:
3722 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
3725 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
3728 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
3731 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
3734 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
3737 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
3740 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
3743 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
3747 case Intrinsic::aarch64_neon_ld3:
3749 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
3752 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
3755 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
3758 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
3761 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
3764 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
3767 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3770 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
3774 case Intrinsic::aarch64_neon_ld4:
3776 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
3779 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
3782 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
3785 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
3788 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
3791 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
3794 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3797 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
3801 case Intrinsic::aarch64_neon_ld2r:
3803 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
3806 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
3809 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
3812 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
3815 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
3818 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
3821 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
3824 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
3828 case Intrinsic::aarch64_neon_ld3r:
3830 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
3833 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
3836 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
3839 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
3842 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
3845 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
3848 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
3851 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
3855 case Intrinsic::aarch64_neon_ld4r:
3857 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
3860 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
3863 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
3866 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
3869 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
3872 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
3875 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
3878 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
3882 case Intrinsic::aarch64_neon_ld2lane:
3884 SelectLoadLane(Node, 2, AArch64::LD2i8);
3888 SelectLoadLane(Node, 2, AArch64::LD2i16);
3892 SelectLoadLane(Node, 2, AArch64::LD2i32);
3896 SelectLoadLane(Node, 2, AArch64::LD2i64);
3900 case Intrinsic::aarch64_neon_ld3lane:
3902 SelectLoadLane(Node, 3, AArch64::LD3i8);
3906 SelectLoadLane(Node, 3, AArch64::LD3i16);
3910 SelectLoadLane(Node, 3, AArch64::LD3i32);
3914 SelectLoadLane(Node, 3, AArch64::LD3i64);
3918 case Intrinsic::aarch64_neon_ld4lane:
3920 SelectLoadLane(Node, 4, AArch64::LD4i8);
3924 SelectLoadLane(Node, 4, AArch64::LD4i16);
3928 SelectLoadLane(Node, 4, AArch64::LD4i32);
3932 SelectLoadLane(Node, 4, AArch64::LD4i64);
3936 case Intrinsic::aarch64_ld64b:
3937 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
3939 case Intrinsic::aarch64_sve_ld2_sret: {
3941 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
3946 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
3950 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
3954 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
3960 case Intrinsic::aarch64_sve_ld3_sret: {
3962 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
3967 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
3971 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
3975 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
3981 case Intrinsic::aarch64_sve_ld4_sret: {
3983 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
3988 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
3992 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
3996 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
4005 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
4009 case Intrinsic::aarch64_tagp:
4012 case Intrinsic::aarch64_neon_tbl2:
4013 SelectTable(Node, 2,
4014 VT ==
MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
4017 case Intrinsic::aarch64_neon_tbl3:
4018 SelectTable(Node, 3, VT ==
MVT::v8i8 ? AArch64::TBLv8i8Three
4019 : AArch64::TBLv16i8Three,
4022 case Intrinsic::aarch64_neon_tbl4:
4023 SelectTable(Node, 4, VT ==
MVT::v8i8 ? AArch64::TBLv8i8Four
4024 : AArch64::TBLv16i8Four,
4027 case Intrinsic::aarch64_neon_tbx2:
4028 SelectTable(Node, 2,
4029 VT ==
MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
4032 case Intrinsic::aarch64_neon_tbx3:
4033 SelectTable(Node, 3, VT ==
MVT::v8i8 ? AArch64::TBXv8i8Three
4034 : AArch64::TBXv16i8Three,
4037 case Intrinsic::aarch64_neon_tbx4:
4038 SelectTable(Node, 4, VT ==
MVT::v8i8 ? AArch64::TBXv8i8Four
4039 : AArch64::TBXv16i8Four,
4042 case Intrinsic::aarch64_neon_smull:
4043 case Intrinsic::aarch64_neon_umull:
4044 if (tryMULLV64LaneV128(IntNo, Node))
4047 case Intrinsic::swift_async_context_addr: {
4049 CurDAG->SelectNodeTo(Node, AArch64::SUBXri,
MVT::i64,
4050 CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
DL,
4054 auto &MF = CurDAG->getMachineFunction();
4055 MF.getFrameInfo().setFrameAddressIsTaken(
true);
4063 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
4064 if (Node->getNumOperands() >= 3)
4065 VT = Node->getOperand(2)->getValueType(0);
4069 case Intrinsic::aarch64_neon_st1x2: {
4071 SelectStore(Node, 2, AArch64::ST1Twov8b);
4074 SelectStore(Node, 2, AArch64::ST1Twov16b);
4078 SelectStore(Node, 2, AArch64::ST1Twov4h);
4082 SelectStore(Node, 2, AArch64::ST1Twov8h);
4085 SelectStore(Node, 2, AArch64::ST1Twov2s);
4088 SelectStore(Node, 2, AArch64::ST1Twov4s);
4091 SelectStore(Node, 2, AArch64::ST1Twov2d);
4094 SelectStore(Node, 2, AArch64::ST1Twov1d);
4099 case Intrinsic::aarch64_neon_st1x3: {
4101 SelectStore(Node, 3, AArch64::ST1Threev8b);
4104 SelectStore(Node, 3, AArch64::ST1Threev16b);
4108 SelectStore(Node, 3, AArch64::ST1Threev4h);
4112 SelectStore(Node, 3, AArch64::ST1Threev8h);
4115 SelectStore(Node, 3, AArch64::ST1Threev2s);
4118 SelectStore(Node, 3, AArch64::ST1Threev4s);
4121 SelectStore(Node, 3, AArch64::ST1Threev2d);
4124 SelectStore(Node, 3, AArch64::ST1Threev1d);
4129 case Intrinsic::aarch64_neon_st1x4: {
4131 SelectStore(Node, 4, AArch64::ST1Fourv8b);
4134 SelectStore(Node, 4, AArch64::ST1Fourv16b);
4138 SelectStore(Node, 4, AArch64::ST1Fourv4h);
4142 SelectStore(Node, 4, AArch64::ST1Fourv8h);
4145 SelectStore(Node, 4, AArch64::ST1Fourv2s);
4148 SelectStore(Node, 4, AArch64::ST1Fourv4s);
4151 SelectStore(Node, 4, AArch64::ST1Fourv2d);
4154 SelectStore(Node, 4, AArch64::ST1Fourv1d);
4159 case Intrinsic::aarch64_neon_st2: {
4161 SelectStore(Node, 2, AArch64::ST2Twov8b);
4164 SelectStore(Node, 2, AArch64::ST2Twov16b);
4168 SelectStore(Node, 2, AArch64::ST2Twov4h);
4172 SelectStore(Node, 2, AArch64::ST2Twov8h);
4175 SelectStore(Node, 2, AArch64::ST2Twov2s);
4178 SelectStore(Node, 2, AArch64::ST2Twov4s);
4181 SelectStore(Node, 2, AArch64::ST2Twov2d);
4184 SelectStore(Node, 2, AArch64::ST1Twov1d);
4189 case Intrinsic::aarch64_neon_st3: {
4191 SelectStore(Node, 3, AArch64::ST3Threev8b);
4194 SelectStore(Node, 3, AArch64::ST3Threev16b);
4198 SelectStore(Node, 3, AArch64::ST3Threev4h);
4202 SelectStore(Node, 3, AArch64::ST3Threev8h);
4205 SelectStore(Node, 3, AArch64::ST3Threev2s);
4208 SelectStore(Node, 3, AArch64::ST3Threev4s);
4211 SelectStore(Node, 3, AArch64::ST3Threev2d);
4214 SelectStore(Node, 3, AArch64::ST1Threev1d);
4219 case Intrinsic::aarch64_neon_st4: {
4221 SelectStore(Node, 4, AArch64::ST4Fourv8b);
4224 SelectStore(Node, 4, AArch64::ST4Fourv16b);
4228 SelectStore(Node, 4, AArch64::ST4Fourv4h);
4232 SelectStore(Node, 4, AArch64::ST4Fourv8h);
4235 SelectStore(Node, 4, AArch64::ST4Fourv2s);
4238 SelectStore(Node, 4, AArch64::ST4Fourv4s);
4241 SelectStore(Node, 4, AArch64::ST4Fourv2d);
4244 SelectStore(Node, 4, AArch64::ST1Fourv1d);
4249 case Intrinsic::aarch64_neon_st2lane: {
4251 SelectStoreLane(Node, 2, AArch64::ST2i8);
4255 SelectStoreLane(Node, 2, AArch64::ST2i16);
4259 SelectStoreLane(Node, 2, AArch64::ST2i32);
4263 SelectStoreLane(Node, 2, AArch64::ST2i64);
4268 case Intrinsic::aarch64_neon_st3lane: {
4270 SelectStoreLane(Node, 3, AArch64::ST3i8);
4274 SelectStoreLane(Node, 3, AArch64::ST3i16);
4278 SelectStoreLane(Node, 3, AArch64::ST3i32);
4282 SelectStoreLane(Node, 3, AArch64::ST3i64);
4287 case Intrinsic::aarch64_neon_st4lane: {
4289 SelectStoreLane(Node, 4, AArch64::ST4i8);
4293 SelectStoreLane(Node, 4, AArch64::ST4i16);
4297 SelectStoreLane(Node, 4, AArch64::ST4i32);
4301 SelectStoreLane(Node, 4, AArch64::ST4i64);
4306 case Intrinsic::aarch64_sve_st2: {
4308 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
4312 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
4315 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
4318 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
4323 case Intrinsic::aarch64_sve_st3: {
4325 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
4329 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
4332 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
4335 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
4340 case Intrinsic::aarch64_sve_st4: {
4342 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
4346 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
4349 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
4352 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
4362 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
4365 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
4368 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
4371 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
4374 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
4377 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
4380 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
4383 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
4390 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
4393 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
4396 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
4399 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
4402 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
4405 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
4408 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
4411 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
4418 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
4421 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
4424 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
4427 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
4430 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
4433 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
4436 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
4439 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
4446 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
4449 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
4452 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
4455 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
4458 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
4461 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
4464 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
4467 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
4474 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
4477 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
4480 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
4483 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
4486 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
4489 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
4492 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
4495 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
4502 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
4505 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
4508 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
4511 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
4514 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
4517 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
4520 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
4523 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
4530 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
4533 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
4536 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
4539 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
4542 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
4545 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
4548 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
4551 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
4558 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
4561 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
4564 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
4567 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
4570 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
4573 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
4576 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
4579 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
4586 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
4589 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
4592 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
4595 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
4598 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
4601 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
4604 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
4607 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
4614 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
4617 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
4620 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
4623 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
4626 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
4629 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
4632 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
4635 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
4642 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
4646 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
4650 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
4654 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
4661 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
4665 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
4669 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
4673 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
4680 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
4684 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
4688 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
4692 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
4699 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
4703 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
4707 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
4711 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
4717 VT = Node->getOperand(1).getValueType();
4719 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
4722 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
4725 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
4728 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
4731 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
4734 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
4737 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
4740 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
4746 VT = Node->getOperand(1).getValueType();
4748 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
4751 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
4754 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
4757 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
4760 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
4763 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
4766 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
4769 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
4775 VT = Node->getOperand(1).getValueType();
4777 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
4780 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
4783 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
4786 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
4789 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
4792 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
4795 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
4798 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
4804 VT = Node->getOperand(1).getValueType();
4806 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
4809 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
4812 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
4815 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
4818 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
4821 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
4824 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
4827 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
4833 VT = Node->getOperand(1).getValueType();
4835 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
4838 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
4841 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
4844 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
4847 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
4850 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
4853 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
4856 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
4862 VT = Node->getOperand(1).getValueType();
4864 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
4867 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
4870 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);