21 #include "llvm/IR/IntrinsicsAArch64.h"
30 #define DEBUG_TYPE "aarch64-isel"
50 return "AArch64 Instruction Selection";
62 bool SelectInlineAsmMemoryOperand(
const SDValue &
Op,
63 unsigned ConstraintID,
64 std::vector<SDValue> &OutOps)
override;
66 template <
signed Low,
signed High,
signed Scale>
69 bool tryMLAV64LaneV128(
SDNode *
N);
70 bool tryMULLV64LaneV128(
unsigned IntNo,
SDNode *
N);
75 return SelectShiftedRegister(
N,
false,
Reg,
Shift);
78 return SelectShiftedRegister(
N,
true,
Reg,
Shift);
81 return SelectAddrModeIndexed7S(
N, 1, Base, OffImm);
84 return SelectAddrModeIndexed7S(
N, 2, Base, OffImm);
87 return SelectAddrModeIndexed7S(
N, 4, Base, OffImm);
90 return SelectAddrModeIndexed7S(
N, 8, Base, OffImm);
93 return SelectAddrModeIndexed7S(
N, 16, Base, OffImm);
96 return SelectAddrModeIndexedBitWidth(
N,
true, 9, 16, Base, OffImm);
99 return SelectAddrModeIndexedBitWidth(
N,
false, 6, 16, Base, OffImm);
102 return SelectAddrModeIndexed(
N, 1, Base, OffImm);
105 return SelectAddrModeIndexed(
N, 2, Base, OffImm);
108 return SelectAddrModeIndexed(
N, 4, Base, OffImm);
111 return SelectAddrModeIndexed(
N, 8, Base, OffImm);
114 return SelectAddrModeIndexed(
N, 16, Base, OffImm);
117 return SelectAddrModeUnscaled(
N, 1, Base, OffImm);
120 return SelectAddrModeUnscaled(
N, 2, Base, OffImm);
123 return SelectAddrModeUnscaled(
N, 4, Base, OffImm);
126 return SelectAddrModeUnscaled(
N, 8, Base, OffImm);
129 return SelectAddrModeUnscaled(
N, 16, Base, OffImm);
135 return SelectAddrModeWRO(
N,
Width / 8, Base,
Offset, SignExtend, DoShift);
141 return SelectAddrModeXRO(
N,
Width / 8, Base,
Offset, SignExtend, DoShift);
144 bool SelectDupZeroOrUndef(
SDValue N) {
145 switch(
N->getOpcode()) {
150 auto Opnd0 =
N->getOperand(0);
151 if (
auto CN = dyn_cast<ConstantSDNode>(Opnd0))
152 if (CN->isNullValue())
154 if (
auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
167 switch(
N->getOpcode()) {
170 auto Opnd0 =
N->getOperand(0);
171 if (
auto CN = dyn_cast<ConstantSDNode>(Opnd0))
172 if (CN->isNullValue())
174 if (
auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
184 template<MVT::SimpleValueType VT>
186 return SelectSVEAddSubImm(
N, VT, Imm,
Shift);
189 template<MVT::SimpleValueType VT>
191 return SelectSVELogicalImm(
N, VT, Imm);
194 template <MVT::SimpleValueType VT>
196 return SelectSVEArithImm(
N, VT, Imm);
199 template <
unsigned Low,
unsigned High,
bool AllowSaturation = false>
201 return SelectSVEShiftImm(
N, Low,
High, AllowSaturation, Imm);
205 template<
signed Min,
signed Max,
signed Scale,
bool Shift>
207 if (!isa<ConstantSDNode>(
N))
210 int64_t MulImm = cast<ConstantSDNode>(
N)->getSExtValue();
212 MulImm = 1LL << MulImm;
214 if ((MulImm %
std::abs(Scale)) != 0)
218 if ((MulImm >= Min) && (MulImm <= Max)) {
239 const unsigned SubRegs[]);
241 void SelectTable(
SDNode *
N,
unsigned NumVecs,
unsigned Opc,
bool isExt);
243 bool tryIndexedLoad(
SDNode *
N);
245 bool trySelectStackSlotTagP(
SDNode *
N);
248 void SelectLoad(
SDNode *
N,
unsigned NumVecs,
unsigned Opc,
250 void SelectPostLoad(
SDNode *
N,
unsigned NumVecs,
unsigned Opc,
252 void SelectLoadLane(
SDNode *
N,
unsigned NumVecs,
unsigned Opc);
253 void SelectPostLoadLane(
SDNode *
N,
unsigned NumVecs,
unsigned Opc);
254 void SelectPredicatedLoad(
SDNode *
N,
unsigned NumVecs,
unsigned Scale,
255 unsigned Opc_rr,
unsigned Opc_ri);
259 template <
int64_t Min,
int64_t Max>
263 template <
unsigned Scale>
265 return SelectSVERegRegAddrMode(
N, Scale, Base,
Offset);
268 void SelectStore(
SDNode *
N,
unsigned NumVecs,
unsigned Opc);
269 void SelectPostStore(
SDNode *
N,
unsigned NumVecs,
unsigned Opc);
270 void SelectStoreLane(
SDNode *
N,
unsigned NumVecs,
unsigned Opc);
271 void SelectPostStoreLane(
SDNode *
N,
unsigned NumVecs,
unsigned Opc);
272 void SelectPredicatedStore(
SDNode *
N,
unsigned NumVecs,
unsigned Scale,
273 unsigned Opc_rr,
unsigned Opc_ri);
274 std::tuple<unsigned, SDValue, SDValue>
275 findAddrModeSVELoadStore(
SDNode *
N,
unsigned Opc_rr,
unsigned Opc_ri,
279 bool tryBitfieldExtractOp(
SDNode *
N);
280 bool tryBitfieldExtractOpFromSExt(
SDNode *
N);
281 bool tryBitfieldInsertOp(
SDNode *
N);
282 bool tryBitfieldInsertInZeroOp(
SDNode *
N);
283 bool tryShiftAmountMod(
SDNode *
N);
286 bool tryReadRegister(
SDNode *
N);
287 bool tryWriteRegister(
SDNode *
N);
290 #include "AArch64GenDAGISel.inc"
297 return SelectAddrModeIndexedBitWidth(
N,
true, 7,
Size, Base, OffImm);
299 bool SelectAddrModeIndexedBitWidth(
SDValue N,
bool IsSignedImm,
unsigned BW,
312 bool isWorthFolding(
SDValue V)
const;
313 bool SelectExtendedSHL(
SDValue N,
unsigned Size,
bool WantExtend,
316 template<
unsigned RegW
idth>
318 return SelectCVTFixedPosOperand(
N, FixedPos, RegWidth);
323 bool SelectCMP_SWAP(
SDNode *
N);
332 bool SelectSVEShiftImm(
SDValue N, uint64_t Low, uint64_t
High,
333 bool AllowSaturation,
SDValue &Imm);
345 Imm =
C->getZExtValue();
362 return N->getOpcode() == Opc &&
366 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
367 const SDValue &
Op,
unsigned ConstraintID, std::vector<SDValue> &OutOps) {
368 switch(ConstraintID) {
381 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
382 dl,
Op.getValueType(),
384 OutOps.push_back(NewOp);
400 if (!isa<ConstantSDNode>(
N.getNode()))
403 uint64_t Immed = cast<ConstantSDNode>(
N.getNode())->getZExtValue();
406 if (Immed >> 12 == 0) {
408 }
else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
416 Val = CurDAG->getTargetConstant(Immed, dl,
MVT::i32);
430 if (!isa<ConstantSDNode>(
N.getNode()))
434 uint64_t Immed = cast<ConstantSDNode>(
N.getNode())->getZExtValue();
445 Immed = ~Immed + 1ULL;
446 if (Immed & 0xFFFFFFFFFF000000ULL)
449 Immed &= 0xFFFFFFULL;
450 return SelectArithImmed(CurDAG->getConstant(Immed,
SDLoc(
N),
MVT::i32), Val,
457 switch (
N.getOpcode()) {
476 auto *CSD = dyn_cast<ConstantSDNode>(V.
getOperand(1));
479 unsigned ShiftVal = CSD->getZExtValue();
487 for (
SDNode *UI : Node->uses())
488 if (!isa<MemSDNode>(*UI))
490 if (!isa<MemSDNode>(*UII))
496 bool AArch64DAGToDAGISel::isWorthFolding(
SDValue V)
const {
499 if (CurDAG->shouldOptForSize() || V.
hasOneUse())
524 bool AArch64DAGToDAGISel::SelectShiftedRegister(
SDValue N,
bool AllowROR,
533 unsigned BitSize =
N.getValueSizeInBits();
534 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
537 Reg =
N.getOperand(0);
539 return isWorthFolding(
N);
553 SrcVT = cast<VTSDNode>(
N.getOperand(1))->getVT();
555 SrcVT =
N.getOperand(0).getValueType();
557 if (!IsLoadStore && SrcVT ==
MVT::i8)
559 else if (!IsLoadStore && SrcVT ==
MVT::i16)
568 EVT SrcVT =
N.getOperand(0).getValueType();
569 if (!IsLoadStore && SrcVT ==
MVT::i8)
571 else if (!IsLoadStore && SrcVT ==
MVT::i16)
624 SDValue &LaneOp,
int &LaneIdx) {
638 bool AArch64DAGToDAGISel::tryMLAV64LaneV128(
SDNode *
N) {
658 SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
660 unsigned MLAOpc = ~0U;
662 switch (
N->getSimpleValueType(0).SimpleTy) {
666 MLAOpc = AArch64::MLAv4i16_indexed;
669 MLAOpc = AArch64::MLAv8i16_indexed;
672 MLAOpc = AArch64::MLAv2i32_indexed;
675 MLAOpc = AArch64::MLAv4i32_indexed;
679 ReplaceNode(
N, CurDAG->getMachineNode(MLAOpc, dl,
N->getValueType(0), Ops));
683 bool AArch64DAGToDAGISel::tryMULLV64LaneV128(
unsigned IntNo,
SDNode *
N) {
695 SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
697 unsigned SMULLOpc = ~0U;
699 if (IntNo == Intrinsic::aarch64_neon_smull) {
700 switch (
N->getSimpleValueType(0).SimpleTy) {
704 SMULLOpc = AArch64::SMULLv4i16_indexed;
707 SMULLOpc = AArch64::SMULLv2i32_indexed;
710 }
else if (IntNo == Intrinsic::aarch64_neon_umull) {
711 switch (
N->getSimpleValueType(0).SimpleTy) {
715 SMULLOpc = AArch64::UMULLv4i16_indexed;
718 SMULLOpc = AArch64::UMULLv2i32_indexed;
724 ReplaceNode(
N, CurDAG->getMachineNode(SMULLOpc, dl,
N->getValueType(0), Ops));
744 template<
signed Low,
signed High,
signed Scale>
746 if (!isa<ConstantSDNode>(
N))
749 int64_t MulImm = cast<ConstantSDNode>(
N)->getSExtValue();
750 if ((MulImm %
std::abs(Scale)) == 0) {
751 int64_t RDVLImm = MulImm / Scale;
752 if ((RDVLImm >= Low) && (RDVLImm <=
High)) {
765 unsigned ShiftVal = 0;
780 Reg =
N.getOperand(0).getOperand(0);
786 Reg =
N.getOperand(0);
790 Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*
Reg.getNode()))
803 return isWorthFolding(
N);
812 for (
auto Use :
N->uses()) {
830 bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(
SDValue N,
bool IsSignedImm,
831 unsigned BW,
unsigned Size,
838 int FI = cast<FrameIndexSDNode>(
N)->getIndex();
840 OffImm = CurDAG->getTargetConstant(0, dl,
MVT::i64);
846 if (CurDAG->isBaseWithConstantOffset(
N)) {
849 int64_t RHSC = RHS->getSExtValue();
851 int64_t Range = 0x1LL << (BW - 1);
853 if ((RHSC & (
Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
854 RHSC < (Range << Scale)) {
855 Base =
N.getOperand(0);
857 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
860 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl,
MVT::i64);
865 uint64_t RHSC = RHS->getZExtValue();
867 uint64_t Range = 0x1ULL << BW;
869 if ((RHSC & (
Size - 1)) == 0 && RHSC < (Range << Scale)) {
870 Base =
N.getOperand(0);
872 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
875 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl,
MVT::i64);
886 OffImm = CurDAG->getTargetConstant(0, dl,
MVT::i64);
893 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(
SDValue N,
unsigned Size,
899 int FI = cast<FrameIndexSDNode>(
N)->getIndex();
901 OffImm = CurDAG->getTargetConstant(0, dl,
MVT::i64);
907 dyn_cast<GlobalAddressSDNode>(
N.getOperand(1).getNode());
908 Base =
N.getOperand(0);
909 OffImm =
N.getOperand(1);
918 if (CurDAG->isBaseWithConstantOffset(
N)) {
920 int64_t RHSC = (int64_t)RHS->getZExtValue();
922 if ((RHSC & (
Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
923 Base =
N.getOperand(0);
925 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
928 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl,
MVT::i64);
936 if (SelectAddrModeUnscaled(
N,
Size, Base, OffImm))
944 OffImm = CurDAG->getTargetConstant(0, dl,
MVT::i64);
953 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(
SDValue N,
unsigned Size,
956 if (!CurDAG->isBaseWithConstantOffset(
N))
959 int64_t RHSC = RHS->getSExtValue();
961 if ((RHSC & (
Size - 1)) == 0 && RHSC >= 0 &&
964 if (RHSC >= -256 && RHSC < 256) {
965 Base =
N.getOperand(0);
967 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
969 Base = CurDAG->getTargetFrameIndex(
991 bool AArch64DAGToDAGISel::SelectExtendedSHL(
SDValue N,
unsigned Size,
1011 SignExtend = CurDAG->getTargetConstant(0, dl,
MVT::i32);
1017 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1020 return isWorthFolding(
N);
1023 bool AArch64DAGToDAGISel::SelectAddrModeWRO(
SDValue N,
unsigned Size,
1035 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1041 const SDNode *Node =
N.getNode();
1042 for (
SDNode *UI : Node->uses()) {
1043 if (!isa<MemSDNode>(*UI))
1048 bool IsExtendedRegisterWorthFolding = isWorthFolding(
N);
1052 SelectExtendedSHL(RHS,
Size,
true,
Offset, SignExtend)) {
1054 DoShift = CurDAG->getTargetConstant(
true, dl,
MVT::i32);
1060 SelectExtendedSHL(LHS,
Size,
true,
Offset, SignExtend)) {
1062 DoShift = CurDAG->getTargetConstant(
true, dl,
MVT::i32);
1067 DoShift = CurDAG->getTargetConstant(
false, dl,
MVT::i32);
1071 if (IsExtendedRegisterWorthFolding &&
1078 if (isWorthFolding(LHS))
1083 if (IsExtendedRegisterWorthFolding &&
1090 if (isWorthFolding(RHS))
1102 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1105 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1107 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1108 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1112 bool AArch64DAGToDAGISel::SelectAddrModeXRO(
SDValue N,
unsigned Size,
1125 const SDNode *Node =
N.getNode();
1126 for (
SDNode *UI : Node->uses()) {
1127 if (!isa<MemSDNode>(*UI))
1142 if (isa<ConstantSDNode>(RHS)) {
1143 int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
1148 if ((ImmOff %
Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
1154 CurDAG->getMachineNode(AArch64::MOVi64imm,
DL,
MVT::i64, Ops);
1161 bool IsExtendedRegisterWorthFolding = isWorthFolding(
N);
1165 SelectExtendedSHL(RHS,
Size,
false,
Offset, SignExtend)) {
1167 DoShift = CurDAG->getTargetConstant(
true,
DL,
MVT::i32);
1173 SelectExtendedSHL(LHS,
Size,
false,
Offset, SignExtend)) {
1175 DoShift = CurDAG->getTargetConstant(
true,
DL,
MVT::i32);
1182 SignExtend = CurDAG->getTargetConstant(
false,
DL,
MVT::i32);
1183 DoShift = CurDAG->getTargetConstant(
false,
DL,
MVT::i32);
1189 static const unsigned RegClassIDs[] = {
1190 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1191 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1192 AArch64::dsub2, AArch64::dsub3};
1198 static const unsigned RegClassIDs[] = {
1199 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1200 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1201 AArch64::qsub2, AArch64::qsub3};
1207 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1208 AArch64::ZPR3RegClassID,
1209 AArch64::ZPR4RegClassID};
1210 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1211 AArch64::zsub2, AArch64::zsub3};
1217 const unsigned RegClassIDs[],
1218 const unsigned SubRegs[]) {
1221 if (Regs.
size() == 1)
1232 CurDAG->getTargetConstant(RegClassIDs[Regs.
size() - 2],
DL,
MVT::i32));
1235 for (
unsigned i = 0;
i < Regs.
size(); ++
i) {
1236 Ops.push_back(Regs[
i]);
1237 Ops.push_back(CurDAG->getTargetConstant(SubRegs[
i],
DL,
MVT::i32));
1241 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
DL,
MVT::Untyped, Ops);
1245 void AArch64DAGToDAGISel::SelectTable(
SDNode *
N,
unsigned NumVecs,
unsigned Opc,
1248 EVT VT =
N->getValueType(0);
1250 unsigned ExtOff = isExt;
1253 unsigned Vec0Off = ExtOff + 1;
1255 N->op_begin() + Vec0Off + NumVecs);
1256 SDValue RegSeq = createQTuple(Regs);
1260 Ops.push_back(
N->getOperand(1));
1261 Ops.push_back(RegSeq);
1262 Ops.push_back(
N->getOperand(NumVecs + ExtOff + 1));
1263 ReplaceNode(
N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1266 bool AArch64DAGToDAGISel::tryIndexedLoad(
SDNode *
N) {
1268 if (
LD->isUnindexed())
1270 EVT VT =
LD->getMemoryVT();
1271 EVT DstVT =
N->getValueType(0);
1278 unsigned Opcode = 0;
1281 bool InsertTo64 =
false;
1283 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1286 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1288 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1290 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1299 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1301 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1303 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1312 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1314 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1316 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1323 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1325 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1327 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1329 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1331 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1341 SDNode *Res = CurDAG->getMachineNode(Opcode, dl,
MVT::i64, DstVT,
1346 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {
MemOp});
1353 SDValue(CurDAG->getMachineNode(
1354 AArch64::SUBREG_TO_REG, dl,
MVT::i64,
1355 CurDAG->getTargetConstant(0, dl,
MVT::i64), LoadedVal,
1360 ReplaceUses(
SDValue(
N, 0), LoadedVal);
1363 CurDAG->RemoveDeadNode(
N);
1367 void AArch64DAGToDAGISel::SelectLoad(
SDNode *
N,
unsigned NumVecs,
unsigned Opc,
1368 unsigned SubRegIdx) {
1370 EVT VT =
N->getValueType(0);
1378 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1380 for (
unsigned i = 0;
i < NumVecs; ++
i)
1382 CurDAG->getTargetExtractSubreg(SubRegIdx +
i, dl, VT, SuperReg));
1388 if (
auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(
N)) {
1390 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {
MemOp});
1393 CurDAG->RemoveDeadNode(
N);
1396 void AArch64DAGToDAGISel::SelectPostLoad(
SDNode *
N,
unsigned NumVecs,
1397 unsigned Opc,
unsigned SubRegIdx) {
1399 EVT VT =
N->getValueType(0);
1409 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1417 ReplaceUses(
SDValue(
N, 0), SuperReg);
1419 for (
unsigned i = 0;
i < NumVecs; ++
i)
1421 CurDAG->getTargetExtractSubreg(SubRegIdx +
i, dl, VT, SuperReg));
1425 CurDAG->RemoveDeadNode(
N);
1431 std::tuple<unsigned, SDValue, SDValue>
1432 AArch64DAGToDAGISel::findAddrModeSVELoadStore(
SDNode *
N,
unsigned Opc_rr,
1438 SDValue NewOffset = OldOffset;
1440 const bool IsRegImm = SelectAddrModeIndexedSVE<-8, 7>(
1441 N, OldBase, NewBase, NewOffset);
1445 const bool IsRegReg =
1446 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1449 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1452 void AArch64DAGToDAGISel::SelectPredicatedLoad(
SDNode *
N,
unsigned NumVecs,
1453 unsigned Scale,
unsigned Opc_ri,
1455 assert(Scale < 4 &&
"Invalid scaling value.");
1457 EVT VT =
N->getValueType(0);
1463 std::tie(Opc, Base,
Offset) = findAddrModeSVELoadStore(
1464 N, Opc_rr, Opc_ri,
N->getOperand(2),
1465 CurDAG->getTargetConstant(0,
DL,
MVT::i64), Scale);
1473 SDNode *
Load = CurDAG->getMachineNode(Opc,
DL, ResTys, Ops);
1475 for (
unsigned i = 0;
i < NumVecs; ++
i)
1476 ReplaceUses(
SDValue(
N,
i), CurDAG->getTargetExtractSubreg(
1477 AArch64::zsub0 +
i,
DL, VT, SuperReg));
1480 unsigned ChainIdx = NumVecs;
1482 CurDAG->RemoveDeadNode(
N);
1485 void AArch64DAGToDAGISel::SelectStore(
SDNode *
N,
unsigned NumVecs,
1488 EVT VT =
N->getOperand(2)->getValueType(0);
1495 SDValue Ops[] = {RegSeq,
N->getOperand(NumVecs + 2),
N->getOperand(0)};
1496 SDNode *St = CurDAG->getMachineNode(Opc, dl,
N->getValueType(0), Ops);
1500 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {
MemOp});
1505 void AArch64DAGToDAGISel::SelectPredicatedStore(
SDNode *
N,
unsigned NumVecs,
1506 unsigned Scale,
unsigned Opc_rr,
1512 SDValue RegSeq = createZTuple(Regs);
1517 std::tie(Opc, Base,
Offset) = findAddrModeSVELoadStore(
1518 N, Opc_rr, Opc_ri,
N->getOperand(NumVecs + 3),
1519 CurDAG->getTargetConstant(0, dl,
MVT::i64), Scale);
1521 SDValue Ops[] = {RegSeq,
N->getOperand(NumVecs + 2),
1525 SDNode *St = CurDAG->getMachineNode(Opc, dl,
N->getValueType(0), Ops);
1530 bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(
SDValue N,
SDValue &Base,
1537 if (
auto FINode = dyn_cast<FrameIndexSDNode>(
N)) {
1538 int FI = FINode->getIndex();
1540 OffImm = CurDAG->getTargetConstant(0, dl,
MVT::i64);
1547 void AArch64DAGToDAGISel::SelectPostStore(
SDNode *
N,
unsigned NumVecs,
1550 EVT VT =
N->getOperand(2)->getValueType(0);
1560 N->getOperand(NumVecs + 1),
1561 N->getOperand(NumVecs + 2),
1563 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1603 void AArch64DAGToDAGISel::SelectLoadLane(
SDNode *
N,
unsigned NumVecs,
1606 EVT VT =
N->getValueType(0);
1616 SDValue RegSeq = createQTuple(Regs);
1621 cast<ConstantSDNode>(
N->getOperand(NumVecs + 2))->getZExtValue();
1623 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl,
MVT::i64),
1624 N->getOperand(NumVecs + 3),
N->getOperand(0)};
1625 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1629 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1630 AArch64::qsub2, AArch64::qsub3 };
1631 for (
unsigned i = 0;
i < NumVecs; ++
i) {
1632 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[
i], dl, WideVT, SuperReg);
1639 CurDAG->RemoveDeadNode(
N);
1642 void AArch64DAGToDAGISel::SelectPostLoadLane(
SDNode *
N,
unsigned NumVecs,
1645 EVT VT =
N->getValueType(0);
1655 SDValue RegSeq = createQTuple(Regs);
1661 cast<ConstantSDNode>(
N->getOperand(NumVecs + 1))->getZExtValue();
1664 CurDAG->getTargetConstant(LaneNo, dl,
1666 N->getOperand(NumVecs + 2),
1667 N->getOperand(NumVecs + 3),
1669 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1681 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1682 AArch64::qsub2, AArch64::qsub3 };
1683 for (
unsigned i = 0;
i < NumVecs; ++
i) {
1684 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[
i], dl, WideVT,
1694 CurDAG->RemoveDeadNode(
N);
1697 void AArch64DAGToDAGISel::SelectStoreLane(
SDNode *
N,
unsigned NumVecs,
1700 EVT VT =
N->getOperand(2)->getValueType(0);
1710 SDValue RegSeq = createQTuple(Regs);
1713 cast<ConstantSDNode>(
N->getOperand(NumVecs + 2))->getZExtValue();
1715 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl,
MVT::i64),
1716 N->getOperand(NumVecs + 3),
N->getOperand(0)};
1721 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {
MemOp});
1726 void AArch64DAGToDAGISel::SelectPostStoreLane(
SDNode *
N,
unsigned NumVecs,
1729 EVT VT =
N->getOperand(2)->getValueType(0);
1739 SDValue RegSeq = createQTuple(Regs);
1745 cast<ConstantSDNode>(
N->getOperand(NumVecs + 1))->getZExtValue();
1747 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl,
MVT::i64),
1748 N->getOperand(NumVecs + 2),
1749 N->getOperand(NumVecs + 3),
1751 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1755 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {
MemOp});
1762 unsigned &LSB,
unsigned &MSB,
1763 unsigned NumberOfIgnoredLowBits,
1764 bool BiggerPattern) {
1766 "N must be a AND operation to call this function");
1768 EVT VT =
N->getValueType(0);
1774 "Type checking must have been done before calling this function");
1784 uint64_t AndImm = 0;
1788 const SDNode *Op0 =
N->getOperand(0).getNode();
1792 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
1795 if (AndImm & (AndImm + 1))
1798 bool ClampMSB =
false;
1799 uint64_t SrlImm = 0;
1818 }
else if (BiggerPattern) {
1824 Opd0 =
N->getOperand(0);
1830 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.
getSizeInBits())) {
1833 <<
": Found large shift immediate, this should not happen\n"));
1838 MSB = SrlImm + (VT ==
MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
1839 : countTrailingOnes<uint64_t>(AndImm)) -
1846 MSB = MSB > 31 ? 31 : MSB;
1848 Opc = VT ==
MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1853 SDValue &Opd0,
unsigned &Immr,
1857 EVT VT =
N->getValueType(0);
1860 "Type checking must have been done before calling this function");
1864 Op =
Op->getOperand(0);
1865 VT =
Op->getValueType(0);
1874 unsigned Width = cast<VTSDNode>(
N->getOperand(1))->getVT().getSizeInBits();
1878 Opc = (VT ==
MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
1879 Opd0 =
Op.getOperand(0);
1881 Imms = ShiftImm +
Width - 1;
1905 uint64_t AndMask = 0;
1909 Opd0 =
N->getOperand(0).getOperand(0);
1911 uint64_t SrlImm = 0;
1917 if (BitWide &&
isMask_64(AndMask >> SrlImm)) {
1919 Opc = AArch64::UBFMWri;
1921 Opc = AArch64::UBFMXri;
1924 MSB = BitWide + SrlImm - 1;
1932 unsigned &Immr,
unsigned &Imms,
1933 bool BiggerPattern) {
1935 "N must be a SHR/SRA operation to call this function");
1937 EVT VT =
N->getValueType(0);
1943 "Type checking must have been done before calling this function");
1950 uint64_t ShlImm = 0;
1951 uint64_t TruncBits = 0;
1953 Opd0 =
N->getOperand(0).getOperand(0);
1960 Opd0 =
N->getOperand(0).getOperand(0);
1964 }
else if (BiggerPattern) {
1968 Opd0 =
N->getOperand(0);
1977 <<
": Found large shift immediate, this should not happen\n"));
1981 uint64_t SrlImm = 0;
1986 "bad amount in shift node!");
1987 int immr = SrlImm - ShlImm;
1992 Opc =
N->getOpcode() ==
ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
1994 Opc =
N->getOpcode() ==
ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
1998 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(
SDNode *
N) {
2001 EVT VT =
N->getValueType(0);
2002 EVT NarrowVT =
N->getOperand(0)->getValueType(0);
2014 unsigned Immr = ShiftImm;
2016 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2017 CurDAG->getTargetConstant(Imms, dl, VT)};
2018 CurDAG->SelectNodeTo(
N, AArch64::SBFMXri, VT, Ops);
2024 bool AArch64DAGToDAGISel::tryHighFPExt(
SDNode *
N) {
2028 SDValue Extract =
N->getOperand(0);
2029 EVT VT =
N->getValueType(0);
2046 auto Opcode = VT ==
MVT::v2f64 ? AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16;
2047 CurDAG->SelectNodeTo(
N, Opcode, VT, Extract.
getOperand(0));
2052 SDValue &Opd0,
unsigned &Immr,
unsigned &Imms,
2053 unsigned NumberOfIgnoredLowBits = 0,
2054 bool BiggerPattern =
false) {
2058 switch (
N->getOpcode()) {
2060 if (!
N->isMachineOpcode())
2065 NumberOfIgnoredLowBits, BiggerPattern);
2074 unsigned NOpc =
N->getMachineOpcode();
2078 case AArch64::SBFMWri:
2079 case AArch64::UBFMWri:
2080 case AArch64::SBFMXri:
2081 case AArch64::UBFMXri:
2083 Opd0 =
N->getOperand(0);
2084 Immr = cast<ConstantSDNode>(
N->getOperand(1).getNode())->getZExtValue();
2085 Imms = cast<ConstantSDNode>(
N->getOperand(2).getNode())->getZExtValue();
2092 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(
SDNode *
N) {
2093 unsigned Opc, Immr, Imms;
2098 EVT VT =
N->getValueType(0);
2103 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT ==
MVT::i32) {
2104 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl,
MVT::i64),
2105 CurDAG->getTargetConstant(Imms, dl,
MVT::i64)};
2109 ReplaceNode(
N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
2114 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2115 CurDAG->getTargetConstant(Imms, dl, VT)};
2116 CurDAG->SelectNodeTo(
N, Opc, VT, Ops);
2125 unsigned NumberOfIgnoredHighBits,
EVT VT) {
2127 "i32 or i64 mask type expected!");
2133 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2134 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
2157 cast<const ConstantSDNode>(
Op.getOperand(1).getNode())->getZExtValue();
2164 uint64_t Imm, uint64_t MSB,
2167 APInt OpUsefulBits(UsefulBits);
2171 OpUsefulBits <<= MSB - Imm + 1;
2176 OpUsefulBits <<= Imm;
2178 OpUsefulBits <<= MSB + 1;
2181 OpUsefulBits <<= OpUsefulBits.
getBitWidth() - Imm;
2187 UsefulBits &= OpUsefulBits;
2193 cast<const ConstantSDNode>(
Op.getOperand(1).getNode())->getZExtValue();
2195 cast<const ConstantSDNode>(
Op.getOperand(2).getNode())->getZExtValue();
2202 uint64_t ShiftTypeAndValue =
2203 cast<const ConstantSDNode>(
Op.getOperand(2).getNode())->getZExtValue();
2205 Mask.clearAllBits();
2213 Mask.lshrInPlace(ShiftAmt);
2219 Mask.lshrInPlace(ShiftAmt);
2231 cast<const ConstantSDNode>(
Op.getOperand(2).getNode())->getZExtValue();
2233 cast<const ConstantSDNode>(
Op.getOperand(3).getNode())->getZExtValue();
2235 APInt OpUsefulBits(UsefulBits);
2246 uint64_t
Width = MSB - Imm + 1;
2249 OpUsefulBits <<=
Width;
2252 if (
Op.getOperand(1) == Orig) {
2254 Mask = ResultUsefulBits & OpUsefulBits;
2258 if (
Op.getOperand(0) == Orig)
2260 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2263 uint64_t
Width = MSB + 1;
2266 OpUsefulBits <<=
Width;
2268 OpUsefulBits <<= LSB;
2270 if (
Op.getOperand(1) == Orig) {
2272 Mask = ResultUsefulBits & OpUsefulBits;
2273 Mask.lshrInPlace(LSB);
2276 if (
Op.getOperand(0) == Orig)
2277 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2294 case AArch64::ANDSWri:
2295 case AArch64::ANDSXri:
2296 case AArch64::ANDWri:
2297 case AArch64::ANDXri:
2301 case AArch64::UBFMWri:
2302 case AArch64::UBFMXri:
2305 case AArch64::ORRWrs:
2306 case AArch64::ORRXrs:
2311 case AArch64::BFMWri:
2312 case AArch64::BFMXri:
2315 case AArch64::STRBBui:
2316 case AArch64::STURBBi:
2322 case AArch64::STRHHui:
2323 case AArch64::STURHHi:
2336 unsigned Bitwidth =
Op.getScalarValueSizeInBits();
2338 UsefulBits =
APInt(Bitwidth, 0);
2343 for (
SDNode *Node :
Op.getNode()->uses()) {
2347 UsersUsefulBits |= UsefulBitsForUse;
2352 UsefulBits &= UsersUsefulBits;
2362 EVT VT =
Op.getValueType();
2365 unsigned UBFMOpc =
BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2368 if (ShlAmount > 0) {
2371 UBFMOpc, dl, VT,
Op,
2376 assert(ShlAmount < 0 &&
"expected right shift");
2377 int ShrAmount = -ShlAmount;
2390 SDValue &Src,
int &ShiftAmount,
2392 EVT VT =
Op.getValueType();
2401 uint64_t NonZeroBits = (~Known.
Zero).getZExtValue();
2408 Op =
Op.getOperand(0);
2413 if (!BiggerPattern && !
Op.hasOneUse())
2419 Op =
Op.getOperand(0);
2432 if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
2451 EVT VT =
N->getValueType(0);
2470 if (!
And.hasOneUse() ||
2480 uint64_t NotKnownZero = (~Known.
Zero).getZExtValue();
2487 if ((OrImm & NotKnownZero) != 0) {
2499 unsigned ImmS =
Width - 1;
2505 bool IsBFI = LSB != 0;
2506 uint64_t BFIImm = OrImm >> LSB;
2510 unsigned OrChunks = 0, BFIChunks = 0;
2512 if (((OrImm >>
Shift) & 0xFFFF) != 0)
2514 if (((BFIImm >>
Shift) & 0xFFFF) != 0)
2517 if (BFIChunks > OrChunks)
2523 unsigned MOVIOpc = VT ==
MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
2531 unsigned Opc = (VT ==
MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2540 EVT VT =
N->getValueType(0);
2570 for (
int I = 0;
I < 4; ++
I) {
2573 unsigned ImmR, ImmS;
2574 bool BiggerPattern =
I / 2;
2575 SDValue OrOpd0Val =
N->getOperand(
I % 2);
2577 SDValue OrOpd1Val =
N->getOperand((
I + 1) % 2);
2583 NumberOfIgnoredLowBits, BiggerPattern)) {
2586 if ((BFXOpc != AArch64::UBFMXri && VT ==
MVT::i64) ||
2587 (BFXOpc != AArch64::UBFMWri && VT ==
MVT::i32))
2592 Width = ImmS - ImmR + 1;
2603 Src, DstLSB,
Width)) {
2621 APInt BitsToBeInserted =
2624 if ((BitsToBeInserted & ~Known.
Zero) != 0)
2642 unsigned Opc = (VT ==
MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2650 uint64_t Mask0Imm, Mask1Imm;
2675 unsigned ShiftOpc = (VT ==
MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2682 unsigned ImmS =
Width - 1;
2688 unsigned Opc = (VT ==
MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2696 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(
SDNode *
N) {
2705 CurDAG->SelectNodeTo(
N, TargetOpcode::IMPLICIT_DEF,
N->getValueType(0));
2718 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(
SDNode *
N) {
2722 EVT VT =
N->getValueType(0);
2729 Op0, DstLSB,
Width))
2735 unsigned ImmS =
Width - 1;
2738 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR,
DL, VT),
2739 CurDAG->getTargetConstant(ImmS,
DL, VT)};
2740 unsigned Opc = (VT ==
MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2741 CurDAG->SelectNodeTo(
N, Opc, VT, Ops);
2747 bool AArch64DAGToDAGISel::tryShiftAmountMod(
SDNode *
N) {
2748 EVT VT =
N->getValueType(0);
2751 switch (
N->getOpcode()) {
2753 Opc = (VT ==
MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
2756 Opc = (VT ==
MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
2759 Opc = (VT ==
MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
2762 Opc = (VT ==
MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
2779 SDValue ShiftAmt =
N->getOperand(1);
2801 (Add0Imm %
Size == 0)) {
2806 NegOpc = AArch64::SUBWrr;
2807 ZeroReg = AArch64::WZR;
2810 NegOpc = AArch64::SUBXrr;
2811 ZeroReg = AArch64::XZR;
2814 CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
DL, ZeroReg, SubVT);
2816 CurDAG->getMachineNode(NegOpc,
DL, SubVT, Zero, Add1);
2817 NewShiftAmt =
SDValue(Neg, 0);
2841 AArch64::SUBREG_TO_REG,
DL, VT,
2846 SDValue Ops[] = {
N->getOperand(0), NewShiftAmt};
2847 CurDAG->SelectNodeTo(
N, Opc, VT, Ops);
2852 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(
SDValue N,
SDValue &FixedPos,
2853 unsigned RegWidth) {
2856 FVal = CN->getValueAPF();
2857 else if (
LoadSDNode *LN = dyn_cast<LoadSDNode>(
N)) {
2860 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
2864 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
2865 FVal = cast<ConstantFP>(CN->
getConstVal())->getValueAPF();
2884 if (!IsExact || !
IntVal.isPowerOf2())
return false;
2885 unsigned FBits =
IntVal.logBase2();
2889 if (FBits == 0 || FBits > RegWidth)
return false;
2900 RegString.
split(Fields,
':');
2902 if (Fields.size() == 1)
2905 assert(Fields.size() == 5
2906 &&
"Invalid number of fields in read register string");
2909 bool AllIntFields =
true;
2913 AllIntFields &= !
Field.getAsInteger(10, IntField);
2914 Ops.push_back(IntField);
2918 "Unexpected non-integer value in special register string.");
2922 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
2923 (Ops[3] << 3) | (Ops[4]);
2930 bool AArch64DAGToDAGISel::tryReadRegister(
SDNode *
N) {
2931 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(
N->getOperand(1));
2937 ReplaceNode(
N, CurDAG->getMachineNode(
2947 if (TheReg && TheReg->Readable &&
2948 TheReg->haveFeatures(Subtarget->getFeatureBits()))
2949 Reg = TheReg->Encoding;
2954 ReplaceNode(
N, CurDAG->getMachineNode(
2962 ReplaceNode(
N, CurDAG->getMachineNode(
2976 bool AArch64DAGToDAGISel::tryWriteRegister(
SDNode *
N) {
2977 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(
N->getOperand(1));
2986 N->getOperand(2),
N->getOperand(0)));
2995 auto PMapper = AArch64PState::lookupPStateByName(RegString->
getString());
2997 assert (isa<ConstantSDNode>(
N->getOperand(2))
2998 &&
"Expected a constant integer expression.");
2999 unsigned Reg = PMapper->Encoding;
3000 uint64_t Immed = cast<ConstantSDNode>(
N->getOperand(2))->getZExtValue();
3002 if (
Reg == AArch64PState::PAN ||
Reg == AArch64PState::UAO ||
Reg == AArch64PState::SSBS) {
3003 assert(Immed < 2 &&
"Bad imm");
3004 State = AArch64::MSRpstateImm1;
3006 assert(Immed < 16 &&
"Bad imm");
3007 State = AArch64::MSRpstateImm4;
3009 ReplaceNode(
N, CurDAG->getMachineNode(
3012 CurDAG->getTargetConstant(Immed,
DL,
MVT::i16),
3021 if (TheReg && TheReg->Writeable &&
3022 TheReg->haveFeatures(Subtarget->getFeatureBits()))
3023 Reg = TheReg->Encoding;
3027 ReplaceNode(
N, CurDAG->getMachineNode(
3030 N->getOperand(2),
N->getOperand(0)));
3038 bool AArch64DAGToDAGISel::SelectCMP_SWAP(
SDNode *
N) {
3040 EVT MemTy = cast<MemSDNode>(
N)->getMemoryVT();
3043 if (Subtarget->hasLSE())
return false;
3046 Opcode = AArch64::CMP_SWAP_8;
3048 Opcode = AArch64::CMP_SWAP_16;
3050 Opcode = AArch64::CMP_SWAP_32;
3052 Opcode = AArch64::CMP_SWAP_64;
3057 SDValue Ops[] = {
N->getOperand(1),
N->getOperand(2),
N->getOperand(3),
3059 SDNode *CmpSwap = CurDAG->getMachineNode(
3064 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {
MemOp});
3068 CurDAG->RemoveDeadNode(
N);
3073 bool AArch64DAGToDAGISel::SelectSVE8BitLslImm(
SDValue N,
SDValue &Base,
3075 auto C = dyn_cast<ConstantSDNode>(
N);
3079 auto Ty =
N->getValueType(0);
3081 int64_t Imm =
C->getSExtValue();
3084 if ((Imm >= -128) && (Imm <= 127)) {
3085 Base = CurDAG->getTargetConstant(Imm,
DL, Ty);
3086 Offset = CurDAG->getTargetConstant(0,
DL, Ty);
3090 if (((Imm % 256) == 0) && (Imm >= -32768) && (Imm <= 32512)) {
3091 Base = CurDAG->getTargetConstant(Imm/256,
DL, Ty);
3092 Offset = CurDAG->getTargetConstant(8,
DL, Ty);
3100 if (
auto CNode = dyn_cast<ConstantSDNode>(
N)) {
3101 const int64_t ImmVal = CNode->getZExtValue();
3106 if ((ImmVal & 0xFF) == ImmVal) {
3108 Imm = CurDAG->getTargetConstant(ImmVal,
DL,
MVT::i32);
3115 if ((ImmVal & 0xFF) == ImmVal) {
3117 Imm = CurDAG->getTargetConstant(ImmVal,
DL,
MVT::i32);
3119 }
else if ((ImmVal & 0xFF00) == ImmVal) {
3121 Imm = CurDAG->getTargetConstant(ImmVal >> 8,
DL,
MVT::i32);
3133 bool AArch64DAGToDAGISel::SelectSVESignedArithImm(
SDValue N,
SDValue &Imm) {
3134 if (
auto CNode = dyn_cast<ConstantSDNode>(
N)) {
3135 int64_t ImmVal = CNode->getSExtValue();
3137 if (ImmVal >= -128 && ImmVal < 128) {
3138 Imm = CurDAG->getTargetConstant(ImmVal,
DL,
MVT::i32);
3146 if (
auto CNode = dyn_cast<ConstantSDNode>(
N)) {
3147 uint64_t ImmVal = CNode->getZExtValue();
3157 ImmVal &= 0xFFFFFFFF;
3174 if (
auto CNode = dyn_cast<ConstantSDNode>(
N)) {
3175 uint64_t ImmVal = CNode->getZExtValue();
3182 ImmVal |= ImmVal << 8;
3183 ImmVal |= ImmVal << 16;
3184 ImmVal |= ImmVal << 32;
3188 ImmVal |= ImmVal << 16;
3189 ImmVal |= ImmVal << 32;
3192 ImmVal &= 0xFFFFFFFF;
3193 ImmVal |= ImmVal << 32;
3203 Imm = CurDAG->getTargetConstant(encoding,
DL,
MVT::i64);
3215 bool AArch64DAGToDAGISel::SelectSVEShiftImm(
SDValue N, uint64_t Low,
3216 uint64_t
High,
bool AllowSaturation,
3218 if (
auto *CN = dyn_cast<ConstantSDNode>(
N)) {
3219 uint64_t ImmVal = CN->getZExtValue();
3226 if (ImmVal >
High) {
3227 if (!AllowSaturation)
3239 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(
SDNode *
N) {
3243 if (!(isa<FrameIndexSDNode>(
N->getOperand(1)))) {
3249 cast<ConstantSDNode>(IRG_SP->
getOperand(1))->getZExtValue() !=
3250 Intrinsic::aarch64_irg_sp) {
3256 int FI = cast<FrameIndexSDNode>(
N->getOperand(1))->getIndex();
3257 SDValue FiOp = CurDAG->getTargetFrameIndex(
3259 int TagOffset = cast<ConstantSDNode>(
N->getOperand(3))->getZExtValue();
3261 SDNode *Out = CurDAG->getMachineNode(
3263 {FiOp, CurDAG->getTargetConstant(0,
DL,
MVT::i64),
N->getOperand(2),
3264 CurDAG->getTargetConstant(TagOffset,
DL,
MVT::i64)});
3265 ReplaceNode(
N, Out);
3269 void AArch64DAGToDAGISel::SelectTagP(
SDNode *
N) {
3270 assert(isa<ConstantSDNode>(
N->getOperand(3)) &&
3271 "llvm.aarch64.tagp third argument must be an immediate");
3272 if (trySelectStackSlotTagP(
N))
3279 int TagOffset = cast<ConstantSDNode>(
N->getOperand(3))->getZExtValue();
3281 {
N->getOperand(1),
N->getOperand(2)});
3283 {
SDValue(N1, 0),
N->getOperand(2)});
3284 SDNode *N3 = CurDAG->getMachineNode(
3287 CurDAG->getTargetConstant(TagOffset,
DL,
MVT::i64)});
3297 "Expected to extract from a packed scalable vector!");
3299 "Expected to extract a fixed length vector!");
3323 "Expected to insert into a packed scalable vector!");
3325 "Expected to insert a fixed length vector!");
3350 if (Node->isMachineOpcode()) {
3352 Node->setNodeId(-1);
3357 EVT VT = Node->getValueType(0);
3359 switch (Node->getOpcode()) {
3364 if (SelectCMP_SWAP(Node))
3369 if (tryReadRegister(Node))
3374 if (tryWriteRegister(Node))
3379 if (tryMLAV64LaneV128(Node))
3386 if (tryIndexedLoad(Node))
3395 if (tryBitfieldExtractOp(Node))
3397 if (tryBitfieldInsertInZeroOp(Node))
3402 if (tryShiftAmountMod(Node))
3407 if (tryBitfieldExtractOpFromSExt(Node))
3412 if (tryHighFPExt(Node))
3417 if (tryBitfieldInsertOp(Node))
3423 if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0)
3427 EVT InVT = Node->getOperand(0).getValueType();
3439 ReplaceNode(Node,
extractSubReg(CurDAG, VT, Node->getOperand(0)));
3445 if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0)
3447 if (!Node->getOperand(0).isUndef())
3451 EVT InVT = Node->getOperand(1).getValueType();
3463 ReplaceNode(Node,
insertSubReg(CurDAG, VT, Node->getOperand(1)));
3474 CurDAG->getEntryNode(),
SDLoc(Node), AArch64::WZR,
MVT::i32);
3475 ReplaceNode(Node,
New.getNode());
3479 CurDAG->getEntryNode(),
SDLoc(Node), AArch64::XZR,
MVT::i64);
3480 ReplaceNode(Node,
New.getNode());
3489 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
3492 SDValue TFI = CurDAG->getTargetFrameIndex(
3496 CurDAG->getTargetConstant(Shifter,
DL,
MVT::i32) };
3497 CurDAG->SelectNodeTo(Node, AArch64::ADDXri,
MVT::i64, Ops);
3501 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
3505 case Intrinsic::aarch64_ldaxp:
3506 case Intrinsic::aarch64_ldxp: {
3508 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
3509 SDValue MemAddr = Node->getOperand(2);
3511 SDValue Chain = Node->getOperand(0);
3518 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
3519 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {
MemOp});
3520 ReplaceNode(Node, Ld);
3523 case Intrinsic::aarch64_stlxp:
3524 case Intrinsic::aarch64_stxp: {
3526 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
3528 SDValue Chain = Node->getOperand(0);
3529 SDValue ValLo = Node->getOperand(2);
3530 SDValue ValHi = Node->getOperand(3);
3531 SDValue MemAddr = Node->getOperand(4);
3534 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
3539 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
3540 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {
MemOp});
3542 ReplaceNode(Node, St);
3545 case Intrinsic::aarch64_neon_ld1x2:
3547 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
3550 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
3553 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
3556 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
3559 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
3562 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
3565 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
3568 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
3572 case Intrinsic::aarch64_neon_ld1x3:
3574 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
3577 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
3580 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
3583 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
3586 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
3589 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
3592 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3595 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
3599 case Intrinsic::aarch64_neon_ld1x4:
3601 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
3604 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
3607 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
3610 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
3613 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
3616 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
3619 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3622 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
3626 case Intrinsic::aarch64_neon_ld2:
3628 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
3631 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
3634 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
3637 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
3640 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
3643 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
3646 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
3649 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
3653 case Intrinsic::aarch64_neon_ld3:
3655 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
3658 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
3661 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
3664 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
3667 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
3670 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
3673 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3676 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
3680 case Intrinsic::aarch64_neon_ld4:
3682 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
3685 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
3688 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
3691 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
3694 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
3697 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
3700 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3703 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
3707 case Intrinsic::aarch64_neon_ld2r:
3709 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
3712 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
3715 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
3718 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
3721 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
3724 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
3727 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
3730 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
3734 case Intrinsic::aarch64_neon_ld3r:
3736 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
3739 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
3742 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
3745 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
3748 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
3751 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
3754 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
3757 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
3761 case Intrinsic::aarch64_neon_ld4r:
3763 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
3766 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
3769 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
3772 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
3775 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
3778 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
3781 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
3784 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
3788 case Intrinsic::aarch64_neon_ld2lane:
3790 SelectLoadLane(Node, 2, AArch64::LD2i8);
3794 SelectLoadLane(Node, 2, AArch64::LD2i16);
3798 SelectLoadLane(Node, 2, AArch64::LD2i32);
3802 SelectLoadLane(Node, 2, AArch64::LD2i64);
3806 case Intrinsic::aarch64_neon_ld3lane:
3808 SelectLoadLane(Node, 3, AArch64::LD3i8);
3812 SelectLoadLane(Node, 3, AArch64::LD3i16);
3816 SelectLoadLane(Node, 3, AArch64::LD3i32);
3820 SelectLoadLane(Node, 3, AArch64::LD3i64);
3824 case Intrinsic::aarch64_neon_ld4lane:
3826 SelectLoadLane(Node, 4, AArch64::LD4i8);
3830 SelectLoadLane(Node, 4, AArch64::LD4i16);
3834 SelectLoadLane(Node, 4, AArch64::LD4i32);
3838 SelectLoadLane(Node, 4, AArch64::LD4i64);
3842 case Intrinsic::aarch64_ld64b:
3843 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
3848 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
3852 case Intrinsic::aarch64_tagp:
3855 case Intrinsic::aarch64_neon_tbl2:
3856 SelectTable(Node, 2,
3857 VT ==
MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
3860 case Intrinsic::aarch64_neon_tbl3:
3861 SelectTable(Node, 3, VT ==
MVT::v8i8 ? AArch64::TBLv8i8Three
3862 : AArch64::TBLv16i8Three,
3865 case Intrinsic::aarch64_neon_tbl4:
3866 SelectTable(Node, 4, VT ==
MVT::v8i8 ? AArch64::TBLv8i8Four
3867 : AArch64::TBLv16i8Four,
3870 case Intrinsic::aarch64_neon_tbx2:
3871 SelectTable(Node, 2,
3872 VT ==
MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
3875 case Intrinsic::aarch64_neon_tbx3:
3876 SelectTable(Node, 3, VT ==
MVT::v8i8 ? AArch64::TBXv8i8Three
3877 : AArch64::TBXv16i8Three,
3880 case Intrinsic::aarch64_neon_tbx4:
3881 SelectTable(Node, 4, VT ==
MVT::v8i8 ? AArch64::TBXv8i8Four
3882 : AArch64::TBXv16i8Four,
3885 case Intrinsic::aarch64_neon_smull:
3886 case Intrinsic::aarch64_neon_umull:
3887 if (tryMULLV64LaneV128(IntNo, Node))
3894 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
3895 if (Node->getNumOperands() >= 3)
3896 VT = Node->getOperand(2)->getValueType(0);
3900 case Intrinsic::aarch64_neon_st1x2: {
3902 SelectStore(Node, 2, AArch64::ST1Twov8b);
3905 SelectStore(Node, 2, AArch64::ST1Twov16b);
3909 SelectStore(Node, 2, AArch64::ST1Twov4h);
3913 SelectStore(Node, 2, AArch64::ST1Twov8h);
3916 SelectStore(Node, 2, AArch64::ST1Twov2s);
3919 SelectStore(Node, 2, AArch64::ST1Twov4s);
3922 SelectStore(Node, 2, AArch64::ST1Twov2d);
3925 SelectStore(Node, 2, AArch64::ST1Twov1d);
3930 case Intrinsic::aarch64_neon_st1x3: {
3932 SelectStore(Node, 3, AArch64::ST1Threev8b);
3935 SelectStore(Node, 3, AArch64::ST1Threev16b);
3939 SelectStore(Node, 3, AArch64::ST1Threev4h);
3943 SelectStore(Node, 3, AArch64::ST1Threev8h);
3946 SelectStore(Node, 3, AArch64::ST1Threev2s);
3949 SelectStore(Node, 3, AArch64::ST1Threev4s);
3952 SelectStore(Node, 3, AArch64::ST1Threev2d);
3955 SelectStore(Node, 3, AArch64::ST1Threev1d);
3960 case Intrinsic::aarch64_neon_st1x4: {
3962 SelectStore(Node, 4, AArch64::ST1Fourv8b);
3965 SelectStore(Node, 4, AArch64::ST1Fourv16b);
3969 SelectStore(Node, 4, AArch64::ST1Fourv4h);
3973 SelectStore(Node, 4, AArch64::ST1Fourv8h);
3976 SelectStore(Node, 4, AArch64::ST1Fourv2s);
3979 SelectStore(Node, 4, AArch64::ST1Fourv4s);
3982 SelectStore(Node, 4, AArch64::ST1Fourv2d);
3985 SelectStore(Node, 4, AArch64::ST1Fourv1d);
3990 case Intrinsic::aarch64_neon_st2: {
3992 SelectStore(Node, 2, AArch64::ST2Twov8b);
3995 SelectStore(Node, 2, AArch64::ST2Twov16b);
3999 SelectStore(Node, 2, AArch64::ST2Twov4h);
4003 SelectStore(Node, 2, AArch64::ST2Twov8h);
4006 SelectStore(Node, 2, AArch64::ST2Twov2s);
4009 SelectStore(Node, 2, AArch64::ST2Twov4s);
4012 SelectStore(Node, 2, AArch64::ST2Twov2d);
4015 SelectStore(Node, 2, AArch64::ST1Twov1d);
4020 case Intrinsic::aarch64_neon_st3: {
4022 SelectStore(Node, 3, AArch64::ST3Threev8b);
4025 SelectStore(Node, 3, AArch64::ST3Threev16b);
4029 SelectStore(Node, 3, AArch64::ST3Threev4h);
4033 SelectStore(Node, 3, AArch64::ST3Threev8h);
4036 SelectStore(Node, 3, AArch64::ST3Threev2s);
4039 SelectStore(Node, 3, AArch64::ST3Threev4s);
4042 SelectStore(Node, 3, AArch64::ST3Threev2d);
4045 SelectStore(Node, 3, AArch64::ST1Threev1d);
4050 case Intrinsic::aarch64_neon_st4: {
4052 SelectStore(Node, 4, AArch64::ST4Fourv8b);
4055 SelectStore(Node, 4, AArch64::ST4Fourv16b);
4059 SelectStore(Node, 4, AArch64::ST4Fourv4h);
4063 SelectStore(Node, 4, AArch64::ST4Fourv8h);
4066 SelectStore(Node, 4, AArch64::ST4Fourv2s);
4069 SelectStore(Node, 4, AArch64::ST4Fourv4s);
4072 SelectStore(Node, 4, AArch64::ST4Fourv2d);
4075 SelectStore(Node, 4, AArch64::ST1Fourv1d);
4080 case Intrinsic::aarch64_neon_st2lane: {
4082 SelectStoreLane(Node, 2, AArch64::ST2i8);
4086 SelectStoreLane(Node, 2, AArch64::ST2i16);
4090 SelectStoreLane(Node, 2, AArch64::ST2i32);
4094 SelectStoreLane(Node, 2, AArch64::ST2i64);
4099 case Intrinsic::aarch64_neon_st3lane: {
4101 SelectStoreLane(Node, 3, AArch64::ST3i8);
4105 SelectStoreLane(Node, 3, AArch64::ST3i16);
4109 SelectStoreLane(Node, 3, AArch64::ST3i32);
4113 SelectStoreLane(Node, 3, AArch64::ST3i64);
4118 case Intrinsic::aarch64_neon_st4lane: {
4120 SelectStoreLane(Node, 4, AArch64::ST4i8);
4124 SelectStoreLane(Node, 4, AArch64::ST4i16);
4128 SelectStoreLane(Node, 4, AArch64::ST4i32);
4132 SelectStoreLane(Node, 4, AArch64::ST4i64);
4137 case Intrinsic::aarch64_sve_st2: {
4139 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
4143 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
4146 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
4149 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
4154 case Intrinsic::aarch64_sve_st3: {
4156 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
4160 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
4163 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
4166 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
4171 case Intrinsic::aarch64_sve_st4: {
4173 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
4177 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
4180 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
4183 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
4193 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
4196 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
4199 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
4202 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
4205 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
4208 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
4211 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
4214 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
4221 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
4224 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
4227 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
4230 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
4233 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
4236 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
4239 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
4242 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
4249 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
4252 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
4255 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
4258 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
4261 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
4264 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
4267 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
4270 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
4277 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
4280 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
4283 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
4286 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
4289 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
4292 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
4295 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
4298 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
4305 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
4308 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
4311 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
4314 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
4317 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
4320 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
4323 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
4326 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
4333 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
4336 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
4339 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
4342 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
4345 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
4348 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
4351 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
4354 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
4361 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
4364 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
4367 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
4370 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
4373 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
4376 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
4379 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
4382 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
4389 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
4392 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
4395 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
4398 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
4401 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
4404 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
4407 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
4410 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
4417 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
4420 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
4423 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
4426 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
4429 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
4432 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
4435 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
4438 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
4445 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
4448 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
4451 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
4454 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
4457 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
4460 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
4463 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
4466 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
4473 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
4477 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
4481 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
4485 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
4492 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
4496 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
4500 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
4504 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
4511 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
4515 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
4519 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
4523 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
4530 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
4534 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
4538 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
4542 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
4548 VT = Node->getOperand(1).getValueType();
4550 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
4553 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
4556 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
4559 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
4562 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
4565 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
4568 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
4571 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
4577 VT = Node->getOperand(1).getValueType();
4579 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
4582 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
4585 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
4588 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
4591 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
4594 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
4597 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
4600 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
4606 VT = Node->getOperand(1).getValueType();
4608 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
4611 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
4614 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
4617 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
4620 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
4623 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
4626 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
4629 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
4635 VT = Node->getOperand(1).getValueType();
4637 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
4640 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
4643 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
4646 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
4649 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
4652 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
4655 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
4658 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
4664 VT = Node->getOperand(1).getValueType();
4666 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
4669 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
4672 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
4675 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
4678 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
4681 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
4684 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
4687 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
4693 VT = Node->getOperand(1).getValueType();
4695 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
4698 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
4701 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
4704 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
4707 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
4710 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
4713 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
4716 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
4722 VT = Node->getOperand(1).getValueType();
4724 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
4728 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
4732 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
4736 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
4742 VT = Node->getOperand(1).getValueType();
4744 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
4748 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
4752 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
4756 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
4762 VT = Node->getOperand(1).getValueType();
4764 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
4768 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
4772 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
4776 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
4783 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
4787 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
4790 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
4793 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
4800 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
4804 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
4807 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
4810 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
4817 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
4821 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
4824 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
4827 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
4842 return new AArch64DAGToDAGISel(
TM, OptLevel);
4854 assert(NumVec > 0 && NumVec < 5 &&
"Invalid number of vectors.");
4873 if (isa<MemSDNode>(Root))
4874 return cast<MemSDNode>(Root)->getMemoryVT();
4876 if (isa<MemIntrinsicSDNode>(Root))
4877 return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();
4879 const unsigned Opcode = Root->
getOpcode();
4887 return cast<VTSDNode>(Root->
getOperand(3))->getVT();
4889 return cast<VTSDNode>(Root->
getOperand(4))->getVT();
4906 const unsigned IntNo =
4907 cast<ConstantSDNode>(Root->
getOperand(1))->getZExtValue();
4908 if (IntNo != Intrinsic::aarch64_sve_prf)