#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"
static cl::opt<unsigned> TBZDisplacementBits(
    "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned> CBZDisplacementBits(
    "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned> BCCDisplacementBits(
    "aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
    cl::desc("Restrict range of Bcc instructions (DEBUG)"));

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
                          AArch64::CATCHRET),
      RI(STI.getTargetTriple()), Subtarget(STI) {}
// In AArch64InstrInfo::getInstSizeInBytes:
auto Op = MI.getOpcode();
if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
  return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);

// Meta-instructions emit no machine code.
if (MI.isMetaInstruction())
  return 0;

unsigned NumBytes = 0;
// ...
case TargetOpcode::STACKMAP:
  // The upper bound for a stackmap intrinsic is the full length of its shadow.
  NumBytes = StackMapOpers(&MI).getNumPatchBytes();
  assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
  break;
case TargetOpcode::PATCHPOINT:
  // The size of the patchpoint intrinsic is the number of bytes requested.
  NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
  assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
  break;
case TargetOpcode::STATEPOINT:
  NumBytes = StatepointOpers(&MI).getNumPatchBytes();
  assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
  break;
// ...
  NumBytes = MI.getOperand(1).getImm();
  break;
case TargetOpcode::BUNDLE:
  NumBytes = getInstBundleLength(MI);
  break;
unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
  }
  return Size;
}
bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                             int64_t BrOffset) const {
  unsigned Bits = getBranchDisplacementBits(BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump "
                      "over conditional branch expansion");
  // Branch offsets are encoded as signed multiples of 4 bytes.
  return isIntN(Bits, BrOffset / 4);
}
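// For reference (A64 encodings): TB[N]Z carries a 14-bit signed word offset,
// so the default 14-bit setting allows byte displacements of +/-32KiB;
// CB[N]Z and B.cc carry 19 bits, allowing +/-1MiB. The cl::opts above can
// only shrink these limits, which is useful for stress-testing the branch
// relaxation pass.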
MachineBasicBlock *
AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return MI.getOperand(0).getMBB();
  case AArch64::TBZW: case AArch64::TBNZW:
  case AArch64::TBZX: case AArch64::TBNZX:
    return MI.getOperand(2).getMBB();
  case AArch64::CBZW: case AArch64::CBNZW:
  case AArch64::CBZX: case AArch64::CBNZX:
  case AArch64::Bcc:
    return MI.getOperand(1).getMBB();
  }
}
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // ...
  // Skip over speculation barrier pseudos; they terminate the block but do
  // not transfer control.
  if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
      I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
    --I;
  }

  if (!isUnpredicatedTerminator(*I))
    return false;

  // ...
  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    // ...
  }

  // Get the instruction before it if it is a terminator.
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with several unconditional
  // branches, delete all but the first.
  // ...
      LastInst = SecondLastInst;
      // ...
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // ...
      }
      SecondLastInst = &*I;
      SecondLastOpc = SecondLastInst->getOpcode();
  // ...

  // If the block ends in an unconditional branch to the layout successor,
  // the branch may be removed when modification is allowed.
  // ...
    LastInst = SecondLastInst;
    // ...
    if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
      assert(!isUncondBranchOpcode(LastInst->getOpcode()) &&
             "unreachable unconditional branches removed above");
      // ...
    }
    SecondLastInst = &*I;
    SecondLastOpc = SecondLastInst->getOpcode();

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;
  // ...
    I->eraseFromParent();
  // ...
    I->eraseFromParent();
bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
                                              MachineBranchPredicate &MBP,
                                              bool AllowModify) const {
  // ...
  // Skip over speculation barrier pseudos.
  if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
      I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
    --I;
  }

  if (!isUnpredicatedTerminator(*I))
    return true;

  // Get the last instruction in the block; only CBZ/CBNZ forms are handled.
  MachineInstr *LastInst = &*I;
  unsigned LastOpc = LastInst->getOpcode();
  // ...
  assert(MBP.TrueDest && "expected!");
  MBP.FalseDest = MBB.getNextNode();

  MBP.ConditionDef = nullptr;
  MBP.SingleUseCondition = false;
  // ...
  MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
                                            : MachineBranchPredicate::PRED_EQ;
bool AArch64InstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc: invert the condition code.
    // ...
  } else {
    // Folded compare-and-branch: swap the opcode for its negated twin.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:  Cond[1].setImm(AArch64::CBNZW); break;
    case AArch64::CBNZW: Cond[1].setImm(AArch64::CBZW);  break;
    case AArch64::CBZX:  Cond[1].setImm(AArch64::CBNZX); break;
    case AArch64::CBNZX: Cond[1].setImm(AArch64::CBZX);  break;
    case AArch64::TBZW:  Cond[1].setImm(AArch64::TBNZW); break;
    case AArch64::TBNZW: Cond[1].setImm(AArch64::TBZW);  break;
    case AArch64::TBZX:  Cond[1].setImm(AArch64::TBNZX); break;
    case AArch64::TBNZX: Cond[1].setImm(AArch64::TBZX);  break;
    }
  }
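// Example: for a condition described as Cond = {-1, TBZW, <reg>, <bit>}
// (i.e. "tbz wN, #bit"), this flips only the opcode immediate in Cond[1] to
// TBNZW, yielding "tbnz wN, #bit"; the register and bit operands are left
// untouched.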
unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                        int *BytesRemoved) const {
  // ...
  I->eraseFromParent();
  // ...
  I->eraseFromParent();
void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc.
    // ...
  }
  // ...
}

// In AArch64InstrInfo::insertBranch:
  assert(TBB && "insertBranch must not be told to insert a fallthrough");

// Helper for insertSelect below: if the virtual register is defined by an
// instruction that folds into a conditional select (csinc/csinv/csneg),
// return the folded opcode and report which source operand feeds it.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // If NZCV is used, do not fold.
    // ...
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    // ...
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    // ...
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // If NZCV is used, do not fold.
    // ...
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    // ...
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");
bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                       ArrayRef<MachineOperand> Cond,
                                       Register DstReg, Register TrueReg,
                                       Register FalseReg, int &CondCycles,
                                       int &TrueCycles,
                                       int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;
  // ...
  if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
    return false;

  // Expanding cb[n]z/tb[n]z requires an extra cycle of latency on the
  // condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    // ...
  }

  // Scalar floating point is handled by fcsel.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    // ...
  }
// In AArch64InstrInfo::insertSelect — first turn the packed Cond into a
// condition a csel can consume:
switch (Cond.size()) {
// ... (1: Bcc condition; 3: cb[n]z; 4: tb[n]z)
}
// For compare-and-branch conditions, materialize the compare:
switch (Cond[1].getImm()) {
// ...
}
// ...
MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
// ...
MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
// ...
// For test-and-branch conditions, test the selected bit:
switch (Cond[1].getImm()) {
// ...
}
// ...
if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
  // ... (32-bit test)
// ...

bool TryFold = false;
if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
  RC = &AArch64::GPR64RegClass;
  Opc = AArch64::CSELXr;
  TryFold = true;
} else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
  RC = &AArch64::GPR32RegClass;
  Opc = AArch64::CSELWr;
  TryFold = true;
} else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
  RC = &AArch64::FPR64RegClass;
  Opc = AArch64::FCSELDrrr;
} else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
  RC = &AArch64::FPR32RegClass;
  Opc = AArch64::FCSELSrrr;
}
assert(RC && "Unsupported regclass");

// Try folding simple instructions into the csel.
unsigned NewVReg = 0;
// ...
MRI.clearKillFlags(NewVReg);
// ...
MRI.constrainRegClass(TrueReg, RC);
MRI.constrainRegClass(FalseReg, RC);
// Check whether a MOVi immediate can instead be materialized with a single
// ORR from the zero register, i.e. whether it is a valid logical immediate.
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
  uint64_t Imm = MI.getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}

bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (!Subtarget.hasCustomCheapAsMoveHandling())
    return MI.isAsCheapAsAMove();

  const unsigned Opcode = MI.getOpcode();

  // Firstly, check cases gated by features.
  if (Subtarget.hasZeroCycleZeroingFP()) {
    if (Opcode == AArch64::FMOVH0 || Opcode == AArch64::FMOVS0 ||
        Opcode == AArch64::FMOVD0)
      return true;
  }

  if (Subtarget.hasZeroCycleZeroingGP()) {
    if (Opcode == TargetOpcode::COPY &&
        (MI.getOperand(1).getReg() == AArch64::WZR ||
         MI.getOperand(1).getReg() == AArch64::XZR))
      return true;
  }

  // Secondly, check cases specific to sub-targets.
  if (Subtarget.hasExynosCheapAsMoveHandling()) {
    if (isExynosCheapAsMove(MI))
      return true;
    return MI.isAsCheapAsAMove();
  }
  // add/sub with an immediate and no shift.
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (MI.getOperand(3).getImm() == 0);

  // Logical ops on an immediate.
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // Logical ops on registers without a shift.
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or ORRXri, it is
  // as cheap as a move.
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);
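// Example for the MOVi cases above: MOVi64imm #0x00ff00ff00ff00ff is a
// valid logical immediate, so it expands to a single "orr xN, xzr, #imm"
// and is as cheap as a move, whereas an arbitrary 64-bit constant may need
// a MOVZ plus up to three MOVKs.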
// In isFalkorShiftExtFast — shifted/extended operand forms that are as fast
// as the plain register form on the target:
switch (MI.getOpcode()) {
default:
  return false;

case AArch64::ADDWrs:
case AArch64::ADDXrs:
case AArch64::ADDSWrs:
case AArch64::ADDSXrs: {
  unsigned Imm = MI.getOperand(3).getImm();
  // Cheap only for small LSL shift amounts.
  // ...
}

case AArch64::ADDWrx:
case AArch64::ADDXrx:
case AArch64::ADDXrx64:
case AArch64::ADDSWrx:
case AArch64::ADDSXrx:
case AArch64::ADDSXrx64: {
  unsigned Imm = MI.getOperand(3).getImm();
  // Cheap only for unsigned extends with a small shift amount.
  // ...
}

case AArch64::SUBWrs:
case AArch64::SUBSWrs: {
  unsigned Imm = MI.getOperand(3).getImm();
  unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
  return ShiftVal == 0 ||
         (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
}

case AArch64::SUBXrs:
case AArch64::SUBSXrs: {
  unsigned Imm = MI.getOperand(3).getImm();
  unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
  return ShiftVal == 0 ||
         (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
}

case AArch64::SUBWrx:
case AArch64::SUBXrx:
case AArch64::SUBXrx64:
case AArch64::SUBSWrx:
case AArch64::SUBSXrx:
case AArch64::SUBSXrx64: {
  unsigned Imm = MI.getOperand(3).getImm();
  // ...
}
case AArch64::LDRBBroW:
case AArch64::LDRBBroX:
case AArch64::LDRBroW:
case AArch64::LDRBroX:
case AArch64::LDRDroW:
case AArch64::LDRDroX:
case AArch64::LDRHHroW:
case AArch64::LDRHHroX:
case AArch64::LDRHroW:
case AArch64::LDRHroX:
case AArch64::LDRQroW:
case AArch64::LDRQroX:
case AArch64::LDRSBWroW:
case AArch64::LDRSBWroX:
case AArch64::LDRSBXroW:
case AArch64::LDRSBXroX:
case AArch64::LDRSHWroW:
case AArch64::LDRSHWroX:
case AArch64::LDRSHXroW:
case AArch64::LDRSHXroX:
case AArch64::LDRSWroW:
case AArch64::LDRSWroX:
case AArch64::LDRSroW:
case AArch64::LDRSroX:
case AArch64::LDRWroW:
case AArch64::LDRWroX:
case AArch64::LDRXroW:
case AArch64::LDRXroX:
case AArch64::PRFMroW:
case AArch64::PRFMroX:
case AArch64::STRBBroW:
case AArch64::STRBBroX:
case AArch64::STRBroW:
case AArch64::STRBroX:
case AArch64::STRDroW:
case AArch64::STRDroX:
case AArch64::STRHHroW:
case AArch64::STRHHroX:
case AArch64::STRHroW:
case AArch64::STRHroX:
case AArch64::STRQroW:
case AArch64::STRQroX:
case AArch64::STRSroW:
case AArch64::STRSroX:
case AArch64::STRWroW:
case AArch64::STRWroX:
case AArch64::STRXroW:
case AArch64::STRXroX: {
  unsigned IsSigned = MI.getOperand(3).getImm();
  return !IsSigned;
}
// Returns true for the Windows SEH unwind-info pseudo instructions.
unsigned Opc = MI.getOpcode();
switch (Opc) {
default:
  return false;
case AArch64::SEH_StackAlloc:
case AArch64::SEH_SaveFPLR:
case AArch64::SEH_SaveFPLR_X:
case AArch64::SEH_SaveReg:
case AArch64::SEH_SaveReg_X:
case AArch64::SEH_SaveRegP:
case AArch64::SEH_SaveRegP_X:
case AArch64::SEH_SaveFReg:
case AArch64::SEH_SaveFReg_X:
case AArch64::SEH_SaveFRegP:
case AArch64::SEH_SaveFRegP_X:
case AArch64::SEH_SetFP:
case AArch64::SEH_AddFP:
case AArch64::SEH_Nop:
case AArch64::SEH_PrologEnd:
case AArch64::SEH_EpilogStart:
case AArch64::SEH_EpilogEnd:
case AArch64::SEH_PACSignLR:
  return true;
}
bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             Register &SrcReg,
                                             Register &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case; these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}
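// Example: "sxtw x0, w1" is encoded as SBFMXri x0, x1, #0, #31. Reporting it
// here lets the register coalescer treat it as a copy into the sub_32 lane
// of x0, which can make the extend disappear entirely.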
// In AArch64InstrInfo::areMemAccessesTriviallyDisjoint:
int64_t OffsetA = 0, OffsetB = 0;
unsigned WidthA = 0, WidthB = 0;
bool OffsetAIsScalable = false, OffsetBIsScalable = false;
// ...
// If the bases are identical and both offsets are of the same kind, the
// accesses are disjoint when the lower access ends before the higher begins.
    OffsetAIsScalable == OffsetBIsScalable) {
  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  if (LowOffset + LowWidth <= HighOffset)
    return true;
}
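// Worked example: with a common base register, a store at offset 8 of width
// 8 and a load at offset 16 of width 8 satisfy LowOffset + LowWidth (8 + 8)
// <= HighOffset (16), so the accesses cannot overlap and the scheduler may
// reorder them freely.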
// In AArch64InstrInfo::isSchedulingBoundary:
switch (MI.getOpcode()) {
// ...
  // HINT #0x14 is CSDB, a speculation barrier; keep it as a boundary.
  if (MI.getOperand(0).getImm() == 0x14)
    return true;
  break;
// ...
case AArch64::MSRpstatesvcrImm1:
  // An SMSTART/SMSTOP must not be separated from an immediately following
  // CFI instruction.
  auto Next = std::next(MI.getIterator());
  return Next != MBB->end() && Next->isCFIInstruction();
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                                      Register &SrcReg2, int64_t &CmpMask,
                                      int64_t &CmpValue) const {
  // The first operand can be a frame index where we'd normally expect a
  // register.
  assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
  if (!MI.getOperand(1).isReg())
    return false;

  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::PTEST_PP:
  case AArch64::PTEST_PP_ANY:
    SrcReg = MI.getOperand(0).getReg();
    SrcReg2 = MI.getOperand(1).getReg();
    // ...
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    // ...
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI.getOperand(1).getReg();
    // ...
    CmpValue = MI.getOperand(2).getImm();
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    // ...
    CmpValue = AArch64_AM::decodeLogicalImmediate(
        MI.getOperand(2).getImm(),
        MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
    return true;
  }
// In UpdateOperandRegClass — re-constrain every register operand of Instr to
// the register class its new opcode requires:
MachineBasicBlock *MBB = Instr.getParent();
assert(MBB && "Can't get MachineBasicBlock here");
MachineFunction *MF = MBB->getParent();
assert(MF && "Can't get MachineFunction here");
// ...
for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
     ++OpIdx) {
  // ...
  if (!OpRegCstraints)
    continue;
  // ...
  assert(Reg &&
         "Operand has register constraints without being a register!");
  if (Reg.isPhysical()) {
    if (!OpRegCstraints->contains(Reg))
      return false;
  } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
             !MRI->constrainRegClass(Reg, OpRegCstraints))
    return false;
}
// In AArch64InstrInfo::convertToNonFlagSettingOpc — if MI also writes to the
// zero register, keeping the flag-setting form is free, so only drop the "S"
// otherwise:
bool MIDefinesZeroReg = false;
if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
  MIDefinesZeroReg = true;

switch (MI.getOpcode()) {
default:
  return MI.getOpcode();
case AArch64::ADDSWrr:
  return AArch64::ADDWrr;
case AArch64::ADDSWri:
  return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
case AArch64::ADDSWrs:
  return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
case AArch64::ADDSWrx:
  return AArch64::ADDWrx;
case AArch64::ADDSXrr:
  return AArch64::ADDXrr;
case AArch64::ADDSXri:
  return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
case AArch64::ADDSXrs:
  return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
case AArch64::ADDSXrx:
  return AArch64::ADDXrx;
case AArch64::SUBSWrr:
  return AArch64::SUBWrr;
case AArch64::SUBSWri:
  return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
case AArch64::SUBSWrs:
  return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
case AArch64::SUBSWrx:
  return AArch64::SUBWrx;
case AArch64::SUBSXrr:
  return AArch64::SUBXrr;
case AArch64::SUBSXri:
  return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
case AArch64::SUBSXrs:
  return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
case AArch64::SUBSXrx:
  return AArch64::SUBXrx;
}
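// Example use of this table: optimizeCompareInstr calls it when the NZCV def
// of e.g. "SUBSWrr %w0, %w1, %w2" turns out to be dead, rewriting the
// instruction to SUBWrr. Conversely, the *S opcode is returned unchanged
// when the destination is WZR/XZR, since e.g. "subs wzr, ..." is exactly a
// cmp and removing the flag def would change its meaning.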
// In areCFlagsAccessedBetweenInstrs(From, To, TRI, AccessToCheck):
if (To == To->getParent()->begin())
  return false;

// Both instructions must be in the same basic block.
if (To->getParent() != From->getParent())
  return true;
// ...
if (((AccessToCheck & AK_Write) &&
     Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
    ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
  return true;
bool AArch64InstrInfo::optimizePTestInstr(
    MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
    const MachineRegisterInfo *MRI) const {
  auto *Mask = MRI->getUniqueVRegDef(MaskReg);
  auto *Pred = MRI->getUniqueVRegDef(PredReg);
  auto NewOp = Pred->getOpcode();
  bool OpChanged = false;

  unsigned MaskOpcode = Mask->getOpcode();
  unsigned PredOpcode = Pred->getOpcode();
  bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
  bool PredIsWhileLike = isWhileOpcode(PredOpcode);

  if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike) &&
      // ... (element sizes must match) ...
      Mask->getOperand(1).getImm() == 31) {
    // The PTEST is redundant: a flag-setting while/ptest-like instruction
    // under an all-active mask already produces the same flags.
    // ...
    if (PredIsPTestLike) {
      auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
      if (Mask != PTestLikeMask && PTest->getOpcode() != AArch64::PTEST_PP_ANY)
        return false;
    }
    // ...
  } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike) &&
             PTest->getOpcode() == AArch64::PTEST_PP_ANY) {
    // PTEST_ANY(P, P) of a ptest-like or while-like P is redundant.
    // ...
  } else if (PredIsPTestLike) {
    // ...
    auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
    if ((Mask != PTestLikeMask) ||
        // ... (element sizes differ and this is not PTEST_PP_ANY) ...
        PTest->getOpcode() != AArch64::PTEST_PP_ANY))
      return false;
    // ...
  } else {
    // If the operation in PTEST(PG, OP(PG, ...)) has a flag-setting variant,
    // switch to it so the PTEST becomes redundant.
    switch (PredOpcode) {
    case AArch64::AND_PPzPP:
    case AArch64::BIC_PPzPP:
    case AArch64::EOR_PPzPP:
    case AArch64::NAND_PPzPP:
    case AArch64::NOR_PPzPP:
    case AArch64::ORN_PPzPP:
    case AArch64::ORR_PPzPP:
    case AArch64::BRKA_PPzP:
    case AArch64::BRKPA_PPzPP:
    case AArch64::BRKB_PPzP:
    case AArch64::BRKPB_PPzPP:
    case AArch64::RDFFR_PPz: {
      // The predicate's governing mask must be the PTEST's mask.
      auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
      if (Mask != PredMask)
        return false;
      break;
    }
    case AArch64::BRKN_PPzP: {
      // BRKN only sets flags correctly for the all-active PTRUE_B mask.
      if ((MaskOpcode != AArch64::PTRUE_B) ||
          (Mask->getOperand(1).getImm() != 31))
        return false;
      break;
    }
    case AArch64::PTRUE_B:
      // PTEST(PG, PG) is equivalent to PTEST(PTRUE_B(31), PG).
      break;
    default:
      return false;
    }
    NewOp = convertToFlagSettingOpc(PredOpcode);
    OpChanged = true;
  }
  // ...
  Pred->setDesc(get(NewOp));
  // ...
  bool succeeded = UpdateOperandRegClass(*Pred);
  (void)succeeded;
  assert(succeeded && "Operands have incompatible register classes!");
  Pred->addRegisterDefined(AArch64::NZCV, TRI);

  // Ensure that the flags def is live.
  if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
    unsigned i = 0, e = Pred->getNumOperands();
    for (; i != e; ++i) {
      // ...
    }
  }
// In AArch64InstrInfo::optimizeCompareInstr:
if (DeadNZCVIdx != -1) {
  // The instruction is flag-setting but its NZCV def is dead; rewrite it to
  // the non-flag-setting form.
  // ...
  assert(succeeded && "Some operands reg class are incompatible!");
  // ...
}

if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
    CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
  return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);

// Only compares against 0 or 1 are candidates for removal.
if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
  return true;
return (CmpValue == 0 || CmpValue == 1) &&
       removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
// In sForm(MachineInstr &Instr) — map an opcode to its flag-setting variant,
// or INSTRUCTION_LIST_END if there is none:
switch (Instr.getOpcode()) {
default:
  return AArch64::INSTRUCTION_LIST_END;

case AArch64::ADDSWrr:
case AArch64::ADDSWri:
case AArch64::ADDSXrr:
case AArch64::ADDSXri:
case AArch64::SUBSWrr:
case AArch64::SUBSWri:
case AArch64::SUBSXrr:
case AArch64::SUBSXri:
  // Already flag-setting.
  return Instr.getOpcode();

case AArch64::ADDWrr:
  return AArch64::ADDSWrr;
case AArch64::ADDWri:
  return AArch64::ADDSWri;
case AArch64::ADDXrr:
  return AArch64::ADDSXrr;
case AArch64::ADDXri:
  return AArch64::ADDSXri;
case AArch64::ADCWr:
  return AArch64::ADCSWr;
case AArch64::ADCXr:
  return AArch64::ADCSXr;
case AArch64::SUBWrr:
  return AArch64::SUBSWrr;
case AArch64::SUBWri:
  return AArch64::SUBSWri;
case AArch64::SUBXrr:
  return AArch64::SUBSXrr;
case AArch64::SUBXri:
  return AArch64::SUBSXri;
case AArch64::SBCWr:
  return AArch64::SBCSWr;
case AArch64::SBCXr:
  return AArch64::SBCSXr;
case AArch64::ANDWri:
  return AArch64::ANDSWri;
case AArch64::ANDXri:
  return AArch64::ANDSXri;
}
// In areCFlagsAliveInSuccessors(MBB): NZCV is alive if any successor block
// has it live-in.
  if (BB->isLiveIn(AArch64::NZCV))
    return true;

// In findCondCodeUseOperandIdxForBranchOrSelect — locate the condition-code
// operand of a flag-consuming instruction:
case AArch64::Bcc: {
  // ...
}
case AArch64::CSINVWr:
case AArch64::CSINVXr:
case AArch64::CSINCWr:
case AArch64::CSINCXr:
case AArch64::CSELWr:
case AArch64::CSELXr:
case AArch64::CSNEGWr:
case AArch64::CSNEGXr:
case AArch64::FCSELSrrr:
case AArch64::FCSELDrrr: {
  // ...
}
std::optional<UsedNZCV>
llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
                       const TargetRegisterInfo &TRI,
                       SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
  // Only handle uses within the compare's own block.
  if (MI.getParent() != CmpParent)
    return std::nullopt;

  // Bail out if NZCV is live into a successor block.
  // ...
    return std::nullopt;

  UsedNZCV NZCVUsedAfterCmp;
  // Walk the instructions after the compare, accumulating which flags are
  // read before NZCV is redefined.
  // ...
    if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
      // ...
        return std::nullopt;
      // ...
    }
    if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
      break;
  // ...
  return NZCVUsedAfterCmp;
static bool isADDSRegImm(unsigned Opcode) {
  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}

static bool isSUBSRegImm(unsigned Opcode) {
  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}

// In canInstrSubstituteCmpInstr:
const unsigned CmpOpcode = CmpInstr.getOpcode();
// ...
// The substitution is only valid when the compare's C and V flags are never
// consumed.
if (!NZVCUsed || NZVCUsed->C || NZVCUsed->V)
  return false;
bool AArch64InstrInfo::substituteCmpToZero(
    MachineInstr &CmpInstr, unsigned SrcReg,
    const MachineRegisterInfo &MRI) const {
  // ...
  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    return false;
  // ...
  MI->setDesc(get(NewOpc));
  // ...
  assert(succeeded && "Some operands reg class are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, &TRI);
// In canCmpInstrBeRemoved — decide whether CmpInstr (a compare of SrcReg
// against 0 or 1) is redundant given the csinc-style definition MI:
assert((CmpValue == 0 || CmpValue == 1) &&
       "Only comparisons to 0 or 1 considered for removal!");

// MI must be 'csinc <reg>, wzr, wzr, <cc>' (or the 64-bit form).
unsigned MIOpc = MI.getOpcode();
if (MIOpc == AArch64::CSINCWr) {
  if (MI.getOperand(1).getReg() != AArch64::WZR ||
      MI.getOperand(2).getReg() != AArch64::WZR)
    return false;
} else if (MIOpc == AArch64::CSINCXr) {
  if (MI.getOperand(1).getReg() != AArch64::XZR ||
      MI.getOperand(2).getReg() != AArch64::XZR)
    return false;
} else {
  return false;
}
// ...
// MI itself must not also define NZCV.
if (MI.findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
  return false;

// CmpInstr must be 'adds <reg>, 0', 'subs <reg>, 0', or 'subs <reg>, 1'.
const unsigned CmpOpcode = CmpInstr.getOpcode();
bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
if (CmpValue && !IsSubsRegImm)
  return false;
if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
  return false;
// ...
if (MIUsedNZCV.C || MIUsedNZCV.V)
  return false;

std::optional<UsedNZCV> NZCVUsedAfterCmp =
    examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
// C and V must not be consumed after the compare.
if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
  return false;

if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
    (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
  return false;
// ...
if (MIUsedNZCV.N && !CmpValue)
  return false;
bool AArch64InstrInfo::removeCmpToZeroOrOne(
    MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
    const MachineRegisterInfo &MRI) const {
  // ...
  bool IsInvertCC = false;
  // ...
  // When the inverted condition is needed, rewrite each NZCV user in place.
  assert(Idx >= 0 && "Unexpected instruction using CC.");
// In AArch64InstrInfo::expandPostRAPseudo:
if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
    MI.getOpcode() != AArch64::CATCHRET)
  return false;
// ...
if (MI.getOpcode() == AArch64::CATCHRET) {
  // Skip to the first instruction before the epilog.
  // ...
      FirstEpilogSEH = std::prev(FirstEpilogSEH);
  // ...
    FirstEpilogSEH = std::next(FirstEpilogSEH);
  // ...
}
// ...
if (M.getStackProtectorGuard() == "sysreg") {
  // Load the stack-protector guard from the system register given by the
  // module attributes.
  // ...
  int Offset = M.getStackProtectorGuardOffset();
  // ...
}
// LOAD_STACK_GUARD: the guard's global is stashed on the memory operand.
const GlobalValue *GV =
    cast<GlobalValue>((*MI.memoperands_begin())->getValue());
// ...
unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
// ...
unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
// In AArch64InstrInfo::isGPRZero — does MI produce a GPR value of zero?
switch (MI.getOpcode()) {
default:
  break;
case AArch64::MOVZWi:
case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
  if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
    assert(MI.getDesc().getNumOperands() == 3 &&
           MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
    return true;
  }
  break;
case AArch64::ANDWri: // and Rd, Rzr, #imm
  return MI.getOperand(1).getReg() == AArch64::WZR;
case AArch64::ANDXri:
  return MI.getOperand(1).getReg() == AArch64::XZR;
case TargetOpcode::COPY:
  return MI.getOperand(1).getReg() == AArch64::WZR;
}
return false;
// In AArch64InstrInfo::isGPRCopy — is MI (or does it act as) a GPR-to-GPR
// copy?
switch (MI.getOpcode()) {
default:
  break;
case TargetOpcode::COPY: {
  Register DstReg = MI.getOperand(0).getReg();
  return (AArch64::GPR32RegClass.contains(DstReg) ||
          AArch64::GPR64RegClass.contains(DstReg));
}
case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
  if (MI.getOperand(1).getReg() == AArch64::XZR) {
    assert(MI.getDesc().getNumOperands() == 4 &&
           MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
    return true;
  }
  break;
case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
  if (MI.getOperand(2).getImm() == 0) {
    assert(MI.getDesc().getNumOperands() == 4 &&
           MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
    return true;
  }
  break;
}
return false;
// In AArch64InstrInfo::isFPRCopy — is MI (or does it act as) an FPR-to-FPR
// copy?
switch (MI.getOpcode()) {
default:
  break;
case TargetOpcode::COPY: {
  Register DstReg = MI.getOperand(0).getReg();
  return AArch64::FPR128RegClass.contains(DstReg);
}
case AArch64::ORRv16i8:
  if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
    assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
           "invalid ORRv16i8 operands");
    return true;
  }
  break;
}
return false;
unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                               int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }
  return 0;
}
unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                              int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::STRWui:
  case AArch64::STRXui:
  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::LDR_PXI:
  case AArch64::STR_PXI:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }
  return 0;
}
// In AArch64InstrInfo::isLdStPairSuppressed — the suppression flag lives on
// the memory operand:
      return MMO->getFlags() & MOSuppressPair;
// ...

// In AArch64InstrInfo::isStridedAccess:
  if (MI.memoperands_empty())
    return false;
  // ...
      return MMO->getFlags() & MOStridedAccess;
// In AArch64InstrInfo::hasUnscaledLdStOffset:
switch (Opc) {
default:
  return false;
case AArch64::STURSi:
case AArch64::STRSpre:
case AArch64::STURDi:
case AArch64::STRDpre:
case AArch64::STURQi:
case AArch64::STRQpre:
case AArch64::STURBBi:
case AArch64::STURHHi:
case AArch64::STURWi:
case AArch64::STRWpre:
case AArch64::STURXi:
case AArch64::STRXpre:
case AArch64::LDURSi:
case AArch64::LDRSpre:
case AArch64::LDURDi:
case AArch64::LDRDpre:
case AArch64::LDURQi:
case AArch64::LDRQpre:
case AArch64::LDURWi:
case AArch64::LDRWpre:
case AArch64::LDURXi:
case AArch64::LDRXpre:
case AArch64::LDURSWi:
case AArch64::LDURHHi:
case AArch64::LDURBBi:
case AArch64::LDURSBWi:
case AArch64::LDURSHWi:
  return true;
}
// In AArch64InstrInfo::getUnscaledLdSt — map a scaled-offset opcode to its
// unscaled-offset equivalent:
case AArch64::PRFMui:
  return AArch64::PRFUMi;
case AArch64::LDRXui:
  return AArch64::LDURXi;
case AArch64::LDRWui:
  return AArch64::LDURWi;
case AArch64::LDRBui:
  return AArch64::LDURBi;
case AArch64::LDRHui:
  return AArch64::LDURHi;
case AArch64::LDRSui:
  return AArch64::LDURSi;
case AArch64::LDRDui:
  return AArch64::LDURDi;
case AArch64::LDRQui:
  return AArch64::LDURQi;
case AArch64::LDRBBui:
  return AArch64::LDURBBi;
case AArch64::LDRHHui:
  return AArch64::LDURHHi;
case AArch64::LDRSBXui:
  return AArch64::LDURSBXi;
case AArch64::LDRSBWui:
  return AArch64::LDURSBWi;
case AArch64::LDRSHXui:
  return AArch64::LDURSHXi;
case AArch64::LDRSHWui:
  return AArch64::LDURSHWi;
case AArch64::LDRSWui:
  return AArch64::LDURSWi;
case AArch64::STRXui:
  return AArch64::STURXi;
case AArch64::STRWui:
  return AArch64::STURWi;
case AArch64::STRBui:
  return AArch64::STURBi;
case AArch64::STRHui:
  return AArch64::STURHi;
case AArch64::STRSui:
  return AArch64::STURSi;
case AArch64::STRDui:
  return AArch64::STURDi;
case AArch64::STRQui:
  return AArch64::STURQi;
case AArch64::STRBBui:
  return AArch64::STURBBi;
case AArch64::STRHHui:
  return AArch64::STURHHi;
// In AArch64InstrInfo::getLoadStoreImmIdx — the operand index of the
// immediate offset. Ordinary loads/stores use operand 2 (the default);
// everything listed here carries its immediate in operand 3.
switch (Opc) {
default:
  return 2;
// Paired instructions.
case AArch64::LDPXi:
case AArch64::LDPDi:
case AArch64::STPXi:
case AArch64::STPDi:
case AArch64::LDNPXi:
case AArch64::LDNPDi:
case AArch64::STNPXi:
case AArch64::STNPDi:
case AArch64::LDPQi:
case AArch64::STPQi:
case AArch64::LDNPQi:
case AArch64::STNPQi:
case AArch64::LDPWi:
case AArch64::LDPSi:
case AArch64::STPWi:
case AArch64::STPSi:
case AArch64::LDNPWi:
case AArch64::LDNPSi:
case AArch64::STNPWi:
case AArch64::STNPSi:
case AArch64::STGPi:
// SVE contiguous loads.
case AArch64::LD1B_IMM:
case AArch64::LD1B_H_IMM:
case AArch64::LD1B_S_IMM:
case AArch64::LD1B_D_IMM:
case AArch64::LD1SB_H_IMM:
case AArch64::LD1SB_S_IMM:
case AArch64::LD1SB_D_IMM:
case AArch64::LD1H_IMM:
case AArch64::LD1H_S_IMM:
case AArch64::LD1H_D_IMM:
case AArch64::LD1SH_S_IMM:
case AArch64::LD1SH_D_IMM:
case AArch64::LD1W_IMM:
case AArch64::LD1W_D_IMM:
case AArch64::LD1SW_D_IMM:
case AArch64::LD1D_IMM:
// SVE structured loads.
case AArch64::LD2B_IMM:
case AArch64::LD2H_IMM:
case AArch64::LD2W_IMM:
case AArch64::LD2D_IMM:
case AArch64::LD3B_IMM:
case AArch64::LD3H_IMM:
case AArch64::LD3W_IMM:
case AArch64::LD3D_IMM:
case AArch64::LD4B_IMM:
case AArch64::LD4H_IMM:
case AArch64::LD4W_IMM:
case AArch64::LD4D_IMM:
// SVE contiguous stores.
case AArch64::ST1B_IMM:
case AArch64::ST1B_H_IMM:
case AArch64::ST1B_S_IMM:
case AArch64::ST1B_D_IMM:
case AArch64::ST1H_IMM:
case AArch64::ST1H_S_IMM:
case AArch64::ST1H_D_IMM:
case AArch64::ST1W_IMM:
case AArch64::ST1W_D_IMM:
case AArch64::ST1D_IMM:
// SVE structured stores.
case AArch64::ST2B_IMM:
case AArch64::ST2H_IMM:
case AArch64::ST2W_IMM:
case AArch64::ST2D_IMM:
case AArch64::ST3B_IMM:
case AArch64::ST3H_IMM:
case AArch64::ST3W_IMM:
case AArch64::ST3D_IMM:
case AArch64::ST4B_IMM:
case AArch64::ST4H_IMM:
case AArch64::ST4W_IMM:
case AArch64::ST4D_IMM:
// SVE load-and-replicate.
case AArch64::LD1RB_IMM:
case AArch64::LD1RB_H_IMM:
case AArch64::LD1RB_S_IMM:
case AArch64::LD1RB_D_IMM:
case AArch64::LD1RSB_H_IMM:
case AArch64::LD1RSB_S_IMM:
case AArch64::LD1RSB_D_IMM:
case AArch64::LD1RH_IMM:
case AArch64::LD1RH_S_IMM:
case AArch64::LD1RH_D_IMM:
case AArch64::LD1RSH_S_IMM:
case AArch64::LD1RSH_D_IMM:
case AArch64::LD1RW_IMM:
case AArch64::LD1RW_D_IMM:
case AArch64::LD1RSW_IMM:
case AArch64::LD1RD_IMM:
// SVE non-temporal loads/stores.
case AArch64::LDNT1B_ZRI:
case AArch64::LDNT1H_ZRI:
case AArch64::LDNT1W_ZRI:
case AArch64::LDNT1D_ZRI:
case AArch64::STNT1B_ZRI:
case AArch64::STNT1H_ZRI:
case AArch64::STNT1W_ZRI:
case AArch64::STNT1D_ZRI:
// SVE non-faulting loads.
case AArch64::LDNF1B_IMM:
case AArch64::LDNF1B_H_IMM:
case AArch64::LDNF1B_S_IMM:
case AArch64::LDNF1B_D_IMM:
case AArch64::LDNF1SB_H_IMM:
case AArch64::LDNF1SB_S_IMM:
case AArch64::LDNF1SB_D_IMM:
case AArch64::LDNF1H_IMM:
case AArch64::LDNF1H_S_IMM:
case AArch64::LDNF1H_D_IMM:
case AArch64::LDNF1SH_S_IMM:
case AArch64::LDNF1SH_D_IMM:
case AArch64::LDNF1W_IMM:
case AArch64::LDNF1W_D_IMM:
case AArch64::LDNF1SW_D_IMM:
case AArch64::LDNF1D_IMM:
// SVE predicate spills/fills.
case AArch64::LDR_PXI:
case AArch64::STR_PXI:
  return 3;
}
// In isPairableLdStInst — candidates for the load/store pair optimizer:
switch (MI.getOpcode()) {
default:
  return false;
// Scaled instructions.
case AArch64::STRSui:
case AArch64::STRDui:
case AArch64::STRQui:
case AArch64::STRXui:
case AArch64::STRWui:
case AArch64::LDRSui:
case AArch64::LDRDui:
case AArch64::LDRQui:
case AArch64::LDRXui:
case AArch64::LDRWui:
case AArch64::LDRSWui:
// Unscaled instructions.
case AArch64::STURSi:
case AArch64::STRSpre:
case AArch64::STURDi:
case AArch64::STRDpre:
case AArch64::STURQi:
case AArch64::STRQpre:
case AArch64::STURWi:
case AArch64::STRWpre:
case AArch64::STURXi:
case AArch64::STRXpre:
case AArch64::LDURSi:
case AArch64::LDRSpre:
case AArch64::LDURDi:
case AArch64::LDRDpre:
case AArch64::LDURQi:
case AArch64::LDRQpre:
case AArch64::LDURWi:
case AArch64::LDRWpre:
case AArch64::LDURXi:
case AArch64::LDRXpre:
case AArch64::LDURSWi:
  return true;
}
// In AArch64InstrInfo::convertToFlagSettingOpc — return the NZCV-setting
// ("S" form) opcode corresponding to Opc:
switch (Opc) {
default:
  llvm_unreachable("Opcode has no flag setting equivalent!");
// 32-bit cases:
case AArch64::ADDWri:
  return AArch64::ADDSWri;
case AArch64::ADDWrr:
  return AArch64::ADDSWrr;
case AArch64::ADDWrs:
  return AArch64::ADDSWrs;
case AArch64::ADDWrx:
  return AArch64::ADDSWrx;
case AArch64::ANDWri:
  return AArch64::ANDSWri;
case AArch64::ANDWrr:
  return AArch64::ANDSWrr;
case AArch64::ANDWrs:
  return AArch64::ANDSWrs;
case AArch64::BICWrr:
  return AArch64::BICSWrr;
case AArch64::BICWrs:
  return AArch64::BICSWrs;
case AArch64::SUBWri:
  return AArch64::SUBSWri;
case AArch64::SUBWrr:
  return AArch64::SUBSWrr;
case AArch64::SUBWrs:
  return AArch64::SUBSWrs;
case AArch64::SUBWrx:
  return AArch64::SUBSWrx;
// 64-bit cases:
case AArch64::ADDXri:
  return AArch64::ADDSXri;
case AArch64::ADDXrr:
  return AArch64::ADDSXrr;
case AArch64::ADDXrs:
  return AArch64::ADDSXrs;
case AArch64::ADDXrx:
  return AArch64::ADDSXrx;
case AArch64::ANDXri:
  return AArch64::ANDSXri;
case AArch64::ANDXrr:
  return AArch64::ANDSXrr;
case AArch64::ANDXrs:
  return AArch64::ANDSXrs;
case AArch64::BICXrr:
  return AArch64::BICSXrr;
case AArch64::BICXrs:
  return AArch64::BICSXrs;
case AArch64::SUBXri:
  return AArch64::SUBSXri;
case AArch64::SUBXrr:
  return AArch64::SUBSXrr;
case AArch64::SUBXrs:
  return AArch64::SUBSXrs;
case AArch64::SUBXrx:
  return AArch64::SUBSXrx;
// SVE predicate instructions:
case AArch64::AND_PPzPP:
  return AArch64::ANDS_PPzPP;
case AArch64::BIC_PPzPP:
  return AArch64::BICS_PPzPP;
case AArch64::EOR_PPzPP:
  return AArch64::EORS_PPzPP;
case AArch64::NAND_PPzPP:
  return AArch64::NANDS_PPzPP;
case AArch64::NOR_PPzPP:
  return AArch64::NORS_PPzPP;
case AArch64::ORN_PPzPP:
  return AArch64::ORNS_PPzPP;
case AArch64::ORR_PPzPP:
  return AArch64::ORRS_PPzPP;
case AArch64::BRKA_PPzP:
  return AArch64::BRKAS_PPzP;
case AArch64::BRKPA_PPzPP:
  return AArch64::BRKPAS_PPzPP;
case AArch64::BRKB_PPzP:
  return AArch64::BRKBS_PPzP;
case AArch64::BRKPB_PPzPP:
  return AArch64::BRKPBS_PPzPP;
case AArch64::BRKN_PPzP:
  return AArch64::BRKNS_PPzP;
case AArch64::RDFFR_PPz:
  return AArch64::RDFFRS_PPz;
case AArch64::PTRUE_B:
  return AArch64::PTRUES_B;
}
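// Example (a sketch of how optimizePTestInstr above uses this table): given
//   %1 = AND_PPzPP %pg, %a, %b
//   PTEST_PP %pg, %1
// rewriting the AND to ANDS_PPzPP makes it set NZCV under the same governing
// predicate %pg, so the PTEST can be deleted.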
// In AArch64InstrInfo::isCandidateToMergeOrPair:
if (MI.hasOrderedMemoryRef())
  return false;

// Make sure this is a reg/fi+imm (as opposed to an address reloc).
assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
        MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
       "Expected a reg or frame index operand.");

// For pre-indexed ld/st the immediate is operand 3, not operand 2.
bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
  return false;
// ...
// Can't merge/pair if the instruction modifies the base register,
// e.g. ldr x0, [x0]. This never occurs with an FI base, and pre-indexed
// forms are fine because their writeback is expected.
if (MI.getOperand(1).isReg() && !IsPreLdSt) {
  Register BaseReg = MI.getOperand(1).getReg();
  if (MI.modifiesRegister(BaseReg, TRI))
    return false;
}
// Pairing can be unprofitable when the function needs Win64 unwind info,
// since paired SEH opcodes don't exist for every pairing.
const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
bool NeedsWinCFI = MAI->usesWindowsCFI() &&
                   MI.getMF()->getFunction().needsUnwindTableEntry();
// ...
// On some CPUs quad load/store pairs are slower than two independent
// single accesses.
if (Subtarget.isPaired128Slow()) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDURQi:
  case AArch64::STURQi:
  case AArch64::LDRQui:
  case AArch64::STRQui:
    return false;
  }
}
bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
    const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
    int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
    const TargetRegisterInfo *TRI) const {
  // ...
}

std::optional<ExtAddrMode>
AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
                                          const TargetRegisterInfo *TRI) const {
  const MachineOperand *Base; // Filled with the base operand of MI.
  int64_t Offset;             // Filled with the offset of MI.
  bool OffsetIsScalable;
  if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
    return std::nullopt;

  if (!Base->isReg())
    return std::nullopt;
  // ...
bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
    const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
    bool &OffsetIsScalable, unsigned &Width,
    const TargetRegisterInfo *TRI) const {
  // ...
  int64_t Dummy1, Dummy2;
  // ...
  // Accept only a base register or frame index plus an immediate offset.
  if (!BaseOp->isReg() && !BaseOp->isFI())
    return false;
  // ...
  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
                                    unsigned &Width, int64_t &MinOffset,
                                    int64_t &MaxOffset) {
  // SVEMaxBytesPerVector below is the architectural maximum SVE register
  // size in bytes (2048 bits / 8).
  switch (Opcode) {
  // Not a memory operation or something we want to handle.
  default:
    Scale = TypeSize::Fixed(0);
    Width = 0;
    MinOffset = MaxOffset = 0;
    return false;
  case AArch64::STRWpost:
  case AArch64::LDRWpost:
    // ...
  case AArch64::LDURQi:
  case AArch64::STURQi:
    // ...
  case AArch64::PRFUMi:
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    // ...
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    // ...
  case AArch64::LDURHi:
  case AArch64::LDURHHi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::STURHi:
  case AArch64::STURHHi:
    // ...
  case AArch64::LDURBi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::STURBi:
  case AArch64::STURBBi:
    // ...
  case AArch64::LDPQi:
  case AArch64::LDNPQi:
  case AArch64::STPQi:
  case AArch64::STNPQi:
    // ...
  case AArch64::LDRQui:
  case AArch64::STRQui:
    // ...
  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
    // ...
  case AArch64::PRFMui:
  case AArch64::LDRXui:
  case AArch64::LDRDui:
  case AArch64::STRXui:
  case AArch64::STRDui:
    // ...
  case AArch64::StoreSwiftAsyncContext:
    // ...
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
    // ...
  case AArch64::LDRWui:
  case AArch64::LDRSui:
  case AArch64::LDRSWui:
  case AArch64::STRWui:
  case AArch64::STRSui:
    // ...
  case AArch64::LDRHui:
  case AArch64::LDRHHui:
  case AArch64::LDRSHWui:
  case AArch64::LDRSHXui:
  case AArch64::STRHui:
  case AArch64::STRHHui:
    // ...
  case AArch64::LDRBui:
  case AArch64::LDRBBui:
  case AArch64::LDRSBWui:
  case AArch64::LDRSBXui:
  case AArch64::STRBui:
  case AArch64::STRBBui:
    // ...
  case AArch64::STPXpre:
  case AArch64::LDPXpost:
  case AArch64::STPDpre:
  case AArch64::LDPDpost:
    // ...
  case AArch64::STPQpre:
  case AArch64::LDPQpost:
    // ...
  case AArch64::STRXpre:
  case AArch64::STRDpre:
  case AArch64::LDRXpost:
  case AArch64::LDRDpost:
    // ...
  case AArch64::STRQpre:
  case AArch64::LDRQpost:
    // ...
  case AArch64::TAGPstack:
    // ...
  case AArch64::STZGi:
    // ...
  case AArch64::STR_ZZZZXI:
  case AArch64::LDR_ZZZZXI:
    // ...
    Width = SVEMaxBytesPerVector * 4;
    // ...
  case AArch64::STR_ZZZXI:
  case AArch64::LDR_ZZZXI:
    // ...
    Width = SVEMaxBytesPerVector * 3;
    // ...
  case AArch64::STR_ZZXI:
  case AArch64::LDR_ZZXI:
    // ...
    Width = SVEMaxBytesPerVector * 2;
    // ...
  case AArch64::LDR_PXI:
  case AArch64::STR_PXI:
    // ...
    Width = SVEMaxBytesPerVector / 8;
    // ...
  case AArch64::LDR_ZXI:
  case AArch64::STR_ZXI:
    // ...
    Width = SVEMaxBytesPerVector;
    // ...
  case AArch64::LD1B_IMM:
  case AArch64::LD1H_IMM:
  case AArch64::LD1W_IMM:
  case AArch64::LD1D_IMM:
  case AArch64::LDNT1B_ZRI:
  case AArch64::LDNT1H_ZRI:
  case AArch64::LDNT1W_ZRI:
  case AArch64::LDNT1D_ZRI:
  case AArch64::ST1B_IMM:
  case AArch64::ST1H_IMM:
  case AArch64::ST1W_IMM:
  case AArch64::ST1D_IMM:
  case AArch64::STNT1B_ZRI:
  case AArch64::STNT1H_ZRI:
  case AArch64::STNT1W_ZRI:
  case AArch64::STNT1D_ZRI:
  case AArch64::LDNF1B_IMM:
  case AArch64::LDNF1H_IMM:
  case AArch64::LDNF1W_IMM:
  case AArch64::LDNF1D_IMM:
    // A full vector's worth of data.
    // ...
    Width = SVEMaxBytesPerVector;
    // ...
  case AArch64::LD2B_IMM:
  case AArch64::LD2H_IMM:
  case AArch64::LD2W_IMM:
  case AArch64::LD2D_IMM:
  case AArch64::ST2B_IMM:
  case AArch64::ST2H_IMM:
  case AArch64::ST2W_IMM:
  case AArch64::ST2D_IMM:
    // Two full vectors.
    // ...
    Width = SVEMaxBytesPerVector * 2;
    // ...
  case AArch64::LD3B_IMM:
  case AArch64::LD3H_IMM:
  case AArch64::LD3W_IMM:
  case AArch64::LD3D_IMM:
  case AArch64::ST3B_IMM:
  case AArch64::ST3H_IMM:
  case AArch64::ST3W_IMM:
  case AArch64::ST3D_IMM:
    // Three full vectors.
    // ...
    Width = SVEMaxBytesPerVector * 3;
    // ...
  case AArch64::LD4B_IMM:
  case AArch64::LD4H_IMM:
  case AArch64::LD4W_IMM:
  case AArch64::LD4D_IMM:
  case AArch64::ST4B_IMM:
  case AArch64::ST4H_IMM:
  case AArch64::ST4W_IMM:
  case AArch64::ST4D_IMM:
    // Four full vectors.
    // ...
    Width = SVEMaxBytesPerVector * 4;
    // ...
  case AArch64::LD1B_H_IMM:
  case AArch64::LD1SB_H_IMM:
  case AArch64::LD1H_S_IMM:
  case AArch64::LD1SH_S_IMM:
  case AArch64::LD1W_D_IMM:
  case AArch64::LD1SW_D_IMM:
  case AArch64::ST1B_H_IMM:
  case AArch64::ST1H_S_IMM:
  case AArch64::ST1W_D_IMM:
  case AArch64::LDNF1B_H_IMM:
  case AArch64::LDNF1SB_H_IMM:
  case AArch64::LDNF1H_S_IMM:
  case AArch64::LDNF1SH_S_IMM:
  case AArch64::LDNF1W_D_IMM:
  case AArch64::LDNF1SW_D_IMM:
    // A half vector's worth of data (elements are widened on load).
    // ...
    Width = SVEMaxBytesPerVector / 2;
    // ...
  case AArch64::LD1B_S_IMM:
  case AArch64::LD1SB_S_IMM:
  case AArch64::LD1H_D_IMM:
  case AArch64::LD1SH_D_IMM:
  case AArch64::ST1B_S_IMM:
  case AArch64::ST1H_D_IMM:
  case AArch64::LDNF1B_S_IMM:
  case AArch64::LDNF1SB_S_IMM:
  case AArch64::LDNF1H_D_IMM:
  case AArch64::LDNF1SH_D_IMM:
    // A quarter vector.
    // ...
    Width = SVEMaxBytesPerVector / 4;
    // ...
  case AArch64::LD1B_D_IMM:
  case AArch64::LD1SB_D_IMM:
  case AArch64::ST1B_D_IMM:
  case AArch64::LDNF1B_D_IMM:
  case AArch64::LDNF1SB_D_IMM:
    // An eighth of a vector.
    // ...
    Width = SVEMaxBytesPerVector / 8;
    // ...
  case AArch64::ST2Gi:
  case AArch64::STZ2Gi:
    // ...
  case AArch64::STGPi:
    // ...
  case AArch64::LD1RB_IMM:
  case AArch64::LD1RB_H_IMM:
  case AArch64::LD1RB_S_IMM:
  case AArch64::LD1RB_D_IMM:
  case AArch64::LD1RSB_H_IMM:
  case AArch64::LD1RSB_S_IMM:
  case AArch64::LD1RSB_D_IMM:
    // ...
  case AArch64::LD1RH_IMM:
  case AArch64::LD1RH_S_IMM:
  case AArch64::LD1RH_D_IMM:
  case AArch64::LD1RSH_S_IMM:
  case AArch64::LD1RSH_D_IMM:
    // ...
  case AArch64::LD1RW_IMM:
  case AArch64::LD1RW_D_IMM:
  case AArch64::LD1RSW_IMM:
    // ...
  case AArch64::LD1RD_IMM:
    // ...
// In AArch64InstrInfo::getMemScale — the access element size in bytes:
switch (Opc) {
default:
  llvm_unreachable("Opcode has unknown scale!");
case AArch64::LDRBBui:
case AArch64::LDURBBi:
case AArch64::LDRSBWui:
case AArch64::LDURSBWi:
case AArch64::STRBBui:
case AArch64::STURBBi:
  return 1;
case AArch64::LDRHHui:
case AArch64::LDURHHi:
case AArch64::LDRSHWui:
case AArch64::LDURSHWi:
case AArch64::STRHHui:
case AArch64::STURHHi:
  return 2;
case AArch64::LDRSui:
case AArch64::LDURSi:
case AArch64::LDRSpre:
case AArch64::LDRSWui:
case AArch64::LDURSWi:
case AArch64::LDRWpre:
case AArch64::LDRWui:
case AArch64::LDURWi:
case AArch64::STRSui:
case AArch64::STURSi:
case AArch64::STRSpre:
case AArch64::STRWui:
case AArch64::STURWi:
case AArch64::STRWpre:
case AArch64::LDPSi:
case AArch64::LDPSWi:
case AArch64::LDPWi:
case AArch64::STPSi:
case AArch64::STPWi:
  return 4;
case AArch64::LDRDui:
case AArch64::LDURDi:
case AArch64::LDRDpre:
case AArch64::LDRXui:
case AArch64::LDURXi:
case AArch64::LDRXpre:
case AArch64::STRDui:
case AArch64::STURDi:
case AArch64::STRDpre:
case AArch64::STRXui:
case AArch64::STURXi:
case AArch64::STRXpre:
case AArch64::LDPDi:
case AArch64::LDPXi:
case AArch64::STPDi:
case AArch64::STPXi:
  return 8;
case AArch64::LDRQui:
case AArch64::LDURQi:
case AArch64::STRQui:
case AArch64::STURQi:
case AArch64::STRQpre:
case AArch64::LDPQi:
case AArch64::LDRQpre:
case AArch64::STPQi:
  return 16;
case AArch64::STZGi:
case AArch64::ST2Gi:
case AArch64::STZ2Gi:
case AArch64::STGPi:
  return 16;
}
// Is this a load with a pre-indexed writeback?
bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::LDRWpre:
  case AArch64::LDRXpre:
  case AArch64::LDRSpre:
  case AArch64::LDRDpre:
  case AArch64::LDRQpre:
    return true;
  }
}

// Is this a store with a pre-indexed writeback?
bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::STRWpre:
  case AArch64::STRXpre:
  case AArch64::STRSpre:
  case AArch64::STRDpre:
  case AArch64::STRQpre:
    return true;
  }
}

// Is this a paired load/store?
bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:
  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
  case AArch64::STPWi:
  case AArch64::STPXi:
  case AArch64::STGPi:
    return true;
  }
}
// In getLdStBaseOp — the base operand is operand 2 for paired and
// pre-indexed forms, operand 1 otherwise:
  return MI.getOperand(Idx);

// In getLdStOffsetOp — the immediate offset sits one operand past the base:
  return MI.getOperand(Idx);
// In AArch64InstrInfo::isQForm / isFpOrNEON — classify an instruction by the
// register classes of its operands:
  if (MI.getParent() == nullptr)
    return false;
// ...
auto IsQFPR = [&](const MachineOperand &Op) {
  if (!Op.isReg())
    return false;
  auto Reg = Op.getReg();
  if (Reg.isPhysical())
    return AArch64::FPR128RegClass.contains(Reg);
  // Virtual register: consult its register class.
  // ...
  return TRC == &AArch64::FPR128RegClass ||
         TRC == &AArch64::FPR128_loRegClass;
};
// ...
auto IsFPR = [&](const MachineOperand &Op) {
  if (!Op.isReg())
    return false;
  auto Reg = Op.getReg();
  if (Reg.isPhysical())
    return AArch64::FPR128RegClass.contains(Reg) ||
           AArch64::FPR64RegClass.contains(Reg) ||
           AArch64::FPR32RegClass.contains(Reg) ||
           AArch64::FPR16RegClass.contains(Reg) ||
           AArch64::FPR8RegClass.contains(Reg);
  // ...
  return TRC == &AArch64::FPR128RegClass ||
         TRC == &AArch64::FPR128_loRegClass ||
         TRC == &AArch64::FPR64RegClass ||
         TRC == &AArch64::FPR64_loRegClass ||
         TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
         TRC == &AArch64::FPR8RegClass;
};
// In canPairLdStOpc(FirstOpc, SecondOpc):
if (FirstOpc == SecondOpc)
  return true;
// We can also pair sign-extended and zero-extended 32-bit loads.
switch (FirstOpc) {
default:
  return false;
case AArch64::LDRWui:
case AArch64::LDURWi:
  return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
case AArch64::LDRSWui:
case AArch64::LDURSWi:
  return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
}
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
                            int64_t Offset1, unsigned Opcode1, int FI2,
                            int64_t Offset2, unsigned Opcode2) {
  // ...
  int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
  int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
  assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
  // Convert to scaled object offsets.
  int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
  if (ObjectOffset1 % Scale1 != 0)
    return false;
  ObjectOffset1 /= Scale1;
  int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
  if (ObjectOffset2 % Scale2 != 0)
    return false;
  ObjectOffset2 /= Scale2;
  ObjectOffset1 += Offset1;
  ObjectOffset2 += Offset2;
  return ObjectOffset1 + 1 == ObjectOffset2;
}
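// Worked example: two 8-byte spill slots at object offsets 16 and 24, each
// accessed with instruction offset 0, scale to units 2 and 3; since
// 2 + 1 == 3 the accesses are slot-adjacent, and clustering them lets the
// load/store optimizer later form a single LDP/STP.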
bool AArch64InstrInfo::shouldClusterMemOps(
    ArrayRef<const MachineOperand *> BaseOps1,
    ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
    unsigned NumBytes) const {
  // ...
  assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
         "Only base registers and frame indices are supported.");
  // ...
  unsigned FirstOpc = FirstLdSt.getOpcode();
  unsigned SecondOpc = SecondLdSt.getOpcode();
  // ...
  // Paired instructions have a 7-bit signed offset field.
  if (Offset1 > 63 || Offset1 < -64)
    return false;
  // ...
  if (BaseOp1.isFI()) {
    const MachineFrameInfo &MFI = FirstLdSt.getMF()->getFrameInfo();
    assert(MFI.getObjectOffset(BaseOp1.getIndex()) <=
               MFI.getObjectOffset(BaseOp2.getIndex()) &&
           "Caller should have ordered offsets.");
    return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
                           BaseOp2.getIndex(), Offset2, SecondOpc);
  }

  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");

  return Offset1 + 1 == Offset2;
}
static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
                                            unsigned Reg, unsigned SubIdx,
                                            unsigned State,
                                            const TargetRegisterInfo *TRI) {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (Register::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}

static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  // We really want the positive remainder mod 32 here; that happens to be
  // easily obtainable with a mask.
  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
}
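// Example: copying the tuple D1_D2_D3 into D2_D3_D4 forwards would overwrite
// D2/D3 before they are read; (DestReg - SrcReg) & 0x1f computes the positive
// remainder mod 32 (the register-file size), here 1 < 3, so the caller emits
// the copy in reverse order instead.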
// In AArch64InstrInfo::copyPhysRegTuple — copy a vector tuple one
// sub-register at a time, in a direction that never clobbers a source
// register before it is read:
assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
const TargetRegisterInfo *TRI = &getRegisterInfo();
uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
unsigned NumRegs = Indices.size();

int SubReg = 0, End = NumRegs, Incr = 1;
if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
  SubReg = NumRegs - 1;
  End = -1;
  Incr = -1;
}
// ...

// In copyGPRRegTuple(..., unsigned SrcReg, bool KillSrc, unsigned Opcode,
// unsigned ZeroReg, ArrayRef<unsigned> Indices):
unsigned NumRegs = Indices.size();
// ...
uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
       "GPR reg sequences should not be able to overlap");
// In AArch64InstrInfo::copyPhysReg:
if (AArch64::GPR32spRegClass.contains(DestReg) &&
    (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
  // ...
  if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
    // If either operand is WSP, expand to ADD #0.
    if (Subtarget.hasZeroCycleRegMove()) {
      // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
      MCRegister DestRegX = TRI->getMatchingSuperReg(
          DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
      MCRegister SrcRegX = TRI->getMatchingSuperReg(
          SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
      // ...
    }
    // ...
  } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
    // ...
  } else {
    if (Subtarget.hasZeroCycleRegMove()) {
      // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
      MCRegister DestRegX = TRI->getMatchingSuperReg(
          DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
      MCRegister SrcRegX = TRI->getMatchingSuperReg(
          SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
      // ...
    }
    // ...
  }
  return;
}

// Copies of SVE predicate registers.
if (AArch64::PPRRegClass.contains(DestReg) &&
    AArch64::PPRRegClass.contains(SrcReg)) {
  // ...
  return;
}

// Copies of a single SVE vector register.
if (AArch64::ZPRRegClass.contains(DestReg) &&
    AArch64::ZPRRegClass.contains(SrcReg)) {
  // ...
  return;
}

if (AArch64::ZPR2RegClass.contains(DestReg) &&
    AArch64::ZPR2RegClass.contains(SrcReg)) {
  static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
  // ...
  return;
}

if (AArch64::ZPR3RegClass.contains(DestReg) &&
    AArch64::ZPR3RegClass.contains(SrcReg)) {
  static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
                                     AArch64::zsub2};
  // ...
  return;
}

if (AArch64::ZPR4RegClass.contains(DestReg) &&
    AArch64::ZPR4RegClass.contains(SrcReg)) {
  static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
                                     AArch64::zsub2, AArch64::zsub3};
  // ...
  return;
}

if (AArch64::GPR64spRegClass.contains(DestReg) &&
    (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
  if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
    // If either operand is SP, expand to ADD #0.
    // ...
  } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
    // ...
  }
  // ...
  return;
}

// Copies of NEON D-register tuples.
if (AArch64::DDDDRegClass.contains(DestReg) &&
    AArch64::DDDDRegClass.contains(SrcReg)) {
  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};
  // ...
  return;
}

if (AArch64::DDDRegClass.contains(DestReg) &&
    AArch64::DDDRegClass.contains(SrcReg)) {
  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2};
  // ...
  return;
}

if (AArch64::DDRegClass.contains(DestReg) &&
    AArch64::DDRegClass.contains(SrcReg)) {
  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
  // ...
  return;
}

// Copies of NEON Q-register tuples.
if (AArch64::QQQQRegClass.contains(DestReg) &&
    AArch64::QQQQRegClass.contains(SrcReg)) {
  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};
  // ...
  return;
}

if (AArch64::QQQRegClass.contains(DestReg) &&
    AArch64::QQQRegClass.contains(SrcReg)) {
  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2};
  // ...
  return;
}

if (AArch64::QQRegClass.contains(DestReg) &&
    AArch64::QQRegClass.contains(SrcReg)) {
  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
  // ...
  return;
}

if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
    AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
  static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
  copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
                  AArch64::XZR, Indices);
  return;
}

if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
    AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
  static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
  copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
                  AArch64::WZR, Indices);
  return;
}

if (AArch64::FPR128RegClass.contains(DestReg) &&
    AArch64::FPR128RegClass.contains(SrcReg)) {
  // Without NEON, copy a Q register through the overlapping Z register.
  // ...
      .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
      .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
  } else if (Subtarget.hasNEON()) {
    // ...
  }
  // ...
  return;
}

if (AArch64::FPR64RegClass.contains(DestReg) &&
    AArch64::FPR64RegClass.contains(SrcReg)) {
  // ...
  return;
}

if (AArch64::FPR32RegClass.contains(DestReg) &&
    AArch64::FPR32RegClass.contains(SrcReg)) {
  // ...
  return;
}

if (AArch64::FPR16RegClass.contains(DestReg) &&
    AArch64::FPR16RegClass.contains(SrcReg)) {
  // H registers have no direct move; copy via the containing S registers.
  DestReg =
      RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
  SrcReg =
      RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
  // ...
  return;
}

if (AArch64::FPR8RegClass.contains(DestReg) &&
    AArch64::FPR8RegClass.contains(SrcReg)) {
  // B registers likewise copy via the containing S registers.
  DestReg =
      RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
  SrcReg =
      RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
  // ...
  return;
}

// Cross GPR/FPR copies use FMOV.
if (AArch64::FPR64RegClass.contains(DestReg) &&
    AArch64::GPR64RegClass.contains(SrcReg)) {
  // ...
  return;
}
if (AArch64::GPR64RegClass.contains(DestReg) &&
    AArch64::FPR64RegClass.contains(SrcReg)) {
  // ...
  return;
}
if (AArch64::FPR32RegClass.contains(DestReg) &&
    AArch64::GPR32RegClass.contains(SrcReg)) {
  // ...
  return;
}
if (AArch64::GPR32RegClass.contains(DestReg) &&
    AArch64::FPR32RegClass.contains(SrcReg)) {
  // ...
  return;
}

if (DestReg == AArch64::NZCV) {
  assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
  BuildMI(MBB, I, DL, get(AArch64::MSR))
      .addImm(AArch64SysReg::NZCV)
      .addReg(SrcReg, getKillRegState(KillSrc));
  return;
}

if (SrcReg == AArch64::NZCV) {
  assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
  BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
      .addImm(AArch64SysReg::NZCV)
      .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
  return;
}

#ifndef NDEBUG
const TargetRegisterInfo &TRI = getRegisterInfo();
errs() << TRI.getRegAsmName(DestReg) << " = COPY "
       << TRI.getRegAsmName(SrcReg) << "\n";
#endif
llvm_unreachable("unimplemented reg-to-reg copy");
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
                                    MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator InsertBefore,
                                    const MCInstrDesc &MCID, Register SrcReg,
                                    bool IsKill, unsigned SubIdx0,
                                    unsigned SubIdx1, int FI,
                                    MachineMemOperand *MMO) {
  Register SrcReg0 = SrcReg;
  Register SrcReg1 = SrcReg;
  if (SrcReg.isPhysical()) {
    SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
    SubIdx0 = 0;
    SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
    SubIdx1 = 0;
  }
  // ...
}

// In AArch64InstrInfo::storeRegToStackSlot(MBB, MBBI,
//                                          Register SrcReg, bool isKill,
//                                          int FI, RC, TRI, ...):
  unsigned Opc = 0;
  // ...
  switch (TRI->getSpillSize(*RC)) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRHui;
    else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
      Opc = AArch64::STR_PXI;
      // ...
    }
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRWui;
      // ...
      assert(SrcReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRXui;
      // ...
      assert(SrcReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRDui;
    } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
      storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
                              get(AArch64::STPWi), SrcReg, isKill,
                              AArch64::sube32, AArch64::subo32, FI, MMO);
      return;
    }
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov1d;
      // ...
    } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
      storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
                              get(AArch64::STPXi), SrcReg, isKill,
                              AArch64::sube64, AArch64::subo64, FI, MMO);
      return;
    } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
      Opc = AArch64::STR_ZXI;
      // ...
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev1d;
      // ...
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv1d;
      // ...
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov2d;
      // ...
    } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
      Opc = AArch64::STR_ZZXI;
      // ...
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev2d;
      // ...
    } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
      Opc = AArch64::STR_ZZZXI;
      // ...
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv2d;
      // ...
    } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
      Opc = AArch64::STR_ZZZZXI;
      // ...
    }
    break;
  }
  assert(Opc && "Unknown register class");
  // ...
  MI.addMemOperand(MMO);
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator InsertBefore,
                                     const MCInstrDesc &MCID,
                                     Register DestReg, unsigned SubIdx0,
                                     unsigned SubIdx1, int FI,
                                     MachineMemOperand *MMO) {
  Register DestReg0 = DestReg;
  Register DestReg1 = DestReg;
  bool IsUndef = true;
  if (DestReg.isPhysical()) {
    DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
    SubIdx0 = 0;
    DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
    SubIdx1 = 0;
    IsUndef = false;
  }
  // ...
}

// In AArch64InstrInfo::loadRegFromStackSlot:
  unsigned Opc = 0;
  // ...
  switch (TRI->getSpillSize(*RC)) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRHui;
    else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
      Opc = AArch64::LDR_PXI;
      // ...
    }
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRWui;
      // ...
      assert(DestReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRXui;
      // ...
      assert(DestReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRDui;
    } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
      loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
                               get(AArch64::LDPWi), DestReg, AArch64::sube32,
                               AArch64::subo32, FI, MMO);
      return;
    }
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov1d;
      // ...
    } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
      loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
                               get(AArch64::LDPXi), DestReg, AArch64::sube64,
                               AArch64::subo64, FI, MMO);
      return;
    } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
      Opc = AArch64::LDR_ZXI;
      // ...
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev1d;
      // ...
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv1d;
      // ...
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov2d;
      // ...
    } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
      Opc = AArch64::LDR_ZZXI;
      // ...
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev2d;
      // ...
    } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
      Opc = AArch64::LDR_ZZZXI;
      // ...
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv2d;
      // ...
    } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
      Opc = AArch64::LDR_ZZZZXI;
      // ...
    }
    break;
  }
  assert(Opc && "Unknown register class");
  // ...
  MI.addMemOperand(MMO);
// In isNZCVTouchedInInstructionRange(DefMI, UseMI, TRI):
return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
                                       UseMI.getIterator()),
              [TRI](const MachineInstr &I) {
                return I.modifiesRegister(AArch64::NZCV, TRI) ||
                       I.readsRegister(AArch64::NZCV, TRI);
              });
// In AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets — a predicate
// vector is 2 "scalable bytes", so the scalable part must be even:
assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
// ...
ByteSized = Offset.getFixed();
VGSized = Offset.getScalable() / 2;

// Decompose a frame offset into a fixed byte part plus scalable parts
// expressible with ADDVL (data vectors) and ADDPL (predicate vectors).
void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
    const StackOffset &Offset, int64_t &NumBytes,
    int64_t &NumPredicateVectors, int64_t &NumDataVectors) {
  assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");

  NumBytes = Offset.getFixed();
  NumDataVectors = 0;
  NumPredicateVectors = Offset.getScalable() / 2;
  // If the offset is an exact multiple of 8 predicate vectors, or it is out
  // of ADDPL's encodable range, fold part of it into whole data vectors so
  // it can be emitted with ADDVL.
  if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
      NumPredicateVectors > 62) {
    NumDataVectors = NumPredicateVectors / 8;
    NumPredicateVectors -= NumDataVectors * 8;
  }
}
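// Worked example: StackOffset::getScalable(16) (one full SVE data vector)
// gives NumPredicateVectors = 8; being a multiple of 8, it is refolded into
// NumDataVectors = 1 and NumPredicateVectors = 0, so the adjustment is
// emitted as a single "addvl" instead of an "addpl #8".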
// In appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes, VG, Comment)
// — append "+ NumBytes + NumVGScaledBytes * VG" to a DWARF expression:
  Expr.push_back((uint8_t)dwarf::DW_OP_plus);
  Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);

// Add + NumVGScaledBytes * AArch64::VG.
if (NumVGScaledBytes) {
  Expr.push_back((uint8_t)dwarf::DW_OP_consts);
  // ...
  Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
  // ...
  Expr.push_back((uint8_t)dwarf::DW_OP_mul);
  Expr.push_back((uint8_t)dwarf::DW_OP_plus);

  Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
          << std::abs(NumVGScaledBytes) << " * VG";
}

// In createDefCFAExpression(TRI, Reg, Offset):
int64_t NumBytes, NumVGScaledBytes;
AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, NumBytes,
                                                      NumVGScaledBytes);
std::string CommentBuffer;
llvm::raw_string_ostream Comment(CommentBuffer);

if (Reg == AArch64::SP)
  Comment << "sp";
else if (Reg == AArch64::FP)
  Comment << "fp";
else
  Comment << printReg(Reg, &TRI);

// Build up the expression (Reg + NumBytes + NumVGScaledBytes * AArch64::VG).
SmallString<64> Expr;
unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
// ...
appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
                         TRI.getDwarfRegNum(AArch64::VG, true), Comment);

// Wrap this into DW_CFA_def_cfa_expression.
SmallString<64> DefCfaExpr;
DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
// ...
MCCFIInstruction llvm::createDefCFA(const TargetRegisterInfo &TRI,
                                    unsigned FrameReg, unsigned Reg,
                                    const StackOffset &Offset,
                                    bool LastAdjustmentWasScalable) {
  if (Offset.getScalable())
    return createDefCFAExpression(TRI, Reg, Offset);

  if (FrameReg == Reg && !LastAdjustmentWasScalable)
    return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));

  unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
  return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg,
                                     (int)Offset.getFixed());
}
MCCFIInstruction llvm::createCFAOffset(const TargetRegisterInfo &TRI,
                                       unsigned Reg,
                                       const StackOffset &OffsetFromDefCFA) {
  int64_t NumBytes, NumVGScaledBytes;
  AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
      OffsetFromDefCFA, NumBytes, NumVGScaledBytes);

  unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);

  // Non-scalable offsets can use DW_CFA_offset directly.
  if (!NumVGScaledBytes)
    return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);

  std::string CommentBuffer;
  llvm::raw_string_ostream Comment(CommentBuffer);
  Comment << printReg(Reg, &TRI) << "  @ cfa";

  // Build up the expression (NumBytes + NumVGScaledBytes * AArch64::VG).
  SmallString<64> OffsetExpr;
  appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
                           TRI.getDwarfRegNum(AArch64::VG, true), Comment);

  // Wrap this into DW_CFA_expression.
  SmallString<64> CfaExpr;
  CfaExpr.push_back(dwarf::DW_CFA_expression);
  // ... (append the ULEB128 DwarfReg and expression size, then the
  //      expression itself, and return the escape CFI instruction)
}
static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI,
                               const DebugLoc &DL, unsigned DestReg,
                               unsigned SrcReg, int64_t Offset, unsigned Opc,
                               const TargetInstrInfo *TII,
                               MachineInstr::MIFlag Flag, bool NeedsWinCFI,
                               bool *HasWinCFI, bool EmitCFAOffset,
                               StackOffset CFAOffset, unsigned FrameReg) {
  int Sign = 1;
  unsigned MaxEncoding, ShiftSize;
  switch (Opc) {
  case AArch64::ADDXri:
  case AArch64::ADDSXri:
  case AArch64::SUBXri:
  case AArch64::SUBSXri:
    MaxEncoding = 0xfff;
    ShiftSize = 12;
    break;
  case AArch64::ADDVL_XXI:
  case AArch64::ADDPL_XXI:
  case AArch64::ADDSVL_XXI:
  case AArch64::ADDSPL_XXI:
    MaxEncoding = 31;
    ShiftSize = 0;
    if (Offset < 0) {
      MaxEncoding = 32;
      Sign = -1;
      Offset = -Offset;
    }
    break;
  default:
    llvm_unreachable("Unsupported opcode");
  }

  // `Offset` can be in bytes or in "scalable bytes".
  int VScale = 1;
  if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
    VScale = 16;
  else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
    VScale = 2;

  // Chunk the offset into pieces the immediate form can encode, using the
  // shifted-immediate encoding for the high part when needed.
  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
  Register TmpReg = DestReg;
  if (TmpReg == AArch64::XZR)
    TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
        &AArch64::GPR64RegClass);
  do {
    uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
    unsigned LocalShiftSize = 0;
    if (ThisVal > MaxEncoding) {
      ThisVal = ThisVal >> ShiftSize;
      LocalShiftSize = ShiftSize;
    }
    assert((ThisVal >> ShiftSize) <= MaxEncoding &&
           "Encoding cannot handle value that big");

    Offset -= ThisVal << LocalShiftSize;
    if (Offset == 0)
      TmpReg = DestReg;
    auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
                   .addReg(SrcReg)
                   .addImm(Sign * (int)ThisVal);
    if (ShiftSize)
      MBI = MBI.addImm(
          AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize));
    MBI = MBI.setMIFlag(Flag);

    auto Change =
        VScale == 1 ? StackOffset::getFixed(ThisVal << LocalShiftSize)
                    : StackOffset::getScalable(VScale * (int64_t)ThisVal);
    if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
      CFAOffset += Change;
    else
      CFAOffset -= Change;
    if (EmitCFAOffset && DestReg == TmpReg) {
      // ... (add a CFI_INSTRUCTION built via createDefCFA)
    }

    if (NeedsWinCFI) {
      assert(Sign == 1 &&
             "SEH directives should always have a positive sign");
      int Imm = (int)(ThisVal << LocalShiftSize);
      if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
          (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
        // ... (emit SEH_SetFP or SEH_AddFP Imm)
        assert(Offset == 0 && "Expected remaining offset to be zero to "
                              "emit a single SEH directive");
      } else if (DestReg == AArch64::SP) {
        assert(SrcReg == AArch64::SP &&
               "Unexpected SrcReg for SEH_StackAlloc");
        // ... (emit SEH_StackAlloc Imm)
      }
    }

    SrcReg = TmpReg;
  } while (Offset);
}
void llvm::emitFrameOffset(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                           unsigned DestReg, unsigned SrcReg,
                           StackOffset Offset, const TargetInstrInfo *TII,
                           MachineInstr::MIFlag Flag, bool SetNZCV,
                           bool NeedsWinCFI, bool *HasWinCFI,
                           bool EmitCFAOffset, StackOffset CFAOffset,
                           unsigned FrameReg) {
  // In a streaming-mode-body function the runtime value of vscale inside the
  // body can differ from the value at the call boundary, so scalable
  // adjustments use the streaming (SVL) forms.
  const Function &F = MBB.getParent()->getFunction();
  bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");

  int64_t Bytes, NumPredicateVectors, NumDataVectors;
  AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
      Offset, Bytes, NumPredicateVectors, NumDataVectors);

  // First emit non-scalable frame offsets, or a simple 'mov'.
  if (Bytes || (!Offset && SrcReg != DestReg)) {
    assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
           "SP increment/decrement not 8-byte aligned");
    unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
    if (Bytes < 0) {
      Bytes = -Bytes;
      Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
    }
    emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
                       NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
                       FrameReg);
    CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
                     ? StackOffset::getFixed(-Bytes)
                     : StackOffset::getFixed(Bytes);
    SrcReg = DestReg;
    FrameReg = DestReg;
  }

  assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
         "SetNZCV not supported with SVE vectors");
  assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
         "WinCFI not supported with SVE vectors");

  if (NumDataVectors) {
    emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
                       UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI,
                       TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
                       CFAOffset, FrameReg);
    SrcReg = DestReg;
  }

  if (NumPredicateVectors) {
    assert(DestReg != AArch64::SP && "Unaligned access to SP");
    emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
                       UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI,
                       TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
                       CFAOffset, FrameReg);
  }
}
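// Minimal usage sketch (assumed call site, not from this file): this is the
// shape in which AArch64 frame lowering typically allocates stack in a
// prologue; the NumBytes variable here is hypothetical.
//   emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
//                   StackOffset::getFixed(-NumBytes), TII,
//                   MachineInstr::FrameSetup);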
  // A full COPY to or from SP must keep its operand in GPR64; we can't spill
  // SP itself, so constrain the virtual register's class here instead.
  if (MI.isFullCopy()) {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
      MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
    if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
      MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
    // Nothing can be folded with a copy from/to NZCV.
    if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
      return nullptr;
  }

  // ...
  // Handle the case where a COPY instruction is being spilled or filled.
  if (MI.isCopy() && Ops.size() == 1 &&
      // Make sure we're only folding the explicit COPY defs/uses.
      (Ops[0] == 0 || Ops[0] == 1)) {
    bool IsSpill = Ops[0] == 0;
    bool IsFill = !IsSpill;
    const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    MachineBasicBlock &MBB = *MI.getParent();
    const MachineOperand &DstMO = MI.getOperand(0);
    const MachineOperand &SrcMO = MI.getOperand(1);
    Register DstReg = DstMO.getReg();
    Register SrcReg = SrcMO.getReg();
    auto getRegClass = [&](unsigned Reg) {
      return Register::isVirtualRegister(Reg)
                 ? MRI.getRegClass(Reg)
                 : TRI.getMinimalPhysRegClass(Reg);
    };

    // A COPY with no subregister indices spills the source (or fills the
    // destination) register class directly.
    if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
      assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
                 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
             "Mismatched register size in non subreg COPY");
      // ... (storeRegToStackSlot for a spill, loadRegFromStackSlot for a fill)
      return &*--InsertPt;
    }
4564 "Unexpected subreg on physical register");
4566 unsigned SpillSubreg;
4571 case AArch64::sub_32:
4573 if (AArch64::GPR32RegClass.
contains(SrcReg)) {
4574 SpillRC = &AArch64::GPR64RegClass;
4575 SpillSubreg = AArch64::sub_32;
4576 }
else if (AArch64::FPR32RegClass.
contains(SrcReg)) {
4577 SpillRC = &AArch64::FPR64RegClass;
4578 SpillSubreg = AArch64::ssub;
4583 if (AArch64::FPR64RegClass.
contains(SrcReg)) {
4584 SpillRC = &AArch64::FPR128RegClass;
4585 SpillSubreg = AArch64::dsub;
4592 if (
unsigned WidenedSrcReg =
4593 TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
4596 return &*--InsertPt;
4615 case AArch64::sub_32:
4616 FillRC = &AArch64::GPR32RegClass;
4619 FillRC = &AArch64::FPR32RegClass;
4622 FillRC = &AArch64::FPR64RegClass;
4628 TRI.getRegSizeInBits(*FillRC) &&
4629 "Mismatched regclass size on folded subreg COPY");
int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
                                    StackOffset &SOffset,
                                    bool *OutUseUnscaledOp,
                                    unsigned *OutUnscaledOp,
                                    int64_t *EmittableOffset) {
  // Set output values in case of early exit - represent the unmodified state.
  if (EmittableOffset)
    *EmittableOffset = 0;
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = false;
  if (OutUnscaledOp)
    *OutUnscaledOp = 0;

  // Exit early for structured vector spills/fills as they can't take an
  // immediate offset.
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LD1Twov2d:
  case AArch64::LD1Threev2d:
  case AArch64::LD1Fourv2d:
  case AArch64::LD1Twov1d:
  case AArch64::LD1Threev1d:
  case AArch64::LD1Fourv1d:
  case AArch64::ST1Twov2d:
  case AArch64::ST1Threev2d:
  case AArch64::ST1Fourv2d:
  case AArch64::ST1Twov1d:
  case AArch64::ST1Threev1d:
  case AArch64::ST1Fourv1d:
  case AArch64::ST1i8:
  case AArch64::ST1i16:
  case AArch64::ST1i32:
  case AArch64::ST1i64:
  case AArch64::IRGstack:
  case AArch64::STGloop:
  case AArch64::STZGloop:
    return AArch64FrameOffsetCannotUpdate;
  }

  // Get the min/max offset and the scale.
  int64_t MinOff, MaxOff;
  // ... (query getMemOpInfo for the scale, width and offset range, and
  //      derive Scale, IsMulVL and the complete byte Offset, folding in the
  //      instruction's existing scaled immediate)

  // If the offset doesn't match the scale, we rewrite the instruction to use
  // the unscaled instruction instead. Likewise, if we have a negative offset
  // and there is an unscaled op to use.
  std::optional<unsigned> UnscaledOp =
      AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
  bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
  if (useUnscaledOp) {
    // ... (re-query getMemOpInfo for the unscaled opcode's range)
    assert(IsMulVL == ScaleValue.isScalable() &&
           "Unscaled opcode has different value for scalable");
  }

  int64_t Remainder = Offset % Scale;
  assert(!(Remainder && useUnscaledOp) &&
         "Cannot have remainder when using unscaled op");

  assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
  int64_t NewOffset = Offset / Scale;
  if (MinOff <= NewOffset && NewOffset <= MaxOff)
    Offset = Remainder;
  else {
    // Clamp to the encodable range and leave the rest in Offset.
    NewOffset = NewOffset < 0 ? MinOff : MaxOff;
    Offset = Offset - NewOffset * Scale;
  }

  if (EmittableOffset)
    *EmittableOffset = NewOffset;
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = useUnscaledOp;
  if (OutUnscaledOp && UnscaledOp)
    *OutUnscaledOp = *UnscaledOp;
  // ... (write the residual back into SOffset and return
  //      AArch64FrameOffsetCanUpdate, plus AArch64FrameOffsetIsLegal when
  //      nothing remains)
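// Worked example (illustrative, not from the LLVM source): a scaled LDRXui
// has Scale = 8 and an immediate range of [0, 4095]. An incoming byte offset
// of 40 is representable as NewOffset = 40 / 8 = 5 with no remainder; a byte
// offset of 20 has 20 % 8 != 0, so the unscaled LDURXi form (Scale = 1,
// range [-256, 255]) is selected and NewOffset stays 20.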
bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                    unsigned FrameReg, StackOffset &Offset,
                                    const AArch64InstrInfo *TII) {
  unsigned Opcode = MI.getOpcode();
  unsigned ImmIdx = FrameRegIdx + 1;

  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
    Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
    emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
                    MI.getOperand(0).getReg(), FrameReg, Offset, TII,
                    MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
    MI.eraseFromParent();
    Offset = StackOffset();
    return true;
  }

  int64_t NewOffset;
  unsigned UnscaledOp;
  bool UseUnscaledOp;
  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
                                         &UnscaledOp, &NewOffset);
  if (Status & AArch64FrameOffsetCanUpdate) {
    if (Status & AArch64FrameOffsetIsLegal)
      // Replace the FrameIndex with FrameReg.
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    if (UseUnscaledOp)
      MI.setDesc(TII->get(UnscaledOp));
    MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
    return !Offset;
  }
  return false;
}
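// Illustrative rewrite (not from the LLVM source): an
//   %x0 = ADDXri %stack.0, 0, 0
// whose frame index resolves to sp+32 is handled by the ADDXri fast path
// above: emitFrameOffset materialises  add x0, sp, #32  and the original
// instruction is erased.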
// True when Opc sets flags.
static bool isCombineInstrSettingFlag(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm
  case AArch64::SUBSWri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

// 32b Opcodes that can be combined with a MUL.
static bool isCombineInstrCandidate32(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDWrr:
  case AArch64::ADDWri:
  case AArch64::SUBWrr:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::SUBSWrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm
  case AArch64::SUBWri:
  case AArch64::SUBSWri:
    return true;
  default:
    break;
  }
  return false;
}

// 64b Opcodes that can be combined with a MUL.
static bool isCombineInstrCandidate64(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDXrr:
  case AArch64::ADDXri:
  case AArch64::SUBXrr:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm
  case AArch64::SUBXri:
  case AArch64::SUBSXri:
  case AArch64::ADDv8i8:
  case AArch64::ADDv16i8:
  case AArch64::ADDv4i16:
  case AArch64::ADDv8i16:
  case AArch64::ADDv2i32:
  case AArch64::ADDv4i32:
  case AArch64::SUBv8i8:
  case AArch64::SUBv16i8:
  case AArch64::SUBv4i16:
  case AArch64::SUBv8i16:
  case AArch64::SUBv2i32:
  case AArch64::SUBv4i32:
    return true;
  default:
    break;
  }
  return false;
}

// FP Opcodes that can be combined with a FMUL.
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
  switch (Inst.getOpcode()) {
  default:
    break;
  case AArch64::FADDHrr:
  case AArch64::FADDSrr:
  case AArch64::FADDDrr:
  case AArch64::FADDv4f16:
  case AArch64::FADDv8f16:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FSUBHrr:
  case AArch64::FSUBSrr:
  case AArch64::FSUBDrr:
  case AArch64::FSUBv4f16:
  case AArch64::FSUBv8f16:
  case AArch64::FSUBv2f32:
  case AArch64::FSUBv2f64:
  case AArch64::FSUBv4f32:
    // TODO: Combine FP ops when FP options are permissive enough.
    TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
    return Options.UnsafeFPMath ||
           Options.AllowFPOpFusion == FPOpFusion::Fast;
  }
  return false;
}
//
// Utility routine that checks if \param MO is defined by an
// \param CombineOpc instruction in the basic block \param MBB.
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
                       unsigned CombineOpc, unsigned ZeroReg = 0,
                       bool CheckZeroReg = false) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineInstr *MI = nullptr;

  if (MO.isReg() && MO.getReg().isVirtual())
    MI = MRI.getUniqueVRegDef(MO.getReg());
  // And it needs to be in the trace (otherwise, it won't have a depth).
  if (!MI || MI->getParent() != &MBB ||
      (unsigned)MI->getOpcode() != CombineOpc)
    return false;
  // Must only be used by the instruction we are combining with.
  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    return false;

  if (CheckZeroReg) {
    assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
           MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
           MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
    // The third input reg must be zero.
    if (MI->getOperand(3).getReg() != ZeroReg)
      return false;
  }

  // A flag-setting candidate is only usable if its NZCV def is dead.
  if (isCombineInstrSettingFlag(CombineOpc) &&
      MI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
    return false;

  return true;
}

//
// Is \param MO defined by an integer multiply and can be combined?
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                              unsigned MulOpc, unsigned ZeroReg) {
  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
}
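// Example of the guarded pattern (illustrative, not from the LLVM source):
//   %3 = MADDWrrr %1, %2, $wzr      ; MUL w3, w1, w2 is an alias of this
//   %4 = ADDWrr %0, %3
// can combine into  MADDWrrr %1, %2, %0  only if the multiply's single
// non-debug use is the add and its addend operand is WZR, which is exactly
// what canCombine(..., CheckZeroReg = true) verifies.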
bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
                                                   bool Invert) const {
  if (Invert)
    return false;
  switch (Inst.getOpcode()) {
  // == Floating-point types ==
  // -- Floating-point instructions --
  case AArch64::FADDHrr:
  case AArch64::FADDSrr:
  case AArch64::FADDDrr:
  case AArch64::FMULHrr:
  case AArch64::FMULSrr:
  case AArch64::FMULDrr:
  case AArch64::FMULX16:
  case AArch64::FMULX32:
  case AArch64::FMULX64:
  // -- Advanced SIMD instructions --
  case AArch64::FADDv4f16:
  case AArch64::FADDv8f16:
  case AArch64::FADDv2f32:
  case AArch64::FADDv4f32:
  case AArch64::FADDv2f64:
  case AArch64::FMULv4f16:
  case AArch64::FMULv8f16:
  case AArch64::FMULv2f32:
  case AArch64::FMULv4f32:
  case AArch64::FMULv2f64:
  case AArch64::FMULXv4f16:
  case AArch64::FMULXv8f16:
  case AArch64::FMULXv2f32:
  case AArch64::FMULXv4f32:
  case AArch64::FMULXv2f64:
  // -- SVE instructions --
  case AArch64::FADD_ZZZ_H:
  case AArch64::FADD_ZZZ_S:
  case AArch64::FADD_ZZZ_D:
  case AArch64::FMUL_ZZZ_H:
  case AArch64::FMUL_ZZZ_S:
  case AArch64::FMUL_ZZZ_D:
    return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath ||
           (Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
            Inst.getFlag(MachineInstr::MIFlag::FmNsz));

  // == Integer types ==
  // -- Base instructions --
  case AArch64::ADDWrr:
  case AArch64::ADDXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  // -- Advanced SIMD instructions --
  case AArch64::ADDv8i8:
  case AArch64::ADDv16i8:
  case AArch64::ADDv4i16:
  case AArch64::ADDv8i16:
  case AArch64::ADDv2i32:
  case AArch64::ADDv4i32:
  case AArch64::ADDv1i64:
  case AArch64::ADDv2i64:
  case AArch64::MULv8i8:
  case AArch64::MULv16i8:
  case AArch64::MULv4i16:
  case AArch64::MULv8i16:
  case AArch64::MULv2i32:
  case AArch64::MULv4i32:
  case AArch64::ANDv8i8:
  case AArch64::ANDv16i8:
  case AArch64::ORRv8i8:
  case AArch64::ORRv16i8:
  case AArch64::EORv8i8:
  case AArch64::EORv16i8:
  // -- SVE instructions --
  case AArch64::ADD_ZZZ_B:
  case AArch64::ADD_ZZZ_H:
  case AArch64::ADD_ZZZ_S:
  case AArch64::ADD_ZZZ_D:
  case AArch64::MUL_ZZZ_B:
  case AArch64::MUL_ZZZ_H:
  case AArch64::MUL_ZZZ_S:
  case AArch64::MUL_ZZZ_D:
  case AArch64::AND_ZZZ:
  case AArch64::ORR_ZZZ:
  case AArch64::EOR_ZZZ:
    return true;

  default:
    return false;
  }
}
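// Note (illustrative, not from the LLVM source): answering "true" here lets
// MachineCombiner reassociate chains such as  (a + b) + c  ->  a + (b + c)
// to shorten the critical path. The floating-point cases additionally demand
// the reassoc and nsz fast-math flags (or global UnsafeFPMath), since FP
// addition and multiplication are not associative in general.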
  // (from getMaddPatterns; MBB, Found and Patterns are defined earlier in
  // the function) Record a combiner pattern when the given operand is
  // produced by a foldable multiply.
  auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
                      MachineCombinerPattern Pattern) {
    if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
      Patterns.push_back(Pattern);
      Found = true;
    }
  };