// Debug category for this pass: enables LLVM_DEBUG output with
// -debug-only=arm-ldst-opt and tags the STATISTIC counters below.
74 #define DEBUG_TYPE "arm-ldst-opt"
// Pass statistics (reported with -stats): counts of multi-register
// (ldm/stm/vldm/vstm) and dual (ldrd/strd) memory instructions this pass
// formed, of load/store instructions it moved, and of ldrd/strd
// instructions it expanded back into ldm/stm or plain ldr/str forms.
76 STATISTIC(NumLDMGened ,
"Number of ldm instructions generated");
77 STATISTIC(NumSTMGened ,
"Number of stm instructions generated");
78 STATISTIC(NumVLDMGened,
"Number of vldm instructions generated");
79 STATISTIC(NumVSTMGened,
"Number of vstm instructions generated");
80 STATISTIC(NumLdStMoved,
"Number of load / store instructions moved");
81 STATISTIC(NumLDRDFormed,
"Number of ldrd created before allocation");
82 STATISTIC(NumSTRDFormed,
"Number of strd created before allocation");
83 STATISTIC(NumLDRD2LDM,
"Number of ldrd instructions turned back into ldm");
84 STATISTIC(NumSTRD2STM,
"Number of strd instructions turned back into stm");
85 STATISTIC(NumLDRD2LDR,
"Number of ldrd instructions turned back into ldr's");
86 STATISTIC(NumSTRD2STR,
"Number of strd instructions turned back into str's");
// Human-readable pass name used for pass registration and shown by
// -debug-pass / pass-manager diagnostics.
97 #define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
116 bool RegClassInfoValid;
117 bool isThumb1, isThumb2;
133 struct MemOpQueueEntry {
139 :
MI(&
MI), Offset(Offset), Position(Position) {}
145 struct MergeCandidate {
150 unsigned LatestMIIdx;
153 unsigned EarliestMIIdx;
160 bool CanMergeToLSMulti;
163 bool CanMergeToLSDouble;
174 unsigned Base,
unsigned WordOffset,
178 int Offset,
unsigned Base,
bool BaseKill,
unsigned Opcode,
180 ArrayRef<std::pair<unsigned, bool>> Regs,
184 int Offset,
unsigned Base,
bool BaseKill,
unsigned Opcode,
186 ArrayRef<std::pair<unsigned, bool>> Regs,
188 void FormCandidates(
const MemOpQueue &MemOps);
189 MachineInstr *MergeOpsUpdate(
const MergeCandidate &Cand);
208 for (
const auto &MO :
MI.operands()) {
211 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
221 unsigned Opcode =
MI.getOpcode();
223 unsigned NumOperands =
MI.getDesc().getNumOperands();
224 unsigned OffField =
MI.getOperand(NumOperands - 3).getImm();
226 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
227 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
228 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
229 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
233 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
234 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
249 return MI.getOperand(1);
253 return MI.getOperand(0);
350 case ARM::tLDMIA_UPD:
351 case ARM::tSTMIA_UPD:
352 case ARM::t2LDMIA_RET:
354 case ARM::t2LDMIA_UPD:
356 case ARM::t2STMIA_UPD:
358 case ARM::VLDMSIA_UPD:
360 case ARM::VSTMSIA_UPD:
362 case ARM::VLDMDIA_UPD:
364 case ARM::VSTMDIA_UPD:
378 case ARM::t2LDMDB_UPD:
380 case ARM::t2STMDB_UPD:
381 case ARM::VLDMSDB_UPD:
382 case ARM::VSTMSDB_UPD:
383 case ARM::VLDMDDB_UPD:
384 case ARM::VSTMDDB_UPD:
396 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
400 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
408 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
412 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
420 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
441 switch (
MI->getOpcode()) {
468 case ARM::tLDMIA_UPD:
469 case ARM::tSTMIA_UPD:
476 return (
MI->getNumOperands() -
MI->getDesc().getNumOperands() + 1) * 4;
479 return (
MI->getNumOperands() -
MI->getDesc().getNumOperands() + 1) * 8;
491 assert(isThumb1 &&
"Can only update base register uses for Thumb1!");
495 bool InsertSub =
false;
496 unsigned Opc =
MBBI->getOpcode();
501 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
503 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
505 if (IsLoad || IsStore) {
511 MBBI->getOperand(
MBBI->getDesc().getNumOperands() - 3);
518 if (Offset >= 0 && !(IsStore && InstrSrcReg ==
Base))
522 }
else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
523 !definesCPSR(*
MBBI)) {
528 MBBI->getOperand(
MBBI->getDesc().getNumOperands() - 3);
529 Offset = (Opc == ARM::tSUBi8) ?
530 MO.
getImm() + WordOffset * 4 :
531 MO.
getImm() - WordOffset * 4 ;
532 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
545 }
else if (definesCPSR(*
MBBI) ||
MBBI->isCall() ||
MBBI->isBranch()) {
586 if (!RegClassInfoValid) {
587 RegClassInfo.runOnMachineFunction(*MF);
588 RegClassInfoValid =
true;
591 for (
unsigned Reg : RegClassInfo.getOrder(&RegClass))
592 if (LiveRegs.available(MF->getRegInfo(),
Reg))
603 if (!LiveRegsValid) {
605 LiveRegs.addLiveOuts(
MBB);
607 LiveRegsValid =
true;
610 while (LiveRegPos != Before) {
612 LiveRegs.stepBackward(*LiveRegPos);
618 for (
const std::pair<unsigned, bool> &R : Regs)
629 int Offset,
unsigned Base,
bool BaseKill,
unsigned Opcode,
631 ArrayRef<std::pair<unsigned, bool>> Regs,
633 unsigned NumRegs = Regs.size();
638 bool SafeToClobberCPSR = !isThumb1 ||
642 bool Writeback = isThumb1;
648 assert(
Base != ARM::SP &&
"Thumb1 does not allow SP in register list");
649 if (Opcode == ARM::tLDRi)
651 else if (Opcode == ARM::tSTRi)
658 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
660 if (Offset == 4 && haveIBAndDA) {
662 }
else if (Offset == -4 * (
int)NumRegs + 4 && haveIBAndDA) {
664 }
else if (Offset == -4 * (
int)NumRegs && isNotVFP && !isThumb1) {
667 }
else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
680 if (!SafeToClobberCPSR)
687 NewBase = Regs[NumRegs-1].first;
691 moveLiveRegsBefore(
MBB, InsertBefore);
695 for (
const std::pair<unsigned, bool> &R : Regs)
696 LiveRegs.addReg(
R.first);
698 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
703 int BaseOpc = isThumb2 ? (BaseKill &&
Base == ARM::SP ? ARM::t2ADDspImm
705 : (isThumb1 &&
Base == ARM::SP)
707 : (isThumb1 &&
Offset < 8)
709 : isThumb1 ? ARM::tADDi8 : ARM::ADDri;
715 BaseOpc = isThumb2 ? (BaseKill &&
Base == ARM::SP ? ARM::t2SUBspImm
717 : (isThumb1 && Offset < 8 &&
Base != ARM::SP)
719 : isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
722 if (!TL->isLegalAddImmediate(Offset))
728 bool KillOldBase = BaseKill &&
737 if (
Base != NewBase &&
738 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
756 if (BaseOpc == ARM::tADDrSPi) {
757 assert(Offset % 4 == 0 &&
"tADDrSPi offset is scaled by 4");
795 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
801 assert(isThumb1 &&
"expected Writeback only inThumb1");
802 if (Opcode == ARM::tLDMIA) {
805 Opcode = ARM::tLDMIA_UPD;
817 UpdateBaseRegUses(
MBB, InsertBefore,
DL,
Base, NumRegs, Pred, PredReg);
826 for (
const std::pair<unsigned, bool> &R : Regs)
836 int Offset,
unsigned Base,
bool BaseKill,
unsigned Opcode,
838 ArrayRef<std::pair<unsigned, bool>> Regs,
841 assert((IsLoad ||
isi32Store(Opcode)) &&
"Must have integer load or store");
842 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
846 TII->get(LoadStoreOpcode));
860 MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(
const MergeCandidate &Cand) {
862 unsigned Opcode =
First->getOpcode();
872 bool IsKill = MO.
isKill();
875 Regs.push_back(std::make_pair(
Reg, IsKill));
891 if (
MI->readsRegister(DefReg))
893 ImpDefs.push_back(DefReg);
902 iterator InsertBefore = std::next(iterator(LatestMI));
911 if (Cand.CanMergeToLSDouble)
912 Merged = CreateLoadStoreDouble(
MBB, InsertBefore, Offset,
Base, BaseKill,
913 Opcode, Pred, PredReg,
DL, Regs,
915 if (!Merged && Cand.CanMergeToLSMulti)
916 Merged = CreateLoadStoreMulti(
MBB, InsertBefore, Offset,
Base, BaseKill,
917 Opcode, Pred, PredReg,
DL, Regs, Cand.Instrs);
923 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
924 bool EarliestAtBegin =
false;
926 EarliestAtBegin =
true;
928 EarliestI = std::prev(EarliestI);
939 EarliestI = std::next(EarliestI);
940 auto FixupRange =
make_range(EarliestI, iterator(Merged));
946 for (
unsigned &ImpDefReg : ImpDefs) {
959 for (
unsigned ImpDef : ImpDefs)
991 unsigned Opcode =
MI.getOpcode();
1004 void ARMLoadStoreOpt::FormCandidates(
const MemOpQueue &MemOps) {
1010 unsigned SIndex = 0;
1011 unsigned EIndex = MemOps.size();
1015 int Offset = MemOps[SIndex].Offset;
1019 :
TRI->getEncodingValue(PReg);
1020 unsigned Latest = SIndex;
1021 unsigned Earliest = SIndex;
1023 bool CanMergeToLSDouble =
1027 if (STI->isCortexM3() &&
isi32Load(Opcode) &&
1029 CanMergeToLSDouble =
false;
1031 bool CanMergeToLSMulti =
true;
1034 if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
1035 CanMergeToLSMulti =
false;
1039 if (PReg == ARM::SP || PReg == ARM::PC)
1040 CanMergeToLSMulti = CanMergeToLSDouble =
false;
1044 CanMergeToLSMulti = CanMergeToLSDouble =
false;
1059 for (
unsigned I = SIndex+1;
I < EIndex; ++
I, ++Count) {
1060 int NewOffset = MemOps[
I].Offset;
1061 if (NewOffset != Offset + (
int)Size)
1065 if (
Reg == ARM::SP ||
Reg == ARM::PC)
1072 :
TRI->getEncodingValue(
Reg);
1073 bool PartOfLSMulti = CanMergeToLSMulti;
1074 if (PartOfLSMulti) {
1076 if (RegNum <= PRegNum)
1077 PartOfLSMulti =
false;
1081 else if (!isNotVFP && RegNum != PRegNum+1)
1082 PartOfLSMulti =
false;
1085 bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
1087 if (!PartOfLSMulti && !PartOfLSDouble)
1089 CanMergeToLSMulti &= PartOfLSMulti;
1090 CanMergeToLSDouble &= PartOfLSDouble;
1093 unsigned Position = MemOps[
I].Position;
1094 if (Position < MemOps[Latest].Position)
1096 else if (Position > MemOps[Earliest].Position)
1104 MergeCandidate *Candidate =
new(
Allocator.Allocate()) MergeCandidate;
1105 for (
unsigned C = SIndex, CE = SIndex + Count;
C <
CE; ++
C)
1106 Candidate->Instrs.push_back(MemOps[
C].MI);
1107 Candidate->LatestMIIdx = Latest - SIndex;
1108 Candidate->EarliestMIIdx = Earliest - SIndex;
1109 Candidate->InsertPos = MemOps[Latest].Position;
1111 CanMergeToLSMulti = CanMergeToLSDouble =
false;
1112 Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
1113 Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
1114 Candidates.push_back(Candidate);
1117 }
while (SIndex < EIndex);
1194 switch (
MI.getOpcode()) {
1195 case ARM::tADDi8: Scale = 4; CheckCPSRDef =
true;
break;
1196 case ARM::tSUBi8: Scale = -4; CheckCPSRDef =
true;
break;
1198 case ARM::t2SUBspImm:
1199 case ARM::SUBri: Scale = -1; CheckCPSRDef =
true;
break;
1201 case ARM::t2ADDspImm:
1202 case ARM::ADDri: Scale = 1; CheckCPSRDef =
true;
break;
1203 case ARM::tADDspi: Scale = 4; CheckCPSRDef =
false;
break;
1204 case ARM::tSUBspi: Scale = -4; CheckCPSRDef =
false;
break;
1209 if (
MI.getOperand(0).getReg() !=
Reg ||
1210 MI.getOperand(1).getReg() !=
Reg ||
1212 MIPredReg != PredReg)
1215 if (CheckCPSRDef && definesCPSR(
MI))
1217 return MI.getOperand(2).getImm() * Scale;
1228 if (
MBBI == BeginMBBI)
1233 while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)
1237 return Offset == 0 ? EndMBBI : PrevMBBI;
1249 while (NextMBBI != EndMBBI) {
1251 while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())
1253 if (NextMBBI == EndMBBI)
1267 if (
Reg == ARM::SP || NextMBBI->readsRegister(
Reg,
TRI) ||
1268 NextMBBI->definesRegister(
Reg,
TRI))
1288 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(
MachineInstr *
MI) {
1290 if (isThumb1)
return false;
1295 bool BaseKill = BaseOP.
isKill();
1298 unsigned Opcode =
MI->getOpcode();
1327 if (!STI->hasMinSize() || !BaseKill)
1330 bool HighRegsUsed =
false;
1332 if (MO.
getReg() >= ARM::R8) {
1333 HighRegsUsed =
true;
1343 if (MergeInstr !=
MBB.
end()) {
1370 return ARM::LDR_PRE_IMM;
1372 return ARM::STR_PRE_IMM;
1383 return ARM::t2LDR_PRE;
1386 return ARM::t2STR_PRE;
1395 return ARM::LDR_POST_IMM;
1397 return ARM::STR_POST_IMM;
1408 return ARM::t2LDR_POST;
1410 case ARM::t2LDRBi12:
1411 return ARM::t2LDRB_POST;
1412 case ARM::t2LDRSBi8:
1413 case ARM::t2LDRSBi12:
1414 return ARM::t2LDRSB_POST;
1416 case ARM::t2LDRHi12:
1417 return ARM::t2LDRH_POST;
1418 case ARM::t2LDRSHi8:
1419 case ARM::t2LDRSHi12:
1420 return ARM::t2LDRSH_POST;
1423 return ARM::t2STR_POST;
1425 case ARM::t2STRBi12:
1426 return ARM::t2STRB_POST;
1428 case ARM::t2STRHi12:
1429 return ARM::t2STRH_POST;
1431 case ARM::MVE_VLDRBS16:
1432 return ARM::MVE_VLDRBS16_post;
1433 case ARM::MVE_VLDRBS32:
1434 return ARM::MVE_VLDRBS32_post;
1435 case ARM::MVE_VLDRBU16:
1436 return ARM::MVE_VLDRBU16_post;
1437 case ARM::MVE_VLDRBU32:
1438 return ARM::MVE_VLDRBU32_post;
1439 case ARM::MVE_VLDRHS32:
1440 return ARM::MVE_VLDRHS32_post;
1441 case ARM::MVE_VLDRHU32:
1442 return ARM::MVE_VLDRHU32_post;
1443 case ARM::MVE_VLDRBU8:
1444 return ARM::MVE_VLDRBU8_post;
1445 case ARM::MVE_VLDRHU16:
1446 return ARM::MVE_VLDRHU16_post;
1447 case ARM::MVE_VLDRWU32:
1448 return ARM::MVE_VLDRWU32_post;
1449 case ARM::MVE_VSTRB16:
1450 return ARM::MVE_VSTRB16_post;
1451 case ARM::MVE_VSTRB32:
1452 return ARM::MVE_VSTRB32_post;
1453 case ARM::MVE_VSTRH32:
1454 return ARM::MVE_VSTRH32_post;
1455 case ARM::MVE_VSTRBU8:
1456 return ARM::MVE_VSTRBU8_post;
1457 case ARM::MVE_VSTRHU16:
1458 return ARM::MVE_VSTRHU16_post;
1459 case ARM::MVE_VSTRWU32:
1460 return ARM::MVE_VSTRWU32_post;
1468 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(
MachineInstr *
MI) {
1471 if (isThumb1)
return false;
1476 unsigned Opcode =
MI->getOpcode();
1478 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1479 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1480 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
1482 if (
MI->getOperand(2).getImm() != 0)
1489 if (
MI->getOperand(0).getReg() ==
Base)
1501 if (!isAM5 && Offset == Bytes) {
1503 }
else if (Offset == -Bytes) {
1507 if (MergeInstr ==
MBB.
end())
1511 if ((isAM5 && Offset != Bytes) ||
1543 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1584 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1613 bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(
MachineInstr &
MI)
const {
1614 unsigned Opcode =
MI.getOpcode();
1615 assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
1616 "Must have t2STRDi8 or t2LDRDi8");
1617 if (
MI.getOperand(3).getImm() != 0)
1638 if (Offset == 8 || Offset == -8) {
1639 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
1642 if (MergeInstr ==
MBB.
end())
1644 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
1653 if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
1656 assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
1661 assert(
TII->get(Opcode).getNumOperands() == 6 &&
1662 TII->get(NewOpc).getNumOperands() == 7 &&
1663 "Unexpected number of operands in Opcode specification.");
1678 unsigned Opcode =
MI.getOpcode();
1698 if (!
MI.getOperand(1).isReg())
1703 if (!
MI.hasOneMemOperand())
1722 if (
MI.getOperand(0).isReg() &&
MI.getOperand(0).isUndef())
1726 if (
MI.getOperand(1).isUndef())
1734 bool isDef,
unsigned NewOpc,
unsigned Reg,
1735 bool RegDeadKill,
bool RegUndef,
unsigned BaseReg,
1763 unsigned Opcode =
MI->getOpcode();
1771 Register EvenReg =
MI->getOperand(0).getReg();
1772 Register OddReg =
MI->getOperand(1).getReg();
1778 bool Errata602117 = EvenReg == BaseReg &&
1779 (Opcode ==
ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
1782 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
1784 if (!Errata602117 && !NonConsecutiveRegs)
1787 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1788 bool isLd = Opcode ==
ARM::LDRD || Opcode == ARM::t2LDRDi8;
1789 bool EvenDeadKill = isLd ?
1790 MI->getOperand(0).isDead() :
MI->getOperand(0).isKill();
1791 bool EvenUndef =
MI->getOperand(0).isUndef();
1792 bool OddDeadKill = isLd ?
1793 MI->getOperand(1).isDead() :
MI->getOperand(1).isKill();
1794 bool OddUndef =
MI->getOperand(1).isUndef();
1795 bool BaseKill = BaseOp.
isKill();
1796 bool BaseUndef = BaseOp.
isUndef();
1797 assert((isT2 ||
MI->getOperand(3).getReg() == ARM::NoRegister) &&
1798 "register offset not handled below");
1803 if (OddRegNum > EvenRegNum && OffImm == 0) {
1806 unsigned NewOpc = (isLd)
1807 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1808 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1830 unsigned NewOpc = (isLd)
1831 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1832 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1835 unsigned NewOpc2 = (isLd)
1836 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1837 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1843 false, BaseReg,
false, BaseUndef, Pred, PredReg,
TII,
MI);
1845 false, BaseReg, BaseKill, BaseUndef, Pred, PredReg,
TII,
1848 if (OddReg == EvenReg && EvenDeadKill) {
1852 EvenDeadKill =
false;
1856 if (EvenReg == BaseReg)
1857 EvenDeadKill =
false;
1859 EvenUndef, BaseReg,
false, BaseUndef, Pred, PredReg,
TII,
1862 OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg,
TII,
1879 unsigned CurrBase = 0;
1880 unsigned CurrOpc = ~0u;
1882 unsigned Position = 0;
1883 assert(Candidates.size() == 0);
1884 assert(MergeBaseCandidates.size() == 0);
1885 LiveRegsValid =
false;
1890 MBBI = std::prev(
I);
1891 if (FixInvalidRegPairOp(
MBB,
MBBI))
1896 unsigned Opcode =
MBBI->getOpcode();
1903 if (CurrBase == 0) {
1908 MemOps.push_back(MemOpQueueEntry(*
MBBI, Offset, Position));
1912 if (CurrOpc == Opcode && CurrBase ==
Base && CurrPred == Pred) {
1920 bool Overlap =
false;
1924 for (
const MemOpQueueEntry &
E : MemOps) {
1935 if (Offset > MemOps.back().Offset) {
1936 MemOps.push_back(MemOpQueueEntry(*
MBBI, Offset, Position));
1939 MemOpQueue::iterator
MI, ME;
1940 for (
MI = MemOps.begin(), ME = MemOps.end();
MI != ME; ++
MI) {
1941 if (Offset < MI->Offset) {
1945 if (Offset ==
MI->Offset) {
1951 if (
MI != MemOps.end()) {
1952 MemOps.insert(
MI, MemOpQueueEntry(*
MBBI, Offset, Position));
1963 }
else if (
MBBI->isDebugInstr()) {
1965 }
else if (
MBBI->getOpcode() == ARM::t2LDRDi8 ||
1966 MBBI->getOpcode() == ARM::t2STRDi8) {
1969 MergeBaseCandidates.push_back(&*
MBBI);
1973 if (MemOps.size() > 0) {
1974 FormCandidates(MemOps);
1982 if (MemOps.size() > 0)
1983 FormCandidates(MemOps);
1987 auto LessThan = [](
const MergeCandidate*
M0,
const MergeCandidate *
M1) {
1988 return M0->InsertPos <
M1->InsertPos;
1993 bool Changed =
false;
1994 for (
const MergeCandidate *Candidate : Candidates) {
1995 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
2001 if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
2002 MergeBaseUpdateLSDouble(*Merged);
2004 MergeBaseUpdateLSMultiple(Merged);
2007 if (MergeBaseUpdateLoadStore(
MI))
2012 assert(Candidate->Instrs.size() == 1);
2013 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
2020 MergeBaseUpdateLSDouble(*
MI);
2021 MergeBaseCandidates.clear();
2038 if (isThumb1)
return false;
2043 (
MBBI->getOpcode() == ARM::BX_RET ||
2044 MBBI->getOpcode() == ARM::tBX_RET ||
2045 MBBI->getOpcode() == ARM::MOVPCLR)) {
2048 while (PrevI->isDebugInstr() && PrevI !=
MBB.
begin())
2052 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
2053 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
2054 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
2056 if (MO.
getReg() != ARM::LR)
2058 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
2059 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
2060 Opcode == ARM::LDMIA_UPD) &&
"Unsupported multiple load-return!");
2071 if (
Info.getReg() == ARM::LR) {
2072 Info.setRestored(
false);
2085 MBBI->getOpcode() != ARM::tBX_RET)
2090 if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
2093 for (
auto Use : Prev->uses())
2095 assert(STI->hasV4TOps());
2116 TII = STI->getInstrInfo();
2117 TRI = STI->getRegisterInfo();
2119 RegClassInfoValid =
false;
2120 isThumb2 = AFI->isThumb2Function();
2121 isThumb1 = AFI->isThumbFunction() && !isThumb2;
2126 if (STI->hasV5TOps() && !AFI->shouldSignReturnAddress())
// Human-readable name for the pre-register-allocation variant of the ARM
// load/store optimization pass, used for its pass registration.
2136 #define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
2137 "ARM pre- register allocation load / store optimization pass"
2159 StringRef getPassName()
const override {
2177 unsigned Base,
bool isLd,
2180 bool DistributeIncrements();
2199 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(
MachineFunction &Fn) {
2203 TD = &Fn.getDataLayout();
2205 TII = STI->getInstrInfo();
2206 TRI = STI->getRegisterInfo();
2207 MRI = &Fn.getRegInfo();
2208 DT = &getAnalysis<MachineDominatorTree>();
2210 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2212 bool Modified = DistributeIncrements();
2214 Modified |= RescheduleLoadStoreInstrs(&MFI);
2229 if (
I->isDebugInstr() || MemOps.
count(&*
I))
2231 if (
I->isCall() ||
I->isTerminator() ||
I->hasUnmodeledSideEffects())
2233 if (
I->mayStore() || (!isLd &&
I->mayLoad()))
2235 if (
I->mayAlias(
AA, *
MemOp,
false))
2237 for (
unsigned j = 0, NumOps =
I->getNumOperands();
j != NumOps; ++
j) {
2250 if (MemRegs.
size() <= 4)
2253 return AddedRegPressure.
size() <= MemRegs.
size() * 2;
2256 bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(
2261 if (!STI->hasV5TEOps())
2267 if (Opcode == ARM::LDRi12) {
2269 }
else if (Opcode == ARM::STRi12) {
2271 }
else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
2272 NewOpc = ARM::t2LDRDi8;
2275 }
else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
2276 NewOpc = ARM::t2STRDi8;
2296 if (Alignment < ReqAlign)
2302 int Limit = (1 << 8) * Scale;
2303 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
2312 int Limit = (1 << 8) * Scale;
2313 if (OffImm >= Limit || (OffImm & (Scale-1)))
2319 if (FirstReg == SecondReg)
2329 unsigned Base,
bool isLd,
2331 bool RetVal =
false;
2338 return LOffset > ROffset;
2345 while (Ops.size() > 1) {
2346 unsigned FirstLoc = ~0U;
2347 unsigned LastLoc = 0;
2351 unsigned LastOpcode = 0;
2352 unsigned LastBytes = 0;
2353 unsigned NumMove = 0;
2358 if (LastOpcode && LSMOpcode != LastOpcode)
2365 if (Bytes != LastBytes || Offset != (LastOffset + (
int)Bytes))
2377 LastOpcode = LSMOpcode;
2379 unsigned Loc = MI2LocMap[
Op];
2380 if (Loc <= FirstLoc) {
2384 if (Loc >= LastLoc) {
2395 for (
size_t i = Ops.size() - NumMove,
e = Ops.size();
i !=
e; ++
i) {
2402 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4;
2405 MemOps, MemRegs,
TRI,
AA);
2407 for (
unsigned i = 0;
i != NumMove; ++
i)
2412 while (InsertPos !=
MBB->
end() &&
2413 (MemOps.
count(&*InsertPos) || InsertPos->isDebugInstr()))
2424 unsigned NewOpc = 0;
2427 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2428 FirstReg, SecondReg, BaseReg,
2429 Offset, PredReg, Pred, isT2)) {
2477 for (
unsigned i = 0;
i != NumMove; ++
i) {
2483 NumLdStMoved += NumMove;
2494 bool RetVal =
false;
2500 Base2InstMap Base2LdsMap;
2501 Base2InstMap Base2StsMap;
2511 if (
MI.isCall() ||
MI.isTerminator()) {
2517 if (!
MI.isDebugInstr())
2518 MI2LocMap[&
MI] = ++Loc;
2526 int Opc =
MI.getOpcode();
2530 bool StopHere =
false;
2531 auto FindBases = [&] (Base2InstMap &Base2Ops, BaseVec &Bases) {
2532 MapIt BI = Base2Ops.find(
Base);
2533 if (BI == Base2Ops.end()) {
2534 Base2Ops[
Base].push_back(&
MI);
2535 Bases.push_back(
Base);
2538 for (
unsigned i = 0,
e = BI->second.size();
i !=
e; ++
i) {
2545 BI->second.push_back(&
MI);
2549 FindBases(Base2LdsMap, LdBases);
2551 FindBases(Base2StsMap, StBases);
2562 for (
unsigned i = 0,
e = LdBases.size();
i !=
e; ++
i) {
2563 unsigned Base = LdBases[
i];
2566 RetVal |= RescheduleOps(
MBB, Lds,
Base,
true, MI2LocMap);
2570 for (
unsigned i = 0,
e = StBases.size();
i !=
e; ++
i) {
2571 unsigned Base = StBases[
i];
2574 RetVal |= RescheduleOps(
MBB, Sts,
Base,
false, MI2LocMap);
2578 Base2LdsMap.clear();
2579 Base2StsMap.clear();
2593 switch (
MI.getOpcode()) {
2594 case ARM::MVE_VLDRBS16:
2595 case ARM::MVE_VLDRBS32:
2596 case ARM::MVE_VLDRBU16:
2597 case ARM::MVE_VLDRBU32:
2598 case ARM::MVE_VLDRHS32:
2599 case ARM::MVE_VLDRHU32:
2600 case ARM::MVE_VLDRBU8:
2601 case ARM::MVE_VLDRHU16:
2602 case ARM::MVE_VLDRWU32:
2603 case ARM::MVE_VSTRB16:
2604 case ARM::MVE_VSTRB32:
2605 case ARM::MVE_VSTRH32:
2606 case ARM::MVE_VSTRBU8:
2607 case ARM::MVE_VSTRHU16:
2608 case ARM::MVE_VSTRWU32:
2610 case ARM::t2LDRHi12:
2611 case ARM::t2LDRSHi8:
2612 case ARM::t2LDRSHi12:
2614 case ARM::t2LDRBi12:
2615 case ARM::t2LDRSBi8:
2616 case ARM::t2LDRSBi12:
2618 case ARM::t2STRBi12:
2620 case ARM::t2STRHi12:
2622 case ARM::MVE_VLDRBS16_post:
2623 case ARM::MVE_VLDRBS32_post:
2624 case ARM::MVE_VLDRBU16_post:
2625 case ARM::MVE_VLDRBU32_post:
2626 case ARM::MVE_VLDRHS32_post:
2627 case ARM::MVE_VLDRHU32_post:
2628 case ARM::MVE_VLDRBU8_post:
2629 case ARM::MVE_VLDRHU16_post:
2630 case ARM::MVE_VLDRWU32_post:
2631 case ARM::MVE_VSTRB16_post:
2632 case ARM::MVE_VSTRB32_post:
2633 case ARM::MVE_VSTRH32_post:
2634 case ARM::MVE_VSTRBU8_post:
2635 case ARM::MVE_VSTRHU16_post:
2636 case ARM::MVE_VSTRWU32_post:
2637 case ARM::MVE_VLDRBS16_pre:
2638 case ARM::MVE_VLDRBS32_pre:
2639 case ARM::MVE_VLDRBU16_pre:
2640 case ARM::MVE_VLDRBU32_pre:
2641 case ARM::MVE_VLDRHS32_pre:
2642 case ARM::MVE_VLDRHU32_pre:
2643 case ARM::MVE_VLDRBU8_pre:
2644 case ARM::MVE_VLDRHU16_pre:
2645 case ARM::MVE_VLDRWU32_pre:
2646 case ARM::MVE_VSTRB16_pre:
2647 case ARM::MVE_VSTRB32_pre:
2648 case ARM::MVE_VSTRH32_pre:
2649 case ARM::MVE_VSTRBU8_pre:
2650 case ARM::MVE_VSTRHU16_pre:
2651 case ARM::MVE_VSTRWU32_pre:
2658 switch (
MI.getOpcode()) {
2659 case ARM::MVE_VLDRBS16_post:
2660 case ARM::MVE_VLDRBS32_post:
2661 case ARM::MVE_VLDRBU16_post:
2662 case ARM::MVE_VLDRBU32_post:
2663 case ARM::MVE_VLDRHS32_post:
2664 case ARM::MVE_VLDRHU32_post:
2665 case ARM::MVE_VLDRBU8_post:
2666 case ARM::MVE_VLDRHU16_post:
2667 case ARM::MVE_VLDRWU32_post:
2668 case ARM::MVE_VSTRB16_post:
2669 case ARM::MVE_VSTRB32_post:
2670 case ARM::MVE_VSTRH32_post:
2671 case ARM::MVE_VSTRBU8_post:
2672 case ARM::MVE_VSTRHU16_post:
2673 case ARM::MVE_VSTRWU32_post:
2680 switch (
MI.getOpcode()) {
2681 case ARM::MVE_VLDRBS16_pre:
2682 case ARM::MVE_VLDRBS32_pre:
2683 case ARM::MVE_VLDRBU16_pre:
2684 case ARM::MVE_VLDRBU32_pre:
2685 case ARM::MVE_VLDRHS32_pre:
2686 case ARM::MVE_VLDRHU32_pre:
2687 case ARM::MVE_VLDRBU8_pre:
2688 case ARM::MVE_VLDRHU16_pre:
2689 case ARM::MVE_VLDRWU32_pre:
2690 case ARM::MVE_VSTRB16_pre:
2691 case ARM::MVE_VSTRB32_pre:
2692 case ARM::MVE_VSTRH32_pre:
2693 case ARM::MVE_VSTRBU8_pre:
2694 case ARM::MVE_VSTRHU16_pre:
2695 case ARM::MVE_VSTRWU32_pre:
2708 int &CodesizeEstimate) {
2717 CodesizeEstimate += 1;
2718 return Imm < 0 && -
Imm < ((1 << 8) * 1);
2731 MI->getOperand(BaseOp).setReg(NewBaseReg);
2739 int OldOffset =
MI->getOperand(BaseOp + 1).getImm();
2741 MI->getOperand(BaseOp + 1).setImm(OldOffset - Offset);
2743 unsigned ConvOpcode;
2744 switch (
MI->getOpcode()) {
2745 case ARM::t2LDRHi12:
2746 ConvOpcode = ARM::t2LDRHi8;
2748 case ARM::t2LDRSHi12:
2749 ConvOpcode = ARM::t2LDRSHi8;
2751 case ARM::t2LDRBi12:
2752 ConvOpcode = ARM::t2LDRBi8;
2754 case ARM::t2LDRSBi12:
2755 ConvOpcode = ARM::t2LDRSBi8;
2757 case ARM::t2STRHi12:
2758 ConvOpcode = ARM::t2STRHi8;
2760 case ARM::t2STRBi12:
2761 ConvOpcode = ARM::t2STRBi8;
2767 "Illegal Address Immediate after convert!");
2771 .
add(
MI->getOperand(0))
2772 .
add(
MI->getOperand(1))
2773 .
addImm(OldOffset - Offset)
2774 .
add(
MI->getOperand(3))
2775 .
add(
MI->getOperand(4))
2777 MI->eraseFromParent();
2796 TRC =
TII->getRegClass(MCID, 2,
TRI, *MF);
2805 return BuildMI(*
MI->getParent(),
MI,
MI->getDebugLoc(), MCID)
2807 .
add(
MI->getOperand(0))
2808 .
add(
MI->getOperand(1))
2810 .
add(
MI->getOperand(3))
2811 .
add(
MI->getOperand(4))
2812 .
add(
MI->getOperand(5))
2815 if (
MI->mayLoad()) {
2816 return BuildMI(*
MI->getParent(),
MI,
MI->getDebugLoc(), MCID)
2817 .
add(
MI->getOperand(0))
2819 .
add(
MI->getOperand(1))
2821 .
add(
MI->getOperand(3))
2822 .
add(
MI->getOperand(4))
2825 return BuildMI(*
MI->getParent(),
MI,
MI->getDebugLoc(), MCID)
2827 .
add(
MI->getOperand(0))
2828 .
add(
MI->getOperand(1))
2830 .
add(
MI->getOperand(3))
2831 .
add(
MI->getOperand(4))
2855 bool ARMPreAllocLoadStoreOpt::DistributeIncrements(
Register Base) {
2875 if (!
Use.getOperand(BaseOp).isReg() ||
2876 Use.getOperand(BaseOp).getReg() !=
Base)
2880 else if (
Use.getOperand(BaseOp + 1).getImm() == 0)
2886 int IncrementOffset;
2888 if (BaseAccess && Increment) {
2892 if (
Increment->definesRegister(ARM::CPSR) ||
2896 LLVM_DEBUG(
dbgs() <<
"\nAttempting to distribute increments on VirtualReg "
2897 <<
Base.virtRegIndex() <<
"\n");
2902 if (!DT->dominates(BaseAccess, &
Use) || &
Use == BaseAccess) {
2903 LLVM_DEBUG(
dbgs() <<
" BaseAccess doesn't dominate use of increment\n");
2913 LLVM_DEBUG(
dbgs() <<
" Illegal addressing mode immediate on postinc\n");
2917 else if (PrePostInc) {
2925 LLVM_DEBUG(
dbgs() <<
"\nAttempting to distribute increments on already "
2926 <<
"indexed VirtualReg " <<
Base.virtRegIndex() <<
"\n");
2929 BaseAccess = PrePostInc;
2944 int CodesizeEstimate = -1;
2945 for (
auto *
Use : OtherAccesses) {
2946 if (DT->dominates(BaseAccess,
Use)) {
2950 Use->getOperand(BaseOp + 1).getImm() -
2952 TII, CodesizeEstimate)) {
2953 LLVM_DEBUG(
dbgs() <<
" Illegal addressing mode immediate on use\n");
2956 }
else if (!DT->dominates(
Use, BaseAccess)) {
2958 dbgs() <<
" Unknown dominance relation between Base and Use\n");
2962 if (STI->hasMinSize() && CodesizeEstimate > 0) {
2963 LLVM_DEBUG(
dbgs() <<
" Expected to grow instructions under minsize\n");
2971 NewBaseReg =
Increment->getOperand(0).getReg();
2976 (void)BaseAccessPost;
2980 for (
auto *
Use : SuccessorAccesses) {
2989 Op.setIsKill(
false);
2993 bool ARMPreAllocLoadStoreOpt::DistributeIncrements() {
2994 bool Changed =
false;
2996 for (
auto &
MBB : *MF) {
2997 for (
auto &
MI :
MBB) {
2999 if (BaseOp == -1 || !
MI.getOperand(BaseOp).isReg())
3010 for (
auto Base : Visited)
3011 Changed |= DistributeIncrements(
Base);
3019 return new ARMPreAllocLoadStoreOpt();
3020 return new ARMLoadStoreOpt();