#define DEBUG_TYPE "thumb2-reduce-size"
#define THUMB2_SIZE_REDUCE_NAME "Thumb2 instruction size reduce pass"
STATISTIC(NumNarrows,  "Number of 32-bit instrs reduced to 16-bit ones");
STATISTIC(Num2Addrs,   "Number of 32-bit instrs reduced to 2addr 16-bit ones");
STATISTIC(NumLdSts,    "Number of 32-bit load / store reduced to 16-bit ones");
    unsigned LowRegs1 : 1;   // Only possible if low-registers are used
    unsigned LowRegs2 : 1;   // Only possible if low-registers are used (2addr)
    unsigned PartFlag : 1;   // 16-bit instruction does partial flag update
    unsigned AvoidMovs: 1;   // Avoid movs with shifter operand (for Swift)
  static const ReduceEntry ReduceTable[] = {
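  // Column key: wide opcode, 16-bit opcode, 16-bit two-address opcode,
  // immediate bit-widths, low-register requirements, PredCC1/PredCC2,
  // PartFlag, Special, AvoidMovs.
  // Wide,        Narrow1,      Narrow2,      imm1,imm2, lo1, lo2, P/C,PF,S,AM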
  { ARM::t2ADCrr, 0,             ARM::tADC,     0,   0,   0,   1,  0,0, 0,0,0 },
  { ARM::t2ADDri, ARM::tADDi3,   ARM::tADDi8,   3,   8,   1,   1,  0,0, 0,1,0 },
  { ARM::t2ADDrr, ARM::tADDrr,   ARM::tADDhirr, 0,   0,   1,   0,  0,1, 0,0,0 },
  { ARM::t2ADDSri,ARM::tADDi3,   ARM::tADDi8,   3,   8,   1,   1,  2,2, 0,1,0 },
  { ARM::t2ADDSrr,ARM::tADDrr,   0,             0,   0,   1,   0,  2,0, 0,1,0 },
  { ARM::t2ANDrr, 0,             ARM::tAND,     0,   0,   0,   1,  0,0, 1,0,0 },
  { ARM::t2ASRri, ARM::tASRri,   0,             5,   0,   1,   0,  0,0, 1,0,1 },
  { ARM::t2ASRrr, 0,             ARM::tASRrr,   0,   0,   0,   1,  0,0, 1,0,1 },
  { ARM::t2BICrr, 0,             ARM::tBIC,     0,   0,   0,   1,  0,0, 1,0,0 },
  { ARM::t2CMNzrr,ARM::tCMNz,    0,             0,   0,   1,   0,  2,0, 0,0,0 },
  { ARM::t2CMPri, ARM::tCMPi8,   0,             8,   0,   1,   0,  2,0, 0,0,0 },
  { ARM::t2CMPrr, ARM::tCMPhir,  0,             0,   0,   0,   0,  2,0, 0,1,0 },
  { ARM::t2EORrr, 0,             ARM::tEOR,     0,   0,   0,   1,  0,0, 1,0,0 },
  { ARM::t2LSLri, ARM::tLSLri,   0,             5,   0,   1,   0,  0,0, 1,0,1 },
  { ARM::t2LSLrr, 0,             ARM::tLSLrr,   0,   0,   0,   1,  0,0, 1,0,1 },
  { ARM::t2LSRri, ARM::tLSRri,   0,             5,   0,   1,   0,  0,0, 1,0,1 },
  { ARM::t2LSRrr, 0,             ARM::tLSRrr,   0,   0,   0,   1,  0,0, 1,0,1 },
  { ARM::t2MOVi,  ARM::tMOVi8,   0,             8,   0,   1,   0,  0,0, 1,0,0 },
  { ARM::t2MOVi16,ARM::tMOVi8,   0,             8,   0,   1,   0,  0,0, 1,1,0 },
  { ARM::t2MOVr,  ARM::tMOVr,    0,             0,   0,   0,   0,  1,0, 0,0,0 },
  { ARM::t2MUL,   0,             ARM::tMUL,     0,   0,   0,   1,  0,0, 1,0,0 },
  { ARM::t2MVNr,  ARM::tMVN,     0,             0,   0,   1,   0,  0,0, 0,0,0 },
  { ARM::t2ORRrr, 0,             ARM::tORR,     0,   0,   0,   1,  0,0, 1,0,0 },
  { ARM::t2REV,   ARM::tREV,     0,             0,   0,   1,   0,  1,0, 0,0,0 },
  { ARM::t2REV16, ARM::tREV16,   0,             0,   0,   1,   0,  1,0, 0,0,0 },
  { ARM::t2REVSH, ARM::tREVSH,   0,             0,   0,   1,   0,  1,0, 0,0,0 },
  { ARM::t2RORrr, 0,             ARM::tROR,     0,   0,   0,   1,  0,0, 1,0,0 },
  { ARM::t2RSBri, ARM::tRSB,     0,             0,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2RSBSri,ARM::tRSB,     0,             0,   0,   1,   0,  2,0, 0,1,0 },
  { ARM::t2SBCrr, 0,             ARM::tSBC,     0,   0,   0,   1,  0,0, 0,0,0 },
  { ARM::t2SUBri, ARM::tSUBi3,   ARM::tSUBi8,   3,   8,   1,   1,  0,0, 0,0,0 },
  { ARM::t2SUBrr, ARM::tSUBrr,   0,             0,   0,   1,   0,  0,0, 0,0,0 },
  { ARM::t2SUBSri,ARM::tSUBi3,   ARM::tSUBi8,   3,   8,   1,   1,  2,2, 0,0,0 },
  { ARM::t2SUBSrr,ARM::tSUBrr,   0,             0,   0,   1,   0,  2,0, 0,0,0 },
  { ARM::t2SXTB,  ARM::tSXTB,    0,             0,   0,   1,   0,  1,0, 0,1,0 },
  { ARM::t2SXTH,  ARM::tSXTH,    0,             0,   0,   1,   0,  1,0, 0,1,0 },
  { ARM::t2TEQrr, ARM::tEOR,     0,             0,   0,   1,   0,  2,0, 0,1,0 },
  { ARM::t2TSTrr, ARM::tTST,     0,             0,   0,   1,   0,  2,0, 0,0,0 },
  { ARM::t2UXTB,  ARM::tUXTB,    0,             0,   0,   1,   0,  1,0, 0,1,0 },
  { ARM::t2UXTH,  ARM::tUXTH,    0,             0,   0,   1,   0,  1,0, 0,1,0 },
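  // Load / store entries. Where NarrowOpc2 is set (tLDRspi / tSTRspi), it is
  // the SP-relative form, selected below when the base register is SP.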
  { ARM::t2LDRi12,ARM::tLDRi,    ARM::tLDRspi,  5,   8,   1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRs,  ARM::tLDRr,    0,             0,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRBi12,ARM::tLDRBi,  0,             5,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRBs, ARM::tLDRBr,   0,             0,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRHi12,ARM::tLDRHi,  0,             5,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRHs, ARM::tLDRHr,   0,             0,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRSBs,ARM::tLDRSB,   0,             0,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRSHs,ARM::tLDRSH,   0,             0,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2LDR_POST,ARM::tLDMIA_UPD,0,          0,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2STRi12,ARM::tSTRi,    ARM::tSTRspi,  5,   8,   1,   0,  0,0, 0,1,0 },
  { ARM::t2STRs,  ARM::tSTRr,    0,             0,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2STRBi12,ARM::tSTRBi,  0,             5,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2STRBs, ARM::tSTRBr,   0,             0,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2STRHi12,ARM::tSTRHi,  0,             5,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2STRHs, ARM::tSTRHr,   0,             0,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2STR_POST,ARM::tSTMIA_UPD,0,          0,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2LDMIA, ARM::tLDMIA,   0,             0,   0,   1,   1,  1,1, 0,1,0 },
  { ARM::t2LDMIA_RET,0,          ARM::tPOP_RET, 0,   0,   1,   1,  1,1, 0,1,0 },
  { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP, 0,   0,   1,   1,  1,1, 0,1,0 },
  // ARM::t2STMIA (with no base register writeback) has no Thumb1 equivalent.
  // tSTMIA_UPD is a change in semantics which can only be used if the base
  // register is killed. This difference is handled in ReduceLoadStore below.
  { ARM::t2STMIA, ARM::tSTMIA_UPD,0,            0,   0,   1,   1,  1,1, 0,1,0 },
  { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD,0,         0,   0,   1,   1,  1,1, 0,1,0 },
  { ARM::t2STMDB_UPD,0,          ARM::tPUSH,    0,   0,   1,   1,  1,1, 0,1,0 }
  };
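  // As an illustration: "add.w r0, r0, r1" (t2ADDrr, 4 bytes) can be narrowed
  // to "adds r0, r0, r1" (tADDrr, 2 bytes) when both operands are low
  // registers and updating CPSR is acceptable at that point.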
    bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                         bool is2Addr, ARMCC::CondCodes Pred,
                         bool LiveCPSR, bool &HasCC, bool &CCDead);

    bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
                         const ReduceEntry &Entry);

    bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                       const ReduceEntry &Entry, bool LiveCPSR,
                       bool IsSelfLoop);

    /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
    /// instruction.
    bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                       const ReduceEntry &Entry, bool LiveCPSR,
                       bool IsSelfLoop);

    /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
    /// non-two-address instruction.
    bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                        const ReduceEntry &Entry, bool LiveCPSR,
                        bool IsSelfLoop);

    /// ReduceMI - Attempt to reduce MI, return true on success.
    bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, bool LiveCPSR,
                  bool IsSelfLoop, bool SkipPrologueEpilogue);
    bool HighLatencyCPSR;

    struct MBBInfo {
      /// HighLatencyCPSR - Flags leaving this block have high latency.
      bool HighLatencyCPSR = false;

      /// Visited - Set when this block has been visited.
      bool Visited = false;
    };
  OptimizeSize = MinimizeSize = false;
    unsigned FromOpc = ReduceTable[i].WideOpc;
    if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
      llvm_unreachable("Duplicated entries?");
    if (*Regs == ARM::CPSR)
      return true;
  switch (Def->getOpcode()) {
bool
Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
  if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
    return false;

  if (!CPSRDef)
    // If this BB loops back to itself, conservatively avoid narrowing the
    // first instruction that does partial flag update.
    return HighLatencyCPSR || FirstInSelfLoop;
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0 || Reg == ARM::CPSR)
      continue;
    if (!MO.isReg() || MO.isUndef() || MO.isDef())
      continue;
  // tMOVi8 usually doesn't start long dependency chains, and there are a lot
  // of them, so always shrink them when CPSR doesn't have high latency.
  if (Use->getOpcode() == ARM::t2MOVi ||
      Use->getOpcode() == ARM::t2MOVi16)
    return false;
bool
Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                                  bool is2Addr, ARMCC::CondCodes Pred,
                                  bool LiveCPSR, bool &HasCC, bool &CCDead) {
  if ((is2Addr && Entry.PredCC2 == 0) ||
      (!is2Addr && Entry.PredCC1 == 0)) {
  } else if ((is2Addr && Entry.PredCC2 == 2) ||
             (!is2Addr && Entry.PredCC1 == 2)) {
  unsigned Opc = MI->getOpcode();
  bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA_UPD);
  bool isLROk = (Opc == ARM::t2STMDB_UPD);
  bool isSPOk = isPCOk || isLROk;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    if (Reg == 0 || Reg == ARM::CPSR)
      continue;
    if (isPCOk && Reg == ARM::PC)
      continue;
    if (isLROk && Reg == ARM::LR)
      continue;
    if (Reg == ARM::SP) {
      if (isSPOk)
        continue;
      if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
        // Special case for these ldr / str with sp as base register.
        continue;
    }
bool
Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
                                  const ReduceEntry &Entry) {
  bool HasImmOffset = false;
  bool HasShift = false;
  bool HasOffReg = true;
  bool isLdStMul = false;
  unsigned Opc = Entry.NarrowOpc1;
  uint8_t ImmLimit = Entry.Imm1Limit;

  switch (Entry.WideOpc) {
    if (MI->getOperand(1).getReg() == ARM::SP) {
      Opc = Entry.NarrowOpc2;
      ImmLimit = Entry.Imm2Limit;
    }
  case ARM::t2LDR_POST:
  case ARM::t2STR_POST: {
    if (!MI->hasOneMemOperand() ||
        (*MI->memoperands_begin())->getAlign() < Align(4))
      return false;
    bool IsStore = Entry.WideOpc == ARM::t2STR_POST;
    Register Rt = MI->getOperand(IsStore ? 1 : 0).getReg();
    Register Rn = MI->getOperand(IsStore ? 0 : 1).getReg();
    unsigned Offset = MI->getOperand(3).getImm();
    unsigned PredImm = MI->getOperand(4).getImm();
    Register PredReg = MI->getOperand(5).getReg();
    MI->eraseFromBundle();
    Register BaseReg = MI->getOperand(0).getReg();
      if (MO.getReg() == BaseReg) {
    if (!MI->getOperand(0).isKill())
      return false;

    Register BaseReg = MI->getOperand(0).getReg();
      if (MO.getReg() == BaseReg)
        return false;
  case ARM::t2LDMIA_RET: {
    Register BaseReg = MI->getOperand(1).getReg();
    if (BaseReg != ARM::SP)
      return false;
    Opc = Entry.NarrowOpc2; // tPOP_RET
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD: {
    Register BaseReg = MI->getOperand(1).getReg();
    if (BaseReg == ARM::SP &&
        (Entry.WideOpc == ARM::t2LDMIA_UPD ||
         Entry.WideOpc == ARM::t2STMDB_UPD)) {
      Opc = Entry.NarrowOpc2; // tPOP or tPUSH
    } else if (!isARMLowRegister(BaseReg) ||
               (Entry.WideOpc != ARM::t2LDMIA_UPD &&
                Entry.WideOpc != ARM::t2STMIA_UPD)) {
      return false;
    }
  unsigned OffsetReg = 0;
  bool OffsetKill = false;
  bool OffsetInternal = false;
  if (HasShift) {
    OffsetReg  = MI->getOperand(2).getReg();
    OffsetKill = MI->getOperand(2).isKill();
    OffsetInternal = MI->getOperand(2).isInternalRead();

    if (MI->getOperand(3).getImm())
      // Thumb1 addressing mode doesn't support shift.
      return false;
  }
  unsigned OffsetImm = 0;
  if (HasImmOffset) {
    OffsetImm = MI->getOperand(2).getImm();
    unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;

    if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
      // Make sure the immediate field fits.
      return false;
  }
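  // For example, tLDRi has ImmLimit = 5 and word accesses use Scale = 4, so
  // the check above accepts word-aligned offsets in [0, 124].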
  // tSTMIA_UPD takes a defining register operand. We've already checked that
  // the base register is killed, so mark it as dead here.
  if (Entry.WideOpc == ARM::t2STMIA)
    MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead);
  if (!isLdStMul) {
    MIB.add(MI->getOperand(0));
    MIB.add(MI->getOperand(1));

    if (HasImmOffset)
      MIB.addImm(OffsetImm / Scale);
    assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
  LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                    << " to 16-bit: " << *MIB);
bool
Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                                const ReduceEntry &Entry, bool LiveCPSR,
                                bool IsSelfLoop) {
  unsigned Opc = MI->getOpcode();
  if (Opc == ARM::t2ADDri) {
    if (MI->getOperand(1).getReg() != ARM::SP) {
      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
        return true;
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    }
    unsigned Imm = MI->getOperand(2).getImm();
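    // tADDrSPi encodes its immediate scaled by 4, so the offset must be a
    // multiple of 4 and no larger than 255 * 4 = 1020.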
    if (Imm & 3 || Imm > 1020)
      return false;
    MachineInstrBuilder MIB =
        BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(ARM::tADDrSPi))
            .add(MI->getOperand(0))
            .add(MI->getOperand(1))
            .addImm(Imm / 4) // The tADDrSPi has an implied scale by four.
            .add(predOps(ARMCC::AL));
    LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                      << " to 16-bit: " << *MIB);
  if (MI->mayLoadOrStore())
    return ReduceLoadStore(MBB, MI, Entry);
  case ARM::t2ADDSrr: {
    if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
      return true;
    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    if (MI->getOperand(2).getImm() == 0)
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    if (MI->getOperand(1).isImm())
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    static const ReduceEntry NarrowEntry =
      { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1, 2,0, 0,1,0 };
    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop))
      return true;
    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
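    // tCMPr only accepts two low registers; the regular table entry (tCMPhir)
    // also accepts high registers, hence the two-step attempt above.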
    if (MI->getOperand(0).isKill())
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
bool
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                                const ReduceEntry &Entry, bool LiveCPSR,
                                bool IsSelfLoop) {
  if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
    // Don't issue movs with shifter operand for some CPUs unless we
    // are optimizing for size.
    return false;
  if (MI->getOpcode() == ARM::t2MUL) {
776 }
else if (Reg0 != Reg1) {
778 unsigned CommOpIdx1 = 1;
780 if (!
TII->findCommutedOpIndices(*
MI, CommOpIdx1, CommOpIdx2) ||
781 MI->getOperand(CommOpIdx2).getReg() != Reg0)
784 TII->commuteInstruction(*
MI,
false, CommOpIdx1, CommOpIdx2);
  if (Entry.Imm2Limit) {
    unsigned Imm = MI->getOperand(2).getImm();
    unsigned Limit = (1 << Entry.Imm2Limit) - 1;
    if (Imm > Limit)
      return false;
  }
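  // For example, tADDi8 has Imm2Limit = 8, so the computed Limit is
  // (1 << 8) - 1 = 255.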
  bool SkipPred = false;
    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps-1).isDead())
      CCDead = true;
  if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
    return false;
  // Avoid adding a false dependency on the partial flag update by some 16-bit
  // instructions which have the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
      canAddPseudoFlagDep(MI, IsSelfLoop))
    return false;
  MIB.add(MI->getOperand(0));
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    MIB.add(MI->getOperand(i));
  LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                    << " to 16-bit: " << *MIB);
bool
Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry, bool LiveCPSR,
                                 bool IsSelfLoop) {
  if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
    // Don't issue movs with shifter operand for some CPUs unless we
    // are optimizing for size.
    return false;
  unsigned Limit = ~0U;
  if (Entry.Imm1Limit)
    Limit = (1 << Entry.Imm1Limit) - 1;
      if (!Reg || Reg == ARM::CPSR)
        continue;
    } else if (MO.isImm() &&
               !MCID.operands()[i].isPredicate()) {
      if (((unsigned)MO.getImm()) > Limit)
        return false;
    }
  bool SkipPred = false;
    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps-1).isDead())
      CCDead = true;
  if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
    return false;
  // Avoid adding a false dependency on the partial flag update by some 16-bit
  // instructions which have the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
      canAddPseudoFlagDep(MI, IsSelfLoop))
    return false;
  // TEQ is special in that it doesn't define a register but we're converting
  // it into an EOR which does. Add the first operand as a def and then
  // again as a use.
  if (MCID.getOpcode() == ARM::t2TEQrr) {
    MIB.add(MI->getOperand(0));
    MIB->getOperand(0).setIsKill(false);
    MIB->getOperand(0).setIsDef(true);
    MIB->getOperand(0).setIsDead(true);

    if (NewMCID.hasOptionalDef())
      MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
    MIB.add(MI->getOperand(0));
  } else {
    MIB.add(MI->getOperand(0));
    if (NewMCID.hasOptionalDef())
      MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
  }
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    if ((MCID.getOpcode() == ARM::t2RSBSri ||
         MCID.getOpcode() == ARM::t2RSBri) && i == 2)
      // Skip the zero immediate operand, it's now implicit.
      continue;
    if (SkipPred && isPred)
      continue;
  LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                    << " to 16-bit: " << *MIB);
    if (MO.getReg() != ARM::CPSR)
      continue;

  return HasDef || LiveCPSR;
    if (MO.getReg() != ARM::CPSR)
      continue;
    assert(LiveCPSR && "CPSR liveness tracking is wrong!");
bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
                                bool LiveCPSR, bool IsSelfLoop,
                                bool SkipPrologueEpilogue) {
  unsigned Opcode = MI->getOpcode();
  DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
  if (OPI == ReduceOpcodeMap.end())
    return false;

  const ReduceEntry &Entry = ReduceTable[OPI->second];
    return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
  // Try to transform to a 16-bit two-address instruction.
  if (Entry.NarrowOpc2 &&
      ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
    return true;
  // Try to transform to a 16-bit non-two-address instruction.
  if (Entry.NarrowOpc1 &&
      ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
    return true;
bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB,
                                 bool SkipPrologueEpilogue) {
  HighLatencyCPSR = false;
    const MBBInfo &PInfo = BlockInfo[Pred->getNumber()];
    if (!PInfo.Visited) {
      // Since blocks are visited in RPO, this must be a back-edge.
      continue;
    }
    if (PInfo.HighLatencyCPSR) {
      HighLatencyCPSR = true;
      break;
    }
  for (; MII != E; MII = NextMII) {
    NextMII = std::next(MII);
    MachineInstr *MI = &*MII;
    if (MI->isBundle()) {
      BundleMI = MI;
      continue;
    }

    if (MI->isDebugInstr())
      continue;
    bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();
    if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop, SkipPrologueEpilogue)) {
      Modified = true;
      // Removing and reinserting the first instruction in a bundle will break
      // up the bundle. Fix the bundling if it was broken.
      if (NextInSameBundle && !NextMII->isBundledWithPred())
        NextMII->bundleWithPred();
    }
    if (BundleMI && !NextInSameBundle && MI->isInsideBundle()) {
    bool DefCPSR = false;
      HighLatencyCPSR = false;
      IsSelfLoop = false;
    } else if (DefCPSR) {
  Info.HighLatencyCPSR = HighLatencyCPSR;
  Info.Visited = true;
  if (PredicateFtor && !PredicateFtor(MF.getFunction()))
    return false;
  return new Thumb2SizeReduce(std::move(Ftor));