103#define DEBUG_TYPE "peephole-opt"
107 cl::desc(
"Aggressive extension optimization"));
111 cl::desc(
"Disable the peephole optimizer"));
118 cl::desc(
"Disable advanced copy optimization"));
122 cl::desc(
"Disable non-allocatable physical register copy optimization"));
128 cl::desc(
"Limit the length of PHI chains to lookup"));
134 cl::desc(
"Maximum length of recurrence chain when evaluating the benefit "
135 "of commuting operands"));
137STATISTIC(NumReuse,
"Number of extension results reused");
139STATISTIC(NumImmFold,
"Number of move immediate folded");
142STATISTIC(NumUncoalescableCopies,
"Number of uncoalescable copies optimized");
143STATISTIC(NumRewrittenCopies,
"Number of copies rewritten");
144STATISTIC(NumNAPhysCopies,
"Number of non-allocatable physical copies removed");
148class ValueTrackerResult;
149class RecurrenceInstr;
155 int CurrentSrcIdx = 0;
158 virtual ~Rewriter() =
default;
190 virtual bool RewriteCurrentSource(
Register NewReg,
unsigned NewSubReg) = 0;
194class CopyRewriter :
public Rewriter {
197 assert(
MI.isCopy() &&
"Expected copy instruction");
199 virtual ~CopyRewriter() =
default;
203 if (++CurrentSrcIdx > 1)
207 const MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
210 const MachineOperand &MODef = CopyLike.getOperand(0);
215 bool RewriteCurrentSource(
Register NewReg,
unsigned NewSubReg)
override {
216 MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
225class UncoalescableRewriter :
public Rewriter {
229 UncoalescableRewriter(MachineInstr &
MI) :
Rewriter(
MI) {
230 NumDefs =
MI.getDesc().getNumDefs();
240 if (CurrentSrcIdx == NumDefs)
243 while (CopyLike.getOperand(CurrentSrcIdx).isDead()) {
245 if (CurrentSrcIdx == NumDefs)
251 const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx);
258 bool RewriteCurrentSource(
Register NewReg,
unsigned NewSubReg)
override {
264class InsertSubregRewriter :
public Rewriter {
267 assert(
MI.isInsertSubreg() &&
"Invalid instruction");
284 if (CurrentSrcIdx == 2)
288 const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
290 const MachineOperand &MODef = CopyLike.getOperand(0);
298 (
unsigned)CopyLike.getOperand(3).getImm());
302 bool RewriteCurrentSource(
Register NewReg,
unsigned NewSubReg)
override {
303 if (CurrentSrcIdx != 2)
306 MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
314class ExtractSubregRewriter :
public Rewriter {
315 const TargetInstrInfo &TII;
318 ExtractSubregRewriter(MachineInstr &
MI,
const TargetInstrInfo &TII)
320 assert(
MI.isExtractSubreg() &&
"Invalid instruction");
331 if (CurrentSrcIdx == 1)
335 const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
344 const MachineOperand &MODef = CopyLike.getOperand(0);
349 bool RewriteCurrentSource(
Register NewReg,
unsigned NewSubReg)
override {
351 if (CurrentSrcIdx != 1)
354 CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);
365 CopyLike.removeOperand(2);
367 CopyLike.setDesc(TII.get(TargetOpcode::COPY));
370 CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
376class RegSequenceRewriter :
public Rewriter {
379 assert(
MI.isRegSequence() &&
"Invalid instruction");
403 if (
static_cast<unsigned>(CurrentSrcIdx) >= CopyLike.getNumOperands())
406 const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
407 Src.Reg = MOInsertedReg.
getReg();
412 Dst.SubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();
414 const MachineOperand &MODef = CopyLike.getOperand(0);
416 assert(MODef.
getSubReg() == 0 &&
"cannot have subregister def in SSA");
420 bool RewriteCurrentSource(
Register NewReg,
unsigned NewSubReg)
override {
421 MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
429 const TargetInstrInfo *TII =
nullptr;
430 const TargetRegisterInfo *TRI =
nullptr;
431 MachineRegisterInfo *MRI =
nullptr;
432 MachineDominatorTree *DT =
nullptr;
433 MachineLoopInfo *MLI =
nullptr;
436 PeepholeOptimizer(MachineDominatorTree *DT, MachineLoopInfo *MLI)
437 : DT(DT), MLI(MLI) {}
439 bool run(MachineFunction &MF);
441 using RewriteMapTy = SmallDenseMap<RegSubRegPair, ValueTrackerResult>;
444 using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>;
447 bool optimizeCmpInstr(MachineInstr &
MI);
448 bool optimizeExtInstr(MachineInstr &
MI, MachineBasicBlock &
MBB,
449 SmallPtrSetImpl<MachineInstr *> &LocalMIs);
450 bool optimizeSelect(MachineInstr &
MI,
451 SmallPtrSetImpl<MachineInstr *> &LocalMIs);
452 bool optimizeCondBranch(MachineInstr &
MI);
454 bool optimizeCoalescableCopyImpl(
Rewriter &&CpyRewriter);
455 bool optimizeCoalescableCopy(MachineInstr &
MI);
456 bool optimizeUncoalescableCopy(MachineInstr &
MI,
457 SmallPtrSetImpl<MachineInstr *> &LocalMIs);
458 bool optimizeRecurrence(MachineInstr &
PHI);
459 bool findNextSource(
const TargetRegisterClass *DefRC,
unsigned DefSubReg,
461 bool isMoveImmediate(MachineInstr &
MI, SmallSet<Register, 4> &ImmDefRegs,
462 DenseMap<Register, MachineInstr *> &ImmDefMIs);
463 bool foldImmediate(MachineInstr &
MI, SmallSet<Register, 4> &ImmDefRegs,
464 DenseMap<Register, MachineInstr *> &ImmDefMIs,
472 const SmallSet<Register, 2> &TargetReg,
473 RecurrenceCycle &RC);
480 bool foldRedundantCopy(MachineInstr &
MI);
491 foldRedundantNAPhysCopy(MachineInstr &
MI,
492 DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs);
494 bool isLoadFoldable(MachineInstr &
MI,
495 SmallSet<Register, 16> &FoldAsLoadDefCandidates);
499 static bool isCoalescableCopy(
const MachineInstr &
MI) {
502 return MI.isCopy() ||
504 MI.isExtractSubreg()));
509 static bool isUncoalescableCopy(
const MachineInstr &
MI) {
511 MI.isInsertSubregLike() ||
512 MI.isExtractSubregLike()));
515 MachineInstr &rewriteSource(MachineInstr &CopyLike,
RegSubRegPair Def,
516 RewriteMapTy &RewriteMap);
520 DenseMap<RegSubRegPair, MachineInstr *> CopySrcMIs;
523 void MF_HandleInsertion(MachineInstr &
MI)
override {}
530 unsigned SrcSubReg =
MI.getOperand(1).getSubReg();
531 if (!SrcReg.
isVirtual() && !MRI->isConstantPhysReg(SrcReg))
540 void deleteChangedCopy(MachineInstr &
MI) {
542 if (!getCopySrc(
MI, SrcPair))
545 auto It = CopySrcMIs.find(SrcPair);
546 if (It != CopySrcMIs.end() && It->second == &
MI)
547 CopySrcMIs.erase(It);
550 void MF_HandleRemoval(MachineInstr &
MI)
override { deleteChangedCopy(
MI); }
552 void MF_HandleChangeDesc(MachineInstr &
MI,
const MCInstrDesc &TID)
override {
553 deleteChangedCopy(
MI);
561 PeepholeOptimizerLegacy() : MachineFunctionPass(ID) {
565 bool runOnMachineFunction(MachineFunction &MF)
override;
567 void getAnalysisUsage(AnalysisUsage &AU)
const override {
578 MachineFunctionProperties getRequiredProperties()
const override {
579 return MachineFunctionProperties().setIsSSA();
589class RecurrenceInstr {
591 using IndexPair = std::pair<unsigned, unsigned>;
593 RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
594 RecurrenceInstr(MachineInstr *MI,
unsigned Idx1,
unsigned Idx2)
595 : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}
597 MachineInstr *getMI()
const {
return MI; }
598 std::optional<IndexPair> getCommutePair()
const {
return CommutePair; }
602 std::optional<IndexPair> CommutePair;
608class ValueTrackerResult {
614 const MachineInstr *Inst =
nullptr;
617 ValueTrackerResult() =
default;
621 bool isValid()
const {
return getNumSources() > 0; }
623 void setInst(
const MachineInstr *
I) { Inst =
I; }
624 const MachineInstr *getInst()
const {
return Inst; }
631 void addSource(
Register SrcReg,
unsigned SrcSubReg) {
635 void setSource(
int Idx,
Register SrcReg,
unsigned SrcSubReg) {
636 assert(Idx < getNumSources() &&
"Reg pair source out of index");
640 int getNumSources()
const {
return RegSrcs.size(); }
645 assert(Idx < getNumSources() &&
"Reg source out of index");
646 return RegSrcs[Idx].Reg;
649 unsigned getSrcSubReg(
int Idx)
const {
650 assert(Idx < getNumSources() &&
"SubReg source out of index");
651 return RegSrcs[Idx].SubReg;
655 if (
Other.getInst() != getInst())
658 if (
Other.getNumSources() != getNumSources())
661 for (
int i = 0, e =
Other.getNumSources(); i != e; ++i)
662 if (
Other.getSrcReg(i) != getSrcReg(i) ||
663 Other.getSrcSubReg(i) != getSrcSubReg(i))
688 const MachineInstr *Def =
nullptr;
700 const MachineRegisterInfo &MRI;
703 const TargetInstrInfo *TII;
706 ValueTrackerResult getNextSourceImpl();
709 ValueTrackerResult getNextSourceFromCopy();
712 ValueTrackerResult getNextSourceFromBitcast();
715 ValueTrackerResult getNextSourceFromRegSequence();
718 ValueTrackerResult getNextSourceFromInsertSubreg();
721 ValueTrackerResult getNextSourceFromExtractSubreg();
724 ValueTrackerResult getNextSourceFromSubregToReg();
727 ValueTrackerResult getNextSourceFromPHI();
739 ValueTracker(
Register Reg,
unsigned DefSubReg,
const MachineRegisterInfo &MRI,
740 const TargetInstrInfo *TII =
nullptr)
741 : DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) {
742 if (!Reg.isPhysical()) {
743 Def = MRI.getVRegDef(Reg);
744 DefIdx = MRI.def_begin(Reg).getOperandNo();
753 ValueTrackerResult getNextSource();
758char PeepholeOptimizerLegacy::ID = 0;
763 "Peephole Optimizations",
false,
false)
777bool PeepholeOptimizer::optimizeExtInstr(
782 if (!
TII->isCoalescableExtInstr(
MI, SrcReg, DstReg, SubIdx))
788 if (
MRI->hasOneNonDBGUse(SrcReg))
795 DstRC =
TRI->getSubClassWithSubReg(DstRC, SubIdx);
805 TRI->getSubClassWithSubReg(
MRI->getRegClass(SrcReg), SubIdx) !=
nullptr;
811 ReachedBBs.insert(UI.getParent());
819 bool ExtendLife =
true;
821 MachineInstr *UseMI = UseMO.getParent();
825 if (UseMI->isPHI()) {
831 if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
851 if (
UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
855 if (UseMBB == &
MBB) {
857 if (!LocalMIs.count(
UseMI))
858 Uses.push_back(&UseMO);
859 }
else if (ReachedBBs.count(UseMBB)) {
862 Uses.push_back(&UseMO);
866 ExtendedUses.push_back(&UseMO);
875 if (ExtendLife && !ExtendedUses.empty())
877 Uses.append(ExtendedUses.begin(), ExtendedUses.end());
882 SmallPtrSet<MachineBasicBlock *, 4> PHIBBs;
887 for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
889 PHIBBs.insert(UI.getParent());
891 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
892 for (MachineOperand *UseMO : Uses) {
893 MachineInstr *UseMI = UseMO->getParent();
894 MachineBasicBlock *UseMBB = UseMI->getParent();
895 if (PHIBBs.count(UseMBB))
900 MRI->clearKillFlags(DstReg);
901 MRI->constrainRegClass(DstReg, DstRC);
919 RC = MRI->getRegClass(UseMI->getOperand(0).getReg());
921 Register NewVR = MRI->createVirtualRegister(RC);
922 BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
923 TII->get(TargetOpcode::COPY), NewVR)
924 .addReg(DstReg, 0, SubIdx);
928 UseMO->setReg(NewVR);
945 int64_t CmpMask, CmpValue;
952 if (
TII->optimizeCompareInstr(
MI, SrcReg, SrcReg2, CmpMask, CmpValue,
MRI)) {
962bool PeepholeOptimizer::optimizeSelect(
963 MachineInstr &
MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
965 unsigned FalseOp = 0;
966 bool Optimizable =
false;
968 if (
TII->analyzeSelect(
MI,
Cond, TrueOp, FalseOp, Optimizable))
972 if (!
TII->optimizeSelect(
MI, LocalMIs))
975 MI.eraseFromParent();
981bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &
MI) {
982 return TII->optimizeCondBranch(
MI);
998bool PeepholeOptimizer::findNextSource(
const TargetRegisterClass *DefRC,
1001 RewriteMapTy &RewriteMap) {
1010 unsigned PHICount = 0;
1017 ValueTracker ValTracker(CurSrcPair.
Reg, CurSrcPair.
SubReg, *
MRI,
TII);
1022 ValueTrackerResult Res = ValTracker.getNextSource();
1028 auto [InsertPt, WasInserted] = RewriteMap.try_emplace(CurSrcPair, Res);
1031 const ValueTrackerResult &CurSrcRes = InsertPt->second;
1033 assert(CurSrcRes == Res &&
"ValueTrackerResult found must match");
1036 if (CurSrcRes.getNumSources() > 1) {
1038 <<
"findNextSource: found PHI cycle, aborting...\n");
1046 unsigned NumSrcs = Res.getNumSources();
1054 for (
unsigned i = 0; i < NumSrcs; ++i)
1059 CurSrcPair = Res.getSrc(0);
1068 const TargetRegisterClass *SrcRC =
MRI->getRegClass(CurSrcPair.
Reg);
1069 if (!
TRI->shouldRewriteCopySrc(DefRC, DefSubReg, SrcRC,
1075 if (PHICount > 0 && CurSrcPair.
SubReg != 0)
1081 }
while (!SrcToLook.
empty());
1084 return CurSrcPair.
Reg !=
Reg;
1096 assert(!SrcRegs.
empty() &&
"No sources to create a PHI instruction?");
1101 assert(SrcRegs[0].
SubReg == 0 &&
"should not have subreg operand");
1102 Register NewVR =
MRI.createVirtualRegister(NewRC);
1105 TII.get(TargetOpcode::PHI), NewVR);
1107 unsigned MBBOpIdx = 2;
1109 MIB.
addReg(RegPair.Reg, 0, RegPair.SubReg);
1114 MRI.clearKillFlags(RegPair.Reg);
1130 const PeepholeOptimizer::RewriteMapTy &RewriteMap,
1131 bool HandleMultipleSources =
true) {
1134 ValueTrackerResult Res = RewriteMap.
lookup(LookupSrc);
1140 unsigned NumSrcs = Res.getNumSources();
1142 LookupSrc.
Reg = Res.getSrcReg(0);
1143 LookupSrc.
SubReg = Res.getSrcSubReg(0);
1148 if (!HandleMultipleSources)
1154 for (
unsigned i = 0; i < NumSrcs; ++i) {
1155 RegSubRegPair PHISrc(Res.getSrcReg(i), Res.getSrcSubReg(i));
1173bool PeepholeOptimizer::optimizeCoalescableCopyImpl(
Rewriter &&CpyRewriter) {
1179 while (CpyRewriter.getNextRewritableSource(TrackPair, Dst)) {
1180 if (Dst.Reg.isPhysical()) {
1188 const TargetRegisterClass *DefRC =
MRI->getRegClass(Dst.Reg);
1191 RewriteMapTy RewriteMap;
1194 if (!findNextSource(DefRC, Dst.SubReg, TrackPair, RewriteMap))
1202 "should not rewrite source to original value");
1210 const TargetRegisterClass *RC =
MRI->getRegClass(NewSrc.
Reg);
1211 const TargetRegisterClass *WithSubRC =
1212 TRI->getSubClassWithSubReg(RC, NewSrc.
SubReg);
1213 if (!
MRI->constrainRegClass(NewSrc.
Reg, WithSubRC))
1219 if (CpyRewriter.RewriteCurrentSource(NewSrc.
Reg, NewSrc.
SubReg)) {
1221 MRI->clearKillFlags(NewSrc.
Reg);
1231 NumRewrittenCopies +=
Changed;
1246bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &
MI) {
1247 assert(isCoalescableCopy(
MI) &&
"Invalid argument");
1248 assert(
MI.getDesc().getNumDefs() == 1 &&
1249 "Coalescer can understand multiple defs?!");
1250 const MachineOperand &MODef =
MI.getOperand(0);
1255 switch (
MI.getOpcode()) {
1256 case TargetOpcode::COPY:
1257 return optimizeCoalescableCopyImpl(CopyRewriter(
MI));
1258 case TargetOpcode::INSERT_SUBREG:
1259 return optimizeCoalescableCopyImpl(InsertSubregRewriter(
MI));
1260 case TargetOpcode::EXTRACT_SUBREG:
1261 return optimizeCoalescableCopyImpl(ExtractSubregRewriter(
MI, *
TII));
1262 case TargetOpcode::REG_SEQUENCE:
1263 return optimizeCoalescableCopyImpl(RegSequenceRewriter(
MI));
1266 if (
MI.isBitcast() ||
MI.isRegSequenceLike() ||
MI.isInsertSubregLike() ||
1267 MI.isExtractSubregLike())
1268 return optimizeCoalescableCopyImpl(UncoalescableRewriter(
MI));
1278MachineInstr &PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,
1280 RewriteMapTy &RewriteMap) {
1281 assert(!
Def.Reg.isPhysical() &&
"We do not rewrite physical registers");
1287 const TargetRegisterClass *DefRC =
MRI->getRegClass(
Def.Reg);
1288 Register NewVReg =
MRI->createVirtualRegister(DefRC);
1291 const TargetRegisterClass *NewSrcRC =
MRI->getRegClass(NewSrc.
Reg);
1292 const TargetRegisterClass *WithSubRC =
1293 TRI->getSubClassWithSubReg(NewSrcRC, NewSrc.
SubReg);
1298 if (!
MRI->constrainRegClass(NewSrc.
Reg, WithSubRC))
1302 MachineInstr *NewCopy =
1304 TII->get(TargetOpcode::COPY), NewVReg)
1315 MRI->replaceRegWith(
Def.Reg, NewVReg);
1316 MRI->clearKillFlags(NewVReg);
1320 MRI->clearKillFlags(NewSrc.
Reg);
1336bool PeepholeOptimizer::optimizeUncoalescableCopy(
1337 MachineInstr &
MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
1338 assert(isUncoalescableCopy(
MI) &&
"Invalid argument");
1339 UncoalescableRewriter CpyRewriter(
MI);
1344 RewriteMapTy RewriteMap;
1348 while (CpyRewriter.getNextRewritableSource(Src, Def)) {
1351 if (
Def.Reg.isPhysical())
1357 const TargetRegisterClass *DefRC =
MRI->getRegClass(
Def.Reg);
1361 if (!findNextSource(DefRC,
Def.SubReg, Def, RewriteMap))
1370 MachineInstr &NewCopy = rewriteSource(
MI, Def, RewriteMap);
1371 LocalMIs.
insert(&NewCopy);
1376 MI.eraseFromParent();
1377 ++NumUncoalescableCopies;
1384bool PeepholeOptimizer::isLoadFoldable(
1385 MachineInstr &
MI, SmallSet<Register, 16> &FoldAsLoadDefCandidates) {
1386 if (!
MI.canFoldAsLoad() || !
MI.mayLoad())
1388 const MCInstrDesc &MCID =
MI.getDesc();
1397 MRI->hasOneNonDBGUser(
Reg)) {
1404bool PeepholeOptimizer::isMoveImmediate(
1405 MachineInstr &
MI, SmallSet<Register, 4> &ImmDefRegs,
1406 DenseMap<Register, MachineInstr *> &ImmDefMIs) {
1407 const MCInstrDesc &MCID =
MI.getDesc();
1408 if (MCID.
getNumDefs() != 1 || !
MI.getOperand(0).isReg())
1415 if (!
MI.isMoveImmediate() && !
TII->getConstValDefinedInReg(
MI,
Reg, ImmVal))
1426bool PeepholeOptimizer::foldImmediate(
1427 MachineInstr &
MI, SmallSet<Register, 4> &ImmDefRegs,
1428 DenseMap<Register, MachineInstr *> &ImmDefMIs,
bool &
Deleted) {
1430 for (
unsigned i = 0, e =
MI.getDesc().getNumOperands(); i != e; ++i) {
1431 MachineOperand &MO =
MI.getOperand(i);
1439 DenseMap<Register, MachineInstr *>::iterator
II = ImmDefMIs.
find(
Reg);
1440 assert(
II != ImmDefMIs.
end() &&
"couldn't find immediate definition");
1446 if (
MRI->getVRegDef(
Reg) &&
1450 MRI->getRegClass(DstReg) ==
MRI->getRegClass(
Reg)) {
1451 MRI->replaceRegWith(DstReg,
Reg);
1452 MI.eraseFromParent();
1476bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &
MI) {
1477 assert(
MI.isCopy() &&
"expected a COPY machine instruction");
1480 if (!getCopySrc(
MI, SrcPair))
1487 if (CopySrcMIs.
insert(std::make_pair(SrcPair, &
MI)).second) {
1492 MachineInstr *PrevCopy = CopySrcMIs.
find(SrcPair)->second;
1495 "Unexpected mismatching subreg!");
1503 if (
MRI->getRegClass(DstReg) !=
MRI->getRegClass(PrevDstReg))
1506 MRI->replaceRegWith(DstReg, PrevDstReg);
1509 MRI->clearKillFlags(PrevDstReg);
1513bool PeepholeOptimizer::isNAPhysCopy(
Register Reg) {
1517bool PeepholeOptimizer::foldRedundantNAPhysCopy(
1518 MachineInstr &
MI, DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs) {
1519 assert(
MI.isCopy() &&
"expected a COPY machine instruction");
1526 if (isNAPhysCopy(SrcReg) && DstReg.
isVirtual()) {
1530 NAPhysToVirtMIs.
insert({SrcReg, &
MI});
1534 if (!(SrcReg.
isVirtual() && isNAPhysCopy(DstReg)))
1538 auto PrevCopy = NAPhysToVirtMIs.
find(DstReg);
1539 if (PrevCopy == NAPhysToVirtMIs.
end()) {
1542 LLVM_DEBUG(
dbgs() <<
"NAPhysCopy: intervening clobber forbids erasing "
1548 if (PrevDstReg == SrcReg) {
1561 NAPhysToVirtMIs.
erase(PrevCopy);
1570bool PeepholeOptimizer::findTargetRecurrence(
1571 Register Reg,
const SmallSet<Register, 2> &TargetRegs,
1572 RecurrenceCycle &RC) {
1582 if (!
MRI->hasOneNonDBGUse(
Reg))
1589 MachineInstr &
MI = *(
MRI->use_instr_nodbg_begin(
Reg));
1590 unsigned Idx =
MI.findRegisterUseOperandIdx(
Reg,
nullptr);
1594 if (
MI.getDesc().getNumDefs() != 1)
1597 MachineOperand &DefOp =
MI.getOperand(0);
1604 unsigned TiedUseIdx;
1605 if (!
MI.isRegTiedToUseOperand(0, &TiedUseIdx))
1608 if (Idx == TiedUseIdx) {
1609 RC.push_back(RecurrenceInstr(&
MI));
1610 return findTargetRecurrence(DefOp.
getReg(), TargetRegs, RC);
1614 if (
TII->findCommutedOpIndices(
MI, Idx, CommIdx) && CommIdx == TiedUseIdx) {
1615 RC.push_back(RecurrenceInstr(&
MI, Idx, CommIdx));
1616 return findTargetRecurrence(DefOp.
getReg(), TargetRegs, RC);
1641bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &
PHI) {
1642 SmallSet<Register, 2> TargetRegs;
1643 for (
unsigned Idx = 1; Idx <
PHI.getNumOperands(); Idx += 2) {
1644 MachineOperand &MO =
PHI.getOperand(Idx);
1651 if (findTargetRecurrence(
PHI.getOperand(0).getReg(), TargetRegs, RC)) {
1655 for (
auto &RI : RC) {
1657 auto CP = RI.getCommutePair();
1660 TII->commuteInstruction(*(RI.getMI()),
false, (*CP).first,
1677 PeepholeOptimizer Impl(DT, MLI);
1689bool PeepholeOptimizerLegacy::runOnMachineFunction(
MachineFunction &MF) {
1693 ? &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree()
1695 auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
1696 PeepholeOptimizer Impl(DT, MLI);
1697 return Impl.run(MF);
1702 LLVM_DEBUG(
dbgs() <<
"********** PEEPHOLE OPTIMIZER **********\n");
1716 bool SeenMoveImm =
false;
1749 if (
MI->isDebugInstr())
1752 if (
MI->isPosition())
1755 if (IsLoopHeader &&
MI->isPHI()) {
1756 if (optimizeRecurrence(*
MI)) {
1762 if (!
MI->isCopy()) {
1763 for (
const MachineOperand &MO :
MI->operands()) {
1767 if (MO.
isDef() && isNAPhysCopy(
Reg)) {
1769 if (Def != NAPhysToVirtMIs.
end()) {
1773 <<
"NAPhysCopy: invalidating because of " << *
MI);
1774 NAPhysToVirtMIs.
erase(Def);
1779 for (
auto &RegMI : NAPhysToVirtMIs) {
1783 <<
"NAPhysCopy: invalidating because of " << *
MI);
1784 NAPhysToVirtMIs.erase(Def);
1791 if (
MI->isImplicitDef() ||
MI->isKill())
1794 if (
MI->isInlineAsm() ||
MI->hasUnmodeledSideEffects()) {
1801 NAPhysToVirtMIs.clear();
1804 if ((isUncoalescableCopy(*
MI) &&
1805 optimizeUncoalescableCopy(*
MI, LocalMIs)) ||
1806 (
MI->isCompare() && optimizeCmpInstr(*
MI)) ||
1807 (
MI->isSelect() && optimizeSelect(*
MI, LocalMIs))) {
1814 if (
MI->isConditionalBranch() && optimizeCondBranch(*
MI)) {
1819 if (isCoalescableCopy(*
MI) && optimizeCoalescableCopy(*
MI)) {
1825 if (
MI->isCopy() && (foldRedundantCopy(*
MI) ||
1826 foldRedundantNAPhysCopy(*
MI, NAPhysToVirtMIs))) {
1829 MI->eraseFromParent();
1834 if (isMoveImmediate(*
MI, ImmDefRegs, ImmDefMIs)) {
1856 if (!isLoadFoldable(*
MI, FoldAsLoadDefCandidates) &&
1857 !FoldAsLoadDefCandidates.
empty()) {
1864 const MCInstrDesc &MIDesc =
MI->getDesc();
1865 for (
unsigned i = MIDesc.
getNumDefs(); i !=
MI->getNumOperands(); ++i) {
1866 const MachineOperand &MOp =
MI->getOperand(i);
1870 if (FoldAsLoadDefCandidates.
count(FoldAsLoadDefReg)) {
1875 Register FoldedReg = FoldAsLoadDefReg;
1876 MachineInstr *
DefMI =
nullptr;
1877 if (MachineInstr *FoldMI =
1878 TII->optimizeLoadInstr(*
MI,
MRI, FoldAsLoadDefReg,
DefMI)) {
1887 if (
MI->shouldUpdateAdditionalCallInfo())
1888 MI->getMF()->moveAdditionalCallInfo(
MI, FoldMI);
1889 MI->eraseFromParent();
1891 MRI->markUsesInDebugValueAsUndef(FoldedReg);
1892 FoldAsLoadDefCandidates.
erase(FoldedReg);
1906 if (
MI->isLoadFoldBarrier()) {
1908 FoldAsLoadDefCandidates.
clear();
1913 MF.resetDelegate(
this);
1917ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
1918 assert(
Def->isCopy() &&
"Invalid definition");
1923 assert(
Def->getNumOperands() -
Def->getNumImplicitOperands() == 2 &&
1924 "Invalid number of operands");
1925 assert(!
Def->hasImplicitDef() &&
"Only implicit uses are allowed");
1926 assert(!
Def->getOperand(DefIdx).getSubReg() &&
"no subregister defs in SSA");
1929 const MachineOperand &Src =
Def->getOperand(1);
1931 return ValueTrackerResult();
1934 unsigned SubReg = Src.getSubReg();
1936 const TargetRegisterInfo *
TRI =
MRI.getTargetRegisterInfo();
1941 const TargetRegisterClass *RegRC =
MRI.getRegClass(SrcReg);
1942 const TargetRegisterClass *SrcWithSubRC =
1943 TRI->getSubClassWithSubReg(RegRC,
SubReg);
1944 if (RegRC != SrcWithSubRC)
1945 return ValueTrackerResult();
1948 return ValueTrackerResult();
1952 return ValueTrackerResult(SrcReg,
SubReg);
1955ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
1956 assert(
Def->isBitcast() &&
"Invalid definition");
1959 if (
Def->mayRaiseFPException() ||
Def->hasUnmodeledSideEffects())
1960 return ValueTrackerResult();
1963 if (
Def->getDesc().getNumDefs() != 1)
1964 return ValueTrackerResult();
1966 assert(!
Def->getOperand(DefIdx).getSubReg() &&
"no subregister defs in SSA");
1968 unsigned SrcIdx =
Def->getNumOperands();
1969 for (
unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx;
OpIdx != EndOpIdx;
1971 const MachineOperand &MO =
Def->getOperand(
OpIdx);
1977 assert(!MO.
isDef() &&
"We should have skipped all the definitions by now");
1978 if (SrcIdx != EndOpIdx)
1980 return ValueTrackerResult();
1986 if (SrcIdx >=
Def->getNumOperands())
1987 return ValueTrackerResult();
1989 const MachineOperand &DefOp =
Def->getOperand(DefIdx);
1993 for (
const MachineInstr &
UseMI :
MRI.use_nodbg_instructions(DefOp.
getReg())) {
1994 if (
UseMI.isSubregToReg())
1995 return ValueTrackerResult();
1998 const MachineOperand &Src =
Def->getOperand(SrcIdx);
2000 return ValueTrackerResult();
2001 return ValueTrackerResult(Src.getReg(), Src.getSubReg());
2004ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
2005 assert((
Def->isRegSequence() ||
Def->isRegSequenceLike()) &&
2006 "Invalid definition");
2008 assert(!
Def->getOperand(DefIdx).getSubReg() &&
"illegal subregister def");
2011 if (!
TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
2012 return ValueTrackerResult();
2020 if (RegSeqInput.SubIdx == DefSubReg)
2021 return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
2024 const TargetRegisterInfo *
TRI =
MRI.getTargetRegisterInfo();
2029 LaneBitmask DefMask =
TRI->getSubRegIndexLaneMask(DefSubReg);
2030 LaneBitmask ThisOpRegMask =
TRI->getSubRegIndexLaneMask(RegSeqInput.SubIdx);
2036 if ((DefMask & ThisOpRegMask) != DefMask)
2039 unsigned ReverseDefCompose =
2040 TRI->reverseComposeSubRegIndices(RegSeqInput.SubIdx, DefSubReg);
2041 if (!ReverseDefCompose)
2044 unsigned ComposedDefInSrcReg1 =
2045 TRI->composeSubRegIndices(RegSeqInput.SubReg, ReverseDefCompose);
2051 const TargetRegisterClass *SrcRC =
MRI.getRegClass(RegSeqInput.Reg);
2052 const TargetRegisterClass *SrcWithSubRC =
2053 TRI->getSubClassWithSubReg(SrcRC, ComposedDefInSrcReg1);
2054 if (SrcRC != SrcWithSubRC)
2055 return ValueTrackerResult();
2057 return ValueTrackerResult(RegSeqInput.Reg, ComposedDefInSrcReg1);
2063 return ValueTrackerResult();
2066ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() {
2067 assert((
Def->isInsertSubreg() ||
Def->isInsertSubregLike()) &&
2068 "Invalid definition");
2069 assert(!
Def->getOperand(DefIdx).getSubReg() &&
"no subreg defs in SSA");
2073 if (!
TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
2074 return ValueTrackerResult();
2083 if (InsertedReg.
SubIdx == DefSubReg) {
2084 return ValueTrackerResult(InsertedReg.
Reg, InsertedReg.
SubReg);
2089 const MachineOperand &MODef =
Def->getOperand(DefIdx);
2095 return ValueTrackerResult();
2099 const TargetRegisterInfo *
TRI =
MRI.getTargetRegisterInfo();
2100 if ((
TRI->getSubRegIndexLaneMask(DefSubReg) &
2101 TRI->getSubRegIndexLaneMask(InsertedReg.
SubIdx))
2103 return ValueTrackerResult();
2106 return ValueTrackerResult(
BaseReg.Reg, DefSubReg);
2109ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() {
2110 assert((
Def->isExtractSubreg() ||
Def->isExtractSubregLike()) &&
2111 "Invalid definition");
2118 return ValueTrackerResult();
2121 if (!
TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
2122 return ValueTrackerResult();
2126 if (ExtractSubregInputReg.
SubReg)
2127 return ValueTrackerResult();
2129 return ValueTrackerResult(ExtractSubregInputReg.
Reg,
2130 ExtractSubregInputReg.
SubIdx);
2133ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() {
2134 assert(
Def->isSubregToReg() &&
"Invalid definition");
2142 if (DefSubReg !=
Def->getOperand(3).getImm())
2143 return ValueTrackerResult();
2146 if (
Def->getOperand(2).getSubReg())
2147 return ValueTrackerResult();
2149 return ValueTrackerResult(
Def->getOperand(2).getReg(),
2150 Def->getOperand(3).getImm());
2154ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
2155 assert(
Def->isPHI() &&
"Invalid definition");
2156 ValueTrackerResult Res;
2159 for (
unsigned i = 1, e =
Def->getNumOperands(); i < e; i += 2) {
2160 const MachineOperand &MO =
Def->getOperand(i);
2165 return ValueTrackerResult();
2172ValueTrackerResult ValueTracker::getNextSourceImpl() {
2173 assert(Def &&
"This method needs a valid definition");
2175 assert(((
Def->getOperand(DefIdx).isDef() &&
2176 (DefIdx < Def->
getDesc().getNumDefs() ||
2177 Def->getDesc().isVariadic())) ||
2178 Def->getOperand(DefIdx).isImplicit()) &&
2181 return getNextSourceFromCopy();
2182 if (
Def->isBitcast())
2183 return getNextSourceFromBitcast();
2187 return ValueTrackerResult();
2188 if (
Def->isRegSequence() ||
Def->isRegSequenceLike())
2189 return getNextSourceFromRegSequence();
2190 if (
Def->isInsertSubreg() ||
Def->isInsertSubregLike())
2191 return getNextSourceFromInsertSubreg();
2192 if (
Def->isExtractSubreg() ||
Def->isExtractSubregLike())
2193 return getNextSourceFromExtractSubreg();
2194 if (
Def->isSubregToReg())
2195 return getNextSourceFromSubregToReg();
2197 return getNextSourceFromPHI();
2198 return ValueTrackerResult();
2201ValueTrackerResult ValueTracker::getNextSource() {
2205 return ValueTrackerResult();
2207 ValueTrackerResult Res = getNextSourceImpl();
2208 if (Res.isValid()) {
2212 bool OneRegSrc = Res.getNumSources() == 1;
2214 Reg = Res.getSrcReg(0);
2223 if (DI !=
MRI.def_end()) {
2226 DefSubReg = Res.getSrcSubReg(0);
unsigned const MachineRegisterInfo * MRI
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the DenseMap class.
const HexagonInstrInfo * TII
A common definition of LaneBitmask for use in TableGen and CodeGen.
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static cl::opt< unsigned > RewritePHILimit("rewrite-phi-limit", cl::Hidden, cl::init(10), cl::desc("Limit the length of PHI chains to lookup"))
static cl::opt< bool > DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), cl::desc("Disable the peephole optimizer"))
static cl::opt< unsigned > MaxRecurrenceChain("recurrence-chain-limit", cl::Hidden, cl::init(3), cl::desc("Maximum length of recurrence chain when evaluating the benefit " "of commuting operands"))
static cl::opt< bool > DisableNAPhysCopyOpt("disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable non-allocatable physical register copy optimization"))
static bool isVirtualRegisterOperand(MachineOperand &MO)
\brief Returns true if MO is a virtual register operand.
static MachineInstr & insertPHI(MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const SmallVectorImpl< RegSubRegPair > &SrcRegs, MachineInstr &OrigPHI)
Insert a PHI instruction with incoming edges SrcRegs that are guaranteed to have the same register cl...
static cl::opt< bool > Aggressive("aggressive-ext-opt", cl::Hidden, cl::desc("Aggressive extension optimization"))
static cl::opt< bool > DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable advanced copy optimization"))
Specify whether or not the value tracking looks through complex instructions.
TargetInstrInfo::RegSubRegPairAndIdx RegSubRegPairAndIdx
static RegSubRegPair getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, RegSubRegPair Def, const PeepholeOptimizer::RewriteMapTy &RewriteMap, bool HandleMultipleSources=true)
Given a Def.Reg and Def.SubReg pair, use RewriteMap to find the new source to use for rewrite.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Virtual Register Rewriter
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Represents analyses that only rely on functions' control flow.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &Mask, int64_t &Value) const override
For a comparison instruction, return the source registers in SrcReg and SrcReg2 if having two register operands, and the value it compares against in CmpValue.
bool isLoopHeader(const BlockT *BB) const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
An RAII based helper class to modify MachineFunctionProperties when running pass.
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
void setDelegate(Delegate *delegate)
Set the delegate.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
const uint32_t * getRegMask() const
getRegMask - Returns a bit mask of registers preserved by this RegMask operand.
unsigned getOperandNo() const
getOperandNo - Return the operand # of this MachineOperand in its MachineInstr.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
defusechain_iterator< false, true, false, true, false > def_iterator
def_iterator/def_begin/def_end - Walk all defs of the specified register.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on element size.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
static const unsigned CommuteAnyOperandIndex
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
MCInstrDesc const & getDesc(MCInstrInfo const &MCII, MCInst const &MCI)
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< DefNode * > Def
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI char & PeepholeOptimizerLegacyID
PeepholeOptimizer - This pass performs peephole optimizations - like extension and comparison eliminations.
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ABI void initializePeepholeOptimizerLegacyPass(PassRegistry &)
A pair composed of a pair of a register and a sub-register index, and another sub-register index.
A pair composed of a register and a sub-register index.