103#define DEBUG_TYPE "peephole-opt"
107 cl::desc(
"Aggressive extension optimization"));
111 cl::desc(
"Disable the peephole optimizer"));
118 cl::desc(
"Disable advanced copy optimization"));
122 cl::desc(
"Disable non-allocatable physical register copy optimization"));
128 cl::desc(
"Limit the length of PHI chains to lookup"));
134 cl::desc(
"Maximum length of recurrence chain when evaluating the benefit "
135 "of commuting operands"));
137STATISTIC(NumReuse,
"Number of extension results reused");
139STATISTIC(NumImmFold,
"Number of move immediate folded");
142STATISTIC(NumUncoalescableCopies,
"Number of uncoalescable copies optimized");
143STATISTIC(NumRewrittenCopies,
"Number of copies rewritten");
144STATISTIC(NumNAPhysCopies,
"Number of non-allocatable physical copies removed");
148class ValueTrackerResult;
149class RecurrenceInstr;
155 int CurrentSrcIdx = 0;
158 virtual ~Rewriter() =
default;
190 virtual bool RewriteCurrentSource(
Register NewReg,
unsigned NewSubReg) = 0;
194class CopyRewriter :
public Rewriter {
197 assert(
MI.isCopy() &&
"Expected copy instruction");
199 ~CopyRewriter()
override =
default;
203 if (++CurrentSrcIdx > 1)
207 const MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
210 const MachineOperand &MODef = CopyLike.getOperand(0);
215 bool RewriteCurrentSource(
Register NewReg,
unsigned NewSubReg)
override {
216 MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
225class UncoalescableRewriter :
public Rewriter {
229 UncoalescableRewriter(MachineInstr &
MI) :
Rewriter(
MI) {
230 NumDefs =
MI.getDesc().getNumDefs();
240 if (CurrentSrcIdx == NumDefs)
243 while (CopyLike.getOperand(CurrentSrcIdx).isDead()) {
245 if (CurrentSrcIdx == NumDefs)
251 const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx);
258 bool RewriteCurrentSource(
Register NewReg,
unsigned NewSubReg)
override {
264class InsertSubregRewriter :
public Rewriter {
267 assert(
MI.isInsertSubreg() &&
"Invalid instruction");
284 if (CurrentSrcIdx == 2)
288 const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
290 const MachineOperand &MODef = CopyLike.getOperand(0);
298 (
unsigned)CopyLike.getOperand(3).getImm());
302 bool RewriteCurrentSource(
Register NewReg,
unsigned NewSubReg)
override {
303 if (CurrentSrcIdx != 2)
306 MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
314class ExtractSubregRewriter :
public Rewriter {
315 const TargetInstrInfo &TII;
318 ExtractSubregRewriter(MachineInstr &
MI,
const TargetInstrInfo &TII)
320 assert(
MI.isExtractSubreg() &&
"Invalid instruction");
331 if (CurrentSrcIdx == 1)
335 const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
344 const MachineOperand &MODef = CopyLike.getOperand(0);
349 bool RewriteCurrentSource(
Register NewReg,
unsigned NewSubReg)
override {
351 if (CurrentSrcIdx != 1)
354 CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);
365 CopyLike.removeOperand(2);
367 CopyLike.setDesc(TII.get(TargetOpcode::COPY));
370 CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
376class RegSequenceRewriter :
public Rewriter {
379 assert(
MI.isRegSequence() &&
"Invalid instruction");
403 if (
static_cast<unsigned>(CurrentSrcIdx) >= CopyLike.getNumOperands())
406 const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
407 Src.Reg = MOInsertedReg.
getReg();
412 Dst.SubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();
414 const MachineOperand &MODef = CopyLike.getOperand(0);
416 assert(MODef.
getSubReg() == 0 &&
"cannot have subregister def in SSA");
420 bool RewriteCurrentSource(
Register NewReg,
unsigned NewSubReg)
override {
421 MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
429 const TargetInstrInfo *TII =
nullptr;
430 const TargetRegisterInfo *TRI =
nullptr;
431 MachineRegisterInfo *MRI =
nullptr;
432 MachineDominatorTree *DT =
nullptr;
433 MachineLoopInfo *MLI =
nullptr;
436 PeepholeOptimizer(MachineDominatorTree *DT, MachineLoopInfo *MLI)
437 : DT(DT), MLI(MLI) {}
439 bool run(MachineFunction &MF);
441 using RewriteMapTy = SmallDenseMap<RegSubRegPair, ValueTrackerResult>;
444 using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>;
447 bool optimizeCmpInstr(MachineInstr &
MI);
448 bool optimizeExtInstr(MachineInstr &
MI, MachineBasicBlock &
MBB,
449 SmallPtrSetImpl<MachineInstr *> &LocalMIs);
450 bool optimizeSelect(MachineInstr &
MI,
451 SmallPtrSetImpl<MachineInstr *> &LocalMIs);
452 bool optimizeCondBranch(MachineInstr &
MI);
454 bool optimizeCoalescableCopyImpl(
Rewriter &&CpyRewriter);
455 bool optimizeCoalescableCopy(MachineInstr &
MI);
456 bool optimizeUncoalescableCopy(MachineInstr &
MI,
457 SmallPtrSetImpl<MachineInstr *> &LocalMIs);
458 bool optimizeRecurrence(MachineInstr &
PHI);
459 bool findNextSource(
const TargetRegisterClass *DefRC,
unsigned DefSubReg,
461 bool isMoveImmediate(MachineInstr &
MI, SmallSet<Register, 4> &ImmDefRegs,
462 DenseMap<Register, MachineInstr *> &ImmDefMIs);
463 bool foldImmediate(MachineInstr &
MI, SmallSet<Register, 4> &ImmDefRegs,
464 DenseMap<Register, MachineInstr *> &ImmDefMIs,
472 const SmallSet<Register, 2> &TargetReg,
473 RecurrenceCycle &RC);
480 bool foldRedundantCopy(MachineInstr &
MI);
491 foldRedundantNAPhysCopy(MachineInstr &
MI,
492 DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs);
494 bool isLoadFoldable(MachineInstr &
MI,
495 SmallSet<Register, 16> &FoldAsLoadDefCandidates);
499 static bool isCoalescableCopy(
const MachineInstr &
MI) {
502 return MI.isCopy() ||
504 MI.isExtractSubreg()));
509 static bool isUncoalescableCopy(
const MachineInstr &
MI) {
511 MI.isInsertSubregLike() ||
512 MI.isExtractSubregLike()));
515 MachineInstr &rewriteSource(MachineInstr &CopyLike,
RegSubRegPair Def,
516 RewriteMapTy &RewriteMap);
520 DenseMap<RegSubRegPair, MachineInstr *> CopySrcMIs;
523 void MF_HandleInsertion(MachineInstr &
MI)
override {}
530 unsigned SrcSubReg =
MI.getOperand(1).getSubReg();
531 if (!SrcReg.
isVirtual() && !MRI->isConstantPhysReg(SrcReg))
540 void deleteChangedCopy(MachineInstr &
MI) {
542 if (!getCopySrc(
MI, SrcPair))
545 auto It = CopySrcMIs.find(SrcPair);
546 if (It != CopySrcMIs.end() && It->second == &
MI)
547 CopySrcMIs.erase(It);
550 void MF_HandleRemoval(MachineInstr &
MI)
override { deleteChangedCopy(
MI); }
552 void MF_HandleChangeDesc(MachineInstr &
MI,
const MCInstrDesc &TID)
override {
553 deleteChangedCopy(
MI);
561 PeepholeOptimizerLegacy() : MachineFunctionPass(ID) {}
563 bool runOnMachineFunction(MachineFunction &MF)
override;
565 void getAnalysisUsage(AnalysisUsage &AU)
const override {
576 MachineFunctionProperties getRequiredProperties()
const override {
577 return MachineFunctionProperties().setIsSSA();
587class RecurrenceInstr {
589 using IndexPair = std::pair<unsigned, unsigned>;
591 RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
592 RecurrenceInstr(MachineInstr *MI,
unsigned Idx1,
unsigned Idx2)
593 : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}
595 MachineInstr *getMI()
const {
return MI; }
596 std::optional<IndexPair> getCommutePair()
const {
return CommutePair; }
600 std::optional<IndexPair> CommutePair;
606class ValueTrackerResult {
612 const MachineInstr *Inst =
nullptr;
615 ValueTrackerResult() =
default;
617 ValueTrackerResult(
Register Reg,
unsigned SubReg) { addSource(
Reg, SubReg); }
619 bool isValid()
const {
return getNumSources() > 0; }
621 void setInst(
const MachineInstr *
I) { Inst =
I; }
622 const MachineInstr *getInst()
const {
return Inst; }
629 void addSource(
Register SrcReg,
unsigned SrcSubReg) {
633 void setSource(
int Idx,
Register SrcReg,
unsigned SrcSubReg) {
634 assert(Idx < getNumSources() &&
"Reg pair source out of index");
638 int getNumSources()
const {
return RegSrcs.size(); }
643 assert(Idx < getNumSources() &&
"Reg source out of index");
644 return RegSrcs[Idx].Reg;
647 unsigned getSrcSubReg(
int Idx)
const {
648 assert(Idx < getNumSources() &&
"SubReg source out of index");
649 return RegSrcs[Idx].SubReg;
653 if (
Other.getInst() != getInst())
656 if (
Other.getNumSources() != getNumSources())
659 for (
int i = 0, e =
Other.getNumSources(); i != e; ++i)
660 if (
Other.getSrcReg(i) != getSrcReg(i) ||
661 Other.getSrcSubReg(i) != getSrcSubReg(i))
686 const MachineInstr *Def =
nullptr;
698 const MachineRegisterInfo &MRI;
701 const TargetInstrInfo *TII;
704 ValueTrackerResult getNextSourceImpl();
707 ValueTrackerResult getNextSourceFromCopy();
710 ValueTrackerResult getNextSourceFromBitcast();
713 ValueTrackerResult getNextSourceFromRegSequence();
716 ValueTrackerResult getNextSourceFromInsertSubreg();
719 ValueTrackerResult getNextSourceFromExtractSubreg();
722 ValueTrackerResult getNextSourceFromSubregToReg();
725 ValueTrackerResult getNextSourceFromPHI();
737 ValueTracker(
Register Reg,
unsigned DefSubReg,
const MachineRegisterInfo &MRI,
738 const TargetInstrInfo *TII =
nullptr)
739 : DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) {
740 if (!Reg.isPhysical()) {
741 Def = MRI.getVRegDef(Reg);
742 DefIdx = MRI.def_begin(Reg).getOperandNo();
751 ValueTrackerResult getNextSource();
756char PeepholeOptimizerLegacy::ID = 0;
761 "Peephole Optimizations",
false,
false)
775bool PeepholeOptimizer::optimizeExtInstr(
780 if (!
TII->isCoalescableExtInstr(
MI, SrcReg, DstReg, SubIdx))
793 DstRC =
TRI->getSubClassWithSubReg(DstRC, SubIdx);
803 TRI->getSubClassWithSubReg(MRI->
getRegClass(SrcReg), SubIdx) !=
nullptr;
809 ReachedBBs.insert(UI.getParent());
817 bool ExtendLife =
true;
819 MachineInstr *UseMI = UseMO.getParent();
823 if (UseMI->isPHI()) {
829 if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
849 if (
UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
853 if (UseMBB == &
MBB) {
855 if (!LocalMIs.count(
UseMI))
856 Uses.push_back(&UseMO);
857 }
else if (ReachedBBs.count(UseMBB)) {
860 Uses.push_back(&UseMO);
864 ExtendedUses.push_back(&UseMO);
873 if (ExtendLife && !ExtendedUses.empty())
875 Uses.append(ExtendedUses.begin(), ExtendedUses.end());
880 SmallPtrSet<MachineBasicBlock *, 4> PHIBBs;
885 for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
887 PHIBBs.insert(UI.getParent());
889 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
890 for (MachineOperand *UseMO : Uses) {
891 MachineInstr *UseMI = UseMO->getParent();
892 MachineBasicBlock *UseMBB = UseMI->getParent();
893 if (PHIBBs.count(UseMBB))
898 MRI->clearKillFlags(DstReg);
899 MRI->constrainRegClass(DstReg, DstRC);
917 RC = MRI->getRegClass(UseMI->getOperand(0).getReg());
919 Register NewVR = MRI->createVirtualRegister(RC);
920 BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
921 TII->get(TargetOpcode::COPY), NewVR)
922 .addReg(DstReg, {}, SubIdx);
926 UseMO->setReg(NewVR);
943 int64_t CmpMask, CmpValue;
950 if (
TII->optimizeCompareInstr(
MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
960bool PeepholeOptimizer::optimizeSelect(
961 MachineInstr &
MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
962 assert(
MI.isSelect() &&
"Should only be called when MI->isSelect() is true");
963 if (!
TII->optimizeSelect(
MI, LocalMIs))
966 MI.eraseFromParent();
972bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &
MI) {
973 return TII->optimizeCondBranch(
MI);
989bool PeepholeOptimizer::findNextSource(
const TargetRegisterClass *DefRC,
992 RewriteMapTy &RewriteMap) {
1001 unsigned PHICount = 0;
1008 ValueTracker ValTracker(CurSrcPair.
Reg, CurSrcPair.
SubReg, *MRI,
TII);
1013 ValueTrackerResult Res = ValTracker.getNextSource();
1019 auto [InsertPt, WasInserted] = RewriteMap.try_emplace(CurSrcPair, Res);
1022 const ValueTrackerResult &CurSrcRes = InsertPt->second;
1024 assert(CurSrcRes == Res &&
"ValueTrackerResult found must match");
1027 if (CurSrcRes.getNumSources() > 1) {
1029 <<
"findNextSource: found PHI cycle, aborting...\n");
1037 unsigned NumSrcs = Res.getNumSources();
1045 for (
unsigned i = 0; i < NumSrcs; ++i)
1050 CurSrcPair = Res.getSrc(0);
1059 const TargetRegisterClass *SrcRC = MRI->
getRegClass(CurSrcPair.
Reg);
1060 if (!
TRI->shouldRewriteCopySrc(DefRC, DefSubReg, SrcRC,
1066 if (PHICount > 0 && CurSrcPair.
SubReg != 0)
1072 }
while (!SrcToLook.
empty());
1075 return CurSrcPair.
Reg !=
Reg;
1087 assert(!SrcRegs.
empty() &&
"No sources to create a PHI instruction?");
1092 assert(SrcRegs[0].SubReg == 0 &&
"should not have subreg operand");
1096 TII.get(TargetOpcode::PHI), NewVR);
1098 unsigned MBBOpIdx = 2;
1100 MIB.
addReg(RegPair.Reg, {}, RegPair.SubReg);
1121 const PeepholeOptimizer::RewriteMapTy &RewriteMap,
1122 bool HandleMultipleSources =
true) {
1125 ValueTrackerResult Res = RewriteMap.
lookup(LookupSrc);
1131 unsigned NumSrcs = Res.getNumSources();
1133 LookupSrc.
Reg = Res.getSrcReg(0);
1134 LookupSrc.
SubReg = Res.getSrcSubReg(0);
1139 if (!HandleMultipleSources)
1145 for (
unsigned i = 0; i < NumSrcs; ++i) {
1146 RegSubRegPair PHISrc(Res.getSrcReg(i), Res.getSrcSubReg(i));
1164bool PeepholeOptimizer::optimizeCoalescableCopyImpl(
Rewriter &&CpyRewriter) {
1170 while (CpyRewriter.getNextRewritableSource(TrackPair, Dst)) {
1171 if (Dst.Reg.isPhysical()) {
1179 const TargetRegisterClass *DefRC = MRI->
getRegClass(Dst.Reg);
1182 RewriteMapTy RewriteMap;
1185 if (!findNextSource(DefRC, Dst.SubReg, TrackPair, RewriteMap))
1193 "should not rewrite source to original value");
1202 const TargetRegisterClass *WithSubRC =
1203 TRI->getSubClassWithSubReg(RC, NewSrc.
SubReg);
1210 if (CpyRewriter.RewriteCurrentSource(NewSrc.
Reg, NewSrc.
SubReg)) {
1222 NumRewrittenCopies +=
Changed;
1237bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &
MI) {
1238 assert(isCoalescableCopy(
MI) &&
"Invalid argument");
1239 assert(
MI.getDesc().getNumDefs() == 1 &&
1240 "Coalescer can understand multiple defs?!");
1241 const MachineOperand &MODef =
MI.getOperand(0);
1246 switch (
MI.getOpcode()) {
1247 case TargetOpcode::COPY:
1248 return optimizeCoalescableCopyImpl(CopyRewriter(
MI));
1249 case TargetOpcode::INSERT_SUBREG:
1250 return optimizeCoalescableCopyImpl(InsertSubregRewriter(
MI));
1251 case TargetOpcode::EXTRACT_SUBREG:
1252 return optimizeCoalescableCopyImpl(ExtractSubregRewriter(
MI, *
TII));
1253 case TargetOpcode::REG_SEQUENCE:
1254 return optimizeCoalescableCopyImpl(RegSequenceRewriter(
MI));
1257 if (
MI.isBitcast() ||
MI.isRegSequenceLike() ||
MI.isInsertSubregLike() ||
1258 MI.isExtractSubregLike())
1259 return optimizeCoalescableCopyImpl(UncoalescableRewriter(
MI));
1269MachineInstr &PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,
1271 RewriteMapTy &RewriteMap) {
1272 assert(!
Def.Reg.isPhysical() &&
"We do not rewrite physical registers");
1282 const TargetRegisterClass *NewSrcRC = MRI->
getRegClass(NewSrc.
Reg);
1283 const TargetRegisterClass *WithSubRC =
1284 TRI->getSubClassWithSubReg(NewSrcRC, NewSrc.
SubReg);
1293 MachineInstr *NewCopy =
1295 TII->get(TargetOpcode::COPY), NewVReg)
1327bool PeepholeOptimizer::optimizeUncoalescableCopy(
1328 MachineInstr &
MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
1329 assert(isUncoalescableCopy(
MI) &&
"Invalid argument");
1330 UncoalescableRewriter CpyRewriter(
MI);
1335 RewriteMapTy RewriteMap;
1339 while (CpyRewriter.getNextRewritableSource(Src, Def)) {
1342 if (
Def.Reg.isPhysical())
1352 if (!findNextSource(DefRC,
Def.SubReg, Def, RewriteMap))
1361 MachineInstr &NewCopy = rewriteSource(
MI, Def, RewriteMap);
1362 LocalMIs.
insert(&NewCopy);
1367 MI.eraseFromParent();
1368 ++NumUncoalescableCopies;
1375bool PeepholeOptimizer::isLoadFoldable(
1376 MachineInstr &
MI, SmallSet<Register, 16> &FoldAsLoadDefCandidates) {
1377 if (!
MI.canFoldAsLoad() || !
MI.mayLoad())
1379 const MCInstrDesc &MCID =
MI.getDesc();
1395bool PeepholeOptimizer::isMoveImmediate(
1396 MachineInstr &
MI, SmallSet<Register, 4> &ImmDefRegs,
1397 DenseMap<Register, MachineInstr *> &ImmDefMIs) {
1398 const MCInstrDesc &MCID =
MI.getDesc();
1399 if (MCID.
getNumDefs() != 1 || !
MI.getOperand(0).isReg())
1406 if (!
MI.isMoveImmediate() && !
TII->getConstValDefinedInReg(
MI,
Reg, ImmVal))
1417bool PeepholeOptimizer::foldImmediate(
1418 MachineInstr &
MI, SmallSet<Register, 4> &ImmDefRegs,
1419 DenseMap<Register, MachineInstr *> &ImmDefMIs,
bool &
Deleted) {
1421 for (
unsigned i = 0, e =
MI.getDesc().getNumOperands(); i != e; ++i) {
1422 MachineOperand &MO =
MI.getOperand(i);
1431 assert(
II != ImmDefMIs.
end() &&
"couldn't find immediate definition");
1432 if (
TII->foldImmediate(
MI, *
II->second,
Reg, MRI)) {
1444 MI.eraseFromParent();
1468bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &
MI) {
1469 assert(
MI.isCopy() &&
"expected a COPY machine instruction");
1472 if (!getCopySrc(
MI, SrcPair))
1479 if (CopySrcMIs.
insert(std::make_pair(SrcPair, &
MI)).second) {
1484 MachineInstr *PrevCopy = CopySrcMIs.
find(SrcPair)->second;
1487 "Unexpected mismatching subreg!");
1505bool PeepholeOptimizer::isNAPhysCopy(
Register Reg) {
1509bool PeepholeOptimizer::foldRedundantNAPhysCopy(
1510 MachineInstr &
MI, DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs) {
1511 assert(
MI.isCopy() &&
"expected a COPY machine instruction");
1518 if (isNAPhysCopy(SrcReg) && DstReg.
isVirtual()) {
1522 NAPhysToVirtMIs.
insert({SrcReg, &
MI});
1526 if (!(SrcReg.
isVirtual() && isNAPhysCopy(DstReg)))
1530 auto PrevCopy = NAPhysToVirtMIs.
find(DstReg);
1531 if (PrevCopy == NAPhysToVirtMIs.
end()) {
1534 LLVM_DEBUG(
dbgs() <<
"NAPhysCopy: intervening clobber forbids erasing "
1540 if (PrevDstReg == SrcReg) {
1553 NAPhysToVirtMIs.
erase(PrevCopy);
1562bool PeepholeOptimizer::findTargetRecurrence(
1563 Register Reg,
const SmallSet<Register, 2> &TargetRegs,
1564 RecurrenceCycle &RC) {
1582 unsigned Idx =
MI.findRegisterUseOperandIdx(
Reg,
nullptr);
1586 if (
MI.getDesc().getNumDefs() != 1)
1589 MachineOperand &DefOp =
MI.getOperand(0);
1596 unsigned TiedUseIdx;
1597 if (!
MI.isRegTiedToUseOperand(0, &TiedUseIdx))
1600 if (Idx == TiedUseIdx) {
1601 RC.push_back(RecurrenceInstr(&
MI));
1602 return findTargetRecurrence(DefOp.
getReg(), TargetRegs, RC);
1606 if (
TII->findCommutedOpIndices(
MI, Idx, CommIdx) && CommIdx == TiedUseIdx) {
1607 RC.push_back(RecurrenceInstr(&
MI, Idx, CommIdx));
1608 return findTargetRecurrence(DefOp.
getReg(), TargetRegs, RC);
1633bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &
PHI) {
1634 SmallSet<Register, 2> TargetRegs;
1635 for (
unsigned Idx = 1; Idx <
PHI.getNumOperands(); Idx += 2) {
1636 MachineOperand &MO =
PHI.getOperand(Idx);
1643 if (findTargetRecurrence(
PHI.getOperand(0).getReg(), TargetRegs, RC)) {
1647 for (
auto &RI : RC) {
1649 auto CP = RI.getCommutePair();
1652 TII->commuteInstruction(*(RI.getMI()),
false, (*CP).first,
1669 PeepholeOptimizer Impl(DT, MLI);
1681bool PeepholeOptimizerLegacy::runOnMachineFunction(
MachineFunction &MF) {
1685 ? &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree()
1687 auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
1688 PeepholeOptimizer Impl(DT, MLI);
1689 return Impl.run(MF);
1694 LLVM_DEBUG(
dbgs() <<
"********** PEEPHOLE OPTIMIZER **********\n");
1708 bool SeenMoveImm =
false;
1741 if (
MI->isDebugInstr())
1744 if (
MI->isPosition())
1747 if (IsLoopHeader &&
MI->isPHI()) {
1748 if (optimizeRecurrence(*
MI)) {
1754 if (!
MI->isCopy()) {
1755 for (
const MachineOperand &MO :
MI->operands()) {
1759 if (MO.
isDef() && isNAPhysCopy(
Reg)) {
1761 if (Def != NAPhysToVirtMIs.
end()) {
1765 <<
"NAPhysCopy: invalidating because of " << *
MI);
1766 NAPhysToVirtMIs.
erase(Def);
1771 for (
auto &RegMI : NAPhysToVirtMIs) {
1775 <<
"NAPhysCopy: invalidating because of " << *
MI);
1776 NAPhysToVirtMIs.erase(Def);
1783 if (
MI->isImplicitDef() ||
MI->isKill())
1786 if (
MI->isInlineAsm() ||
MI->hasUnmodeledSideEffects()) {
1793 NAPhysToVirtMIs.clear();
1796 if ((isUncoalescableCopy(*
MI) &&
1797 optimizeUncoalescableCopy(*
MI, LocalMIs)) ||
1798 (
MI->isCompare() && optimizeCmpInstr(*
MI)) ||
1799 (
MI->isSelect() && optimizeSelect(*
MI, LocalMIs))) {
1806 if (
MI->isConditionalBranch() && optimizeCondBranch(*
MI)) {
1811 if (isCoalescableCopy(*
MI) && optimizeCoalescableCopy(*
MI)) {
1817 if (
MI->isCopy() && (foldRedundantCopy(*
MI) ||
1818 foldRedundantNAPhysCopy(*
MI, NAPhysToVirtMIs))) {
1821 MI->eraseFromParent();
1826 if (isMoveImmediate(*
MI, ImmDefRegs, ImmDefMIs)) {
1848 if (!isLoadFoldable(*
MI, FoldAsLoadDefCandidates) &&
1849 !FoldAsLoadDefCandidates.
empty()) {
1856 const MCInstrDesc &MIDesc =
MI->getDesc();
1857 for (
unsigned i = MIDesc.
getNumDefs(); i !=
MI->getNumOperands(); ++i) {
1858 const MachineOperand &MOp =
MI->getOperand(i);
1862 if (FoldAsLoadDefCandidates.
count(FoldAsLoadDefReg)) {
1867 Register FoldedReg = FoldAsLoadDefReg;
1868 MachineInstr *
DefMI =
nullptr;
1869 MachineInstr *CopyMI =
nullptr;
1870 if (MachineInstr *FoldMI =
TII->optimizeLoadInstr(
1871 *
MI, MRI, FoldAsLoadDefReg,
DefMI, CopyMI)) {
1882 if (
MI->shouldUpdateAdditionalCallInfo())
1883 MI->getMF()->moveAdditionalCallInfo(
MI, FoldMI);
1884 MI->eraseFromParent();
1887 FoldAsLoadDefCandidates.
erase(FoldedReg);
1901 if (
MI->isLoadFoldBarrier()) {
1903 FoldAsLoadDefCandidates.
clear();
1908 MF.resetDelegate(
this);
1912ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
1913 assert(
Def->isCopy() &&
"Invalid definition");
1918 assert(
Def->getNumOperands() -
Def->getNumImplicitOperands() == 2 &&
1919 "Invalid number of operands");
1920 assert(!
Def->hasImplicitDef() &&
"Only implicit uses are allowed");
1921 assert(!
Def->getOperand(DefIdx).getSubReg() &&
"no subregister defs in SSA");
1924 const MachineOperand &Src =
Def->getOperand(1);
1926 return ValueTrackerResult();
1929 unsigned SubReg = Src.getSubReg();
1932 SubReg =
TRI->composeSubRegIndices(SubReg, DefSubReg);
1936 const TargetRegisterClass *RegRC = MRI.
getRegClass(SrcReg);
1937 if (!
TRI->isSubRegValidForRegClass(RegRC, SubReg))
1938 return ValueTrackerResult();
1940 if (!
TRI->getSubReg(SrcReg, SubReg))
1941 return ValueTrackerResult();
1945 return ValueTrackerResult(SrcReg, SubReg);
1948ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
1949 assert(
Def->isBitcast() &&
"Invalid definition");
1952 if (
Def->mayRaiseFPException() ||
Def->hasUnmodeledSideEffects())
1953 return ValueTrackerResult();
1956 if (
Def->getDesc().getNumDefs() != 1)
1957 return ValueTrackerResult();
1959 assert(!
Def->getOperand(DefIdx).getSubReg() &&
"no subregister defs in SSA");
1961 unsigned SrcIdx =
Def->getNumOperands();
1962 for (
unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx;
OpIdx != EndOpIdx;
1964 const MachineOperand &MO =
Def->getOperand(
OpIdx);
1970 assert(!MO.
isDef() &&
"We should have skipped all the definitions by now");
1971 if (SrcIdx != EndOpIdx)
1973 return ValueTrackerResult();
1979 if (SrcIdx >=
Def->getNumOperands())
1980 return ValueTrackerResult();
1982 const MachineOperand &DefOp =
Def->getOperand(DefIdx);
1987 if (
UseMI.isSubregToReg())
1988 return ValueTrackerResult();
1991 const MachineOperand &Src =
Def->getOperand(SrcIdx);
1993 return ValueTrackerResult();
1994 return ValueTrackerResult(Src.getReg(), Src.getSubReg());
1997ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
1998 assert((
Def->isRegSequence() ||
Def->isRegSequenceLike()) &&
1999 "Invalid definition");
2001 assert(!
Def->getOperand(DefIdx).getSubReg() &&
"illegal subregister def");
2004 if (!
TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
2005 return ValueTrackerResult();
2013 if (RegSeqInput.SubIdx == DefSubReg)
2014 return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
2022 LaneBitmask DefMask =
TRI->getSubRegIndexLaneMask(DefSubReg);
2023 LaneBitmask ThisOpRegMask =
TRI->getSubRegIndexLaneMask(RegSeqInput.SubIdx);
2029 if ((DefMask & ThisOpRegMask) != DefMask)
2032 unsigned ReverseDefCompose =
2033 TRI->reverseComposeSubRegIndices(RegSeqInput.SubIdx, DefSubReg);
2034 if (!ReverseDefCompose)
2037 unsigned ComposedDefInSrcReg1 =
2038 TRI->composeSubRegIndices(RegSeqInput.SubReg, ReverseDefCompose);
2044 const TargetRegisterClass *SrcRC = MRI.
getRegClass(RegSeqInput.Reg);
2045 if (!
TRI->isSubRegValidForRegClass(SrcRC, ComposedDefInSrcReg1))
2046 return ValueTrackerResult();
2048 return ValueTrackerResult(RegSeqInput.Reg, ComposedDefInSrcReg1);
2054 return ValueTrackerResult();
2057ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() {
2058 assert((
Def->isInsertSubreg() ||
Def->isInsertSubregLike()) &&
2059 "Invalid definition");
2060 assert(!
Def->getOperand(DefIdx).getSubReg() &&
"no subreg defs in SSA");
2064 if (!
TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
2065 return ValueTrackerResult();
2074 if (InsertedReg.
SubIdx == DefSubReg) {
2075 return ValueTrackerResult(InsertedReg.
Reg, InsertedReg.
SubReg);
2080 const MachineOperand &MODef =
Def->getOperand(DefIdx);
2086 return ValueTrackerResult();
2091 if ((
TRI->getSubRegIndexLaneMask(DefSubReg) &
2092 TRI->getSubRegIndexLaneMask(InsertedReg.
SubIdx))
2094 return ValueTrackerResult();
2097 return ValueTrackerResult(
BaseReg.Reg, DefSubReg);
2100ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() {
2101 assert((
Def->isExtractSubreg() ||
Def->isExtractSubregLike()) &&
2102 "Invalid definition");
2109 return ValueTrackerResult();
2112 if (!
TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
2113 return ValueTrackerResult();
2117 if (ExtractSubregInputReg.
SubReg)
2118 return ValueTrackerResult();
2120 return ValueTrackerResult(ExtractSubregInputReg.
Reg,
2121 ExtractSubregInputReg.
SubIdx);
2124ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() {
2125 assert(
Def->isSubregToReg() &&
"Invalid definition");
2133 if (DefSubReg !=
Def->getOperand(2).getImm())
2134 return ValueTrackerResult();
2137 if (
Def->getOperand(1).getSubReg())
2138 return ValueTrackerResult();
2140 return ValueTrackerResult(
Def->getOperand(1).getReg(),
2141 Def->getOperand(2).getImm());
2145ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
2146 assert(
Def->isPHI() &&
"Invalid definition");
2147 ValueTrackerResult Res;
2150 for (
unsigned i = 1, e =
Def->getNumOperands(); i < e; i += 2) {
2151 const MachineOperand &MO =
Def->getOperand(i);
2156 return ValueTrackerResult();
2163ValueTrackerResult ValueTracker::getNextSourceImpl() {
2164 assert(Def &&
"This method needs a valid definition");
2166 assert(((
Def->getOperand(DefIdx).isDef() &&
2167 (DefIdx < Def->
getDesc().getNumDefs() ||
2168 Def->getDesc().isVariadic())) ||
2169 Def->getOperand(DefIdx).isImplicit()) &&
2172 return getNextSourceFromCopy();
2173 if (
Def->isBitcast())
2174 return getNextSourceFromBitcast();
2178 return ValueTrackerResult();
2179 if (
Def->isRegSequence() ||
Def->isRegSequenceLike())
2180 return getNextSourceFromRegSequence();
2181 if (
Def->isInsertSubreg() ||
Def->isInsertSubregLike())
2182 return getNextSourceFromInsertSubreg();
2183 if (
Def->isExtractSubreg() ||
Def->isExtractSubregLike())
2184 return getNextSourceFromExtractSubreg();
2185 if (
Def->isSubregToReg())
2186 return getNextSourceFromSubregToReg();
2188 return getNextSourceFromPHI();
2189 return ValueTrackerResult();
2192ValueTrackerResult ValueTracker::getNextSource() {
2196 return ValueTrackerResult();
2198 ValueTrackerResult Res = getNextSourceImpl();
2199 if (Res.isValid()) {
2203 bool OneRegSrc = Res.getNumSources() == 1;
2205 Reg = Res.getSrcReg(0);
2217 DefSubReg = Res.getSrcSubReg(0);
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the DenseMap class.
const HexagonInstrInfo * TII
A common definition of LaneBitmask for use in TableGen and CodeGen.
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static cl::opt< unsigned > RewritePHILimit("rewrite-phi-limit", cl::Hidden, cl::init(10), cl::desc("Limit the length of PHI chains to lookup"))
static cl::opt< bool > DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), cl::desc("Disable the peephole optimizer"))
static cl::opt< unsigned > MaxRecurrenceChain("recurrence-chain-limit", cl::Hidden, cl::init(3), cl::desc("Maximum length of recurrence chain when evaluating the benefit " "of commuting operands"))
static cl::opt< bool > DisableNAPhysCopyOpt("disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable non-allocatable physical register copy optimization"))
static bool isVirtualRegisterOperand(MachineOperand &MO)
\bried Returns true if MO is a virtual register operand.
static MachineInstr & insertPHI(MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const SmallVectorImpl< RegSubRegPair > &SrcRegs, MachineInstr &OrigPHI)
Insert a PHI instruction with incoming edges SrcRegs that are guaranteed to have the same register cl...
static cl::opt< bool > Aggressive("aggressive-ext-opt", cl::Hidden, cl::desc("Aggressive extension optimization"))
static cl::opt< bool > DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable advanced copy optimization"))
Specifiy whether or not the value tracking looks through complex instructions.
TargetInstrInfo::RegSubRegPairAndIdx RegSubRegPairAndIdx
static RegSubRegPair getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, RegSubRegPair Def, const PeepholeOptimizer::RewriteMapTy &RewriteMap, bool HandleMultipleSources=true)
Given a Def.Reg and Def.SubReg pair, use RewriteMap to find the new source to use for rewrite.
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Virtual Register Rewriter
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Represents analyses that only rely on functions' control flow.
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &Mask, int64_t &Value) const override
For a comparison instruction, return the source registers in SrcReg and SrcReg2 if having two registe...
bool isLoopHeader(const BlockT *BB) const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
An RAII based helper class to modify MachineFunctionProperties when running pass.
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
void setDelegate(Delegate *delegate)
Set the delegate.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
const uint32_t * getRegMask() const
getRegMask - Returns a bit mask of registers preserved by this RegMask operand.
unsigned getOperandNo() const
getOperandNo - Return the operand # of this MachineOperand in its MachineInstr.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
LLVM_ABI void markUsesInDebugValueAsUndef(Register Reg) const
markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the specified register as undefined wh...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
def_iterator def_begin(Register RegNo) const
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
LLVM_ABI bool hasOneNonDBGUser(Register RegNo) const
hasOneNonDBGUser - Return true if there is exactly one non-Debug instruction using the specified register.
bool isAllocatable(MCRegister PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
defusechain_iterator< false, true, false, true, false > def_iterator
def_iterator/def_begin/def_end - Walk all defs of the specified register.
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
static def_iterator def_end()
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
static const unsigned CommuteAnyOperandIndex
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
MCInstrDesc const & getDesc(MCInstrInfo const &MCII, MCInst const &MCI)
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
NodeAddr< DefNode * > Def
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI char & PeepholeOptimizerLegacyID
PeepholeOptimizer - This pass performs peephole optimizations - like extension and comparison elimina...
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
A pair composed of a pair of a register and a sub-register index, and another sub-register index.
A pair composed of a register and a sub-register index.