#define DEBUG_TYPE "peephole-opt"
static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden,
                                cl::desc("Aggressive extension optimization"));

static cl::opt<bool>
    DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
                    cl::desc("Disable the peephole optimizer"));

static cl::opt<bool>
    DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
                      cl::desc("Disable advanced copy optimization"));

static cl::opt<bool> DisableNAPhysCopyOpt(
    "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false),
    cl::desc("Disable non-allocatable physical register copy optimization"));

static cl::opt<unsigned> RewritePHILimit(
    "rewrite-phi-limit", cl::Hidden, cl::init(10),
    cl::desc("Limit the length of PHI chains to lookup"));

static cl::opt<unsigned> MaxRecurrenceChain(
    "recurrence-chain-limit", cl::Hidden, cl::init(3),
    cl::desc("Maximum length of recurrence chain when evaluating the benefit "
             "of commuting operands"));
STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
STATISTIC(NumLoadFold, "Number of loads folded");
STATISTIC(NumSelects, "Number of selects optimized");
STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
using RegSubRegPairAndIdx = TargetInstrInfo::RegSubRegPairAndIdx;

class ValueTrackerResult;
class RecurrenceInstr;
/// Interface to query instructions amenable to copy rewriting.
class Rewriter {
protected:
  MachineInstr &CopyLike;
  int CurrentSrcIdx = 0; ///< The index of the source being rewritten.

public:
  Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {}
  virtual ~Rewriter() = default;

  virtual bool getNextRewritableSource(RegSubRegPair &Src,
                                       RegSubRegPair &Dst) = 0;

  virtual bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) = 0;
};
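// Note added for orientation (ours, not from the original source): a Rewriter
// walks the rewritable sources of one copy-like instruction. Callers
// repeatedly invoke getNextRewritableSource(Src, Dst) to obtain a
// (source, tracked definition) pair, run the value tracker on it, and commit
// an improvement with RewriteCurrentSource(NewReg, NewSubReg).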
/// Rewriter for COPY instructions.
class CopyRewriter : public Rewriter {
public:
  CopyRewriter(MachineInstr &MI) : Rewriter(MI) {
    assert(MI.isCopy() && "Expected copy instruction");
  }
  virtual ~CopyRewriter() = default;

  bool getNextRewritableSource(RegSubRegPair &Src,
                               RegSubRegPair &Dst) override {
    // A COPY has exactly one rewritable source, operand 1.
    if (++CurrentSrcIdx > 1)
      return false;

    const MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
    Src = RegSubRegPair(MOSrc.getReg(), MOSrc.getSubReg());

    // What we track are the alternative sources of the definition.
    const MachineOperand &MODef = CopyLike.getOperand(0);
    Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
    return true;
  }

  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
    MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
    MOSrc.setReg(NewReg);
    MOSrc.setSubReg(NewSubReg);
    return true;
  }
};
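// Illustrative MIR (ours, register classes hypothetical): for
//   %1:gpr32 = COPY %0.sub_32:gpr64
//   %2:gpr32 = COPY %1
// the second COPY's only rewritable source is %1; if the tracker walks back
// to %0.sub_32, RewriteCurrentSource turns the instruction into
//   %2:gpr32 = COPY %0.sub_32
// leaving the middle copy for dead-code elimination.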
/// Helper class to rewrite uncoalescable copy-like instructions into new
/// COPY (coalescer friendly) instructions.
class UncoalescableRewriter : public Rewriter {
  unsigned NumDefs; ///< Number of defs in the copy-like instruction.

public:
  UncoalescableRewriter(MachineInstr &MI) : Rewriter(MI) {
    NumDefs = MI.getDesc().getNumDefs();
  }

  /// All the sources for \p MI are definitions.
  bool getNextRewritableSource(RegSubRegPair &Src,
                               RegSubRegPair &Dst) override {
    // Find the next non-dead definition and continue from there.
    if (CurrentSrcIdx == NumDefs)
      return false;

    while (CopyLike.getOperand(CurrentSrcIdx).isDead()) {
      ++CurrentSrcIdx;
      if (CurrentSrcIdx == NumDefs)
        return false;
    }

    // What we track are the alternative sources of the definition.
    Src = RegSubRegPair(0, 0);
    const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx);
    Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());

    CurrentSrcIdx++;
    return true;
  }

  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
    return false;
  }
};
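// Design note (ours): for copy-like instructions that cannot be rewritten in
// place (e.g. bitcasts and target-specific *SubregLike pseudos), the
// "sources" reported above are really the definitions. The pass materializes
// a fresh COPY per definition via rewriteSource() instead of editing the
// instruction, which is why RewriteCurrentSource() always fails here.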
/// Specialized rewriter for INSERT_SUBREG instructions.
class InsertSubregRewriter : public Rewriter {
public:
  InsertSubregRewriter(MachineInstr &MI) : Rewriter(MI) {
    assert(MI.isInsertSubreg() && "Invalid instruction");
  }

  // CopyLike is: Def = INSERT_SUBREG v0, v1, sub0.
  bool getNextRewritableSource(RegSubRegPair &Src,
                               RegSubRegPair &Dst) override {
    // If we already got the only source we can rewrite, return false.
    if (CurrentSrcIdx == 2)
      return false;
    CurrentSrcIdx = 2;
    const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
    Src = RegSubRegPair(MOInsertedReg.getReg(), MOInsertedReg.getSubReg());
    const MachineOperand &MODef = CopyLike.getOperand(0);
    // We want to track something compatible with the partial definition.
    if (MODef.getSubReg())
      // Bail if we have to compose sub-register indices.
      return false;
    Dst = RegSubRegPair(MODef.getReg(),
                        (unsigned)CopyLike.getOperand(3).getImm());
    return true;
  }

  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
    if (CurrentSrcIdx != 2)
      return false;

    MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
    MO.setReg(NewReg);
    MO.setSubReg(NewSubReg);
    return true;
  }
};
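// Operand-layout reminder (ours):
//   %dst = INSERT_SUBREG %base, %inserted, subidx
// operand 2 holds the inserted value and operand 3 the immediate
// sub-register index, so the single rewritable source is operand 2, tracked
// against %dst:subidx.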
/// Specialized rewriter for EXTRACT_SUBREG instructions.
class ExtractSubregRewriter : public Rewriter {
  const TargetInstrInfo &TII;

public:
  ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII)
      : Rewriter(MI), TII(TII) {
    assert(MI.isExtractSubreg() && "Invalid instruction");
  }

  // CopyLike is: Def = EXTRACT_SUBREG v0, sub0.
  bool getNextRewritableSource(RegSubRegPair &Src,
                               RegSubRegPair &Dst) override {
    // If we already got the only source we can rewrite, return false.
    if (CurrentSrcIdx == 1)
      return false;
    CurrentSrcIdx = 1;
    const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
    // If we have to compose sub-register indices, bail out.
    if (MOExtractedReg.getSubReg())
      return false;

    Src = RegSubRegPair(MOExtractedReg.getReg(),
                        CopyLike.getOperand(2).getImm());

    // We want to track something that is compatible with the definition.
    const MachineOperand &MODef = CopyLike.getOperand(0);
    Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
    return true;
  }

  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
    // The only source we can rewrite is the input register.
    if (CurrentSrcIdx != 1)
      return false;

    CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);

    // If we found a source that does not require extracting anything,
    // rewrite the operation as a copy.
    if (!NewSubReg) {
      // Move the current index to an invalid position so that no further
      // rewrite can happen.
      CurrentSrcIdx = -1;
      // Drop the sub-register index and morph the operation into a COPY.
      CopyLike.removeOperand(2);
      CopyLike.setDesc(TII.get(TargetOpcode::COPY));
      return true;
    }
    CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
    return true;
  }
};
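// Example of the downgrade path above (ours): if the tracked source of
//   %1 = EXTRACT_SUBREG %0, subidx
// requires no sub-register at all (NewSubReg == 0), the instruction is
// morphed in place into "%1 = COPY %newsrc": operand 2 (the index) is
// removed and the descriptor is swapped for TargetOpcode::COPY.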
/// Specialized rewriter for REG_SEQUENCE instructions.
class RegSequenceRewriter : public Rewriter {
public:
  RegSequenceRewriter(MachineInstr &MI) : Rewriter(MI) {
    assert(MI.isRegSequence() && "Invalid instruction");
  }

  // CopyLike is: Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
  bool getNextRewritableSource(RegSubRegPair &Src,
                               RegSubRegPair &Dst) override {
    // If this is the first call, move to the first argument, otherwise to
    // the next pair.
    if (CurrentSrcIdx == 0) {
      CurrentSrcIdx = 1;
    } else {
      CurrentSrcIdx += 2;
      if (static_cast<unsigned>(CurrentSrcIdx) >= CopyLike.getNumOperands())
        return false;
    }
    const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
    Src.Reg = MOInsertedReg.getReg();
    // If we have to compose sub-register indices, bail out.
    if ((Src.SubReg = MOInsertedReg.getSubReg()))
      return false;

    // We want to track something compatible with the related partial
    // definition.
    Dst.SubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();

    const MachineOperand &MODef = CopyLike.getOperand(0);
    Dst.Reg = MODef.getReg();
    assert(MODef.getSubReg() == 0 && "cannot have subregister def in SSA");
    return true;
  }

  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
    // We cannot rewrite out-of-bound operands.
    // Moreover, rewritable sources are at odd positions.
    if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands())
      return false;

    MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
    MO.setReg(NewReg);
    MO.setSubReg(NewSubReg);
    return true;
  }
};
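// Operand-layout reminder (ours): REG_SEQUENCE operands come in
// (register, sub-register index) pairs starting at operand 1, e.g.
//   %dst = REG_SEQUENCE %a, sub_lo, %b, sub_hi
// hence CurrentSrcIdx visits 1, 3, 5, ... and the paired index lives at
// CurrentSrcIdx + 1.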
class PeepholeOptimizer : private MachineFunction::Delegate {
  const TargetInstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  MachineDominatorTree *DT = nullptr; // Machine dominator tree
  MachineLoopInfo *MLI = nullptr;

public:
  PeepholeOptimizer(MachineDominatorTree *DT, MachineLoopInfo *MLI)
      : DT(DT), MLI(MLI) {}

  bool run(MachineFunction &MF);

  /// Track Def -> Use info used for rewriting copies.
  using RewriteMapTy = SmallDenseMap<RegSubRegPair, ValueTrackerResult>;

  /// Sequence of instructions that formulate a recurrence cycle.
  using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>;
private:
  bool optimizeCmpInstr(MachineInstr &MI);
  bool optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
                        SmallPtrSetImpl<MachineInstr *> &LocalMIs);
  bool optimizeSelect(MachineInstr &MI,
                      SmallPtrSetImpl<MachineInstr *> &LocalMIs);
  bool optimizeCondBranch(MachineInstr &MI);
  bool optimizeCoalescableCopyImpl(Rewriter &&CpyRewriter);
  bool optimizeCoalescableCopy(MachineInstr &MI);
  bool optimizeUncoalescableCopy(MachineInstr &MI,
                                 SmallPtrSetImpl<MachineInstr *> &LocalMIs);
  bool optimizeRecurrence(MachineInstr &PHI);
  bool findNextSource(const TargetRegisterClass *DefRC, unsigned DefSubReg,
                      RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap);
  bool isMoveImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
                       DenseMap<Register, MachineInstr *> &ImmDefMIs);
  bool foldImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
                     DenseMap<Register, MachineInstr *> &ImmDefMIs,
                     bool &Deleted);

  /// Finds recurrence cycles formulated around a def operand and a use
  /// operand that are tied, possibly after commuting operands.
  bool findTargetRecurrence(Register Reg,
                            const SmallSet<Register, 2> &TargetReg,
                            RecurrenceCycle &RC);

  /// If copy instruction \p MI is the same as a previously seen copy, remove
  /// it and rewrite its uses.
  bool foldRedundantCopy(MachineInstr &MI);

  /// Is \p Reg a non-allocatable physical register?
  bool isNAPhysCopy(Register Reg);

  /// If copy instruction \p MI is a non-allocatable virtual<->physical
  /// register copy, track it in the \p NAPhysToVirtMIs map.
  bool foldRedundantNAPhysCopy(
      MachineInstr &MI, DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs);

  bool isLoadFoldable(MachineInstr &MI,
                      SmallSet<Register, 16> &FoldAsLoadDefCandidates);
  /// Check whether \p MI is understood by the register coalescer but may
  /// require rewriting to be kept.
  static bool isCoalescableCopy(const MachineInstr &MI) {
    // SubregToRegs are not interesting, because they are already register
    // coalescer friendly.
    return MI.isCopy() ||
           (!DisableAdvCopyOpt && (MI.isRegSequence() || MI.isInsertSubreg() ||
                                   MI.isExtractSubreg()));
  }

  /// Check whether \p MI is a copy-like instruction that is not recognized
  /// by the register coalescer.
  static bool isUncoalescableCopy(const MachineInstr &MI) {
    return MI.isBitcast() || (!DisableAdvCopyOpt && (MI.isRegSequenceLike() ||
                                                     MI.isInsertSubregLike() ||
                                                     MI.isExtractSubregLike()));
  }

  MachineInstr &rewriteSource(MachineInstr &CopyLike, RegSubRegPair Def,
                              RewriteMapTy &RewriteMap);
  /// Maps a (Reg, SubReg) source to the last COPY seen reading it, so that
  /// redundant copies can be folded. Maintained by the delegate hooks below.
  DenseMap<RegSubRegPair, MachineInstr *> CopySrcMIs;

  // MachineFunction::Delegate implementation. Used to maintain CopySrcMIs.
  void MF_HandleInsertion(MachineInstr &MI) override {}

  bool getCopySrc(MachineInstr &MI, RegSubRegPair &SrcPair) {
    if (!MI.isCopy())
      return false;

    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSubReg = MI.getOperand(1).getSubReg();
    if (!SrcReg.isVirtual() && !MRI->isConstantPhysReg(SrcReg))
      return false;

    SrcPair = RegSubRegPair(SrcReg, SrcSubReg);
    return true;
  }

  // If a COPY instruction is to be deleted or changed, remove it from
  // CopySrcMIs as well.
  void deleteChangedCopy(MachineInstr &MI) {
    RegSubRegPair SrcPair;
    if (!getCopySrc(MI, SrcPair))
      return;

    auto It = CopySrcMIs.find(SrcPair);
    if (It != CopySrcMIs.end() && It->second == &MI)
      CopySrcMIs.erase(It);
  }

  void MF_HandleRemoval(MachineInstr &MI) override { deleteChangedCopy(MI); }

  void MF_HandleChangeDesc(MachineInstr &MI, const MCInstrDesc &TID) override {
    deleteChangedCopy(MI);
  }
};
class PeepholeOptimizerLegacy : public MachineFunctionPass {
public:
  static char ID; // Pass identification

  PeepholeOptimizerLegacy() : MachineFunctionPass(ID) {
    initializePeepholeOptimizerLegacyPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addRequired<MachineLoopInfoWrapperPass>();
    AU.addPreserved<MachineLoopInfoWrapperPass>();
    if (Aggressive) {
      AU.addRequired<MachineDominatorTreeWrapperPass>();
      AU.addPreserved<MachineDominatorTreeWrapperPass>();
    }
  }

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().setIsSSA();
  }
};
/// Helper class to hold instructions that are inside recurrence cycles.
class RecurrenceInstr {
public:
  using IndexPair = std::pair<unsigned, unsigned>;

  RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
  RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2)
      : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}

  MachineInstr *getMI() const { return MI; }
  std::optional<IndexPair> getCommutePair() const { return CommutePair; }

private:
  MachineInstr *MI;
  std::optional<IndexPair> CommutePair;
};
/// Helper class to hold a reply for ValueTracker queries: the sources found
/// for a given search and the instruction they were tracked from.
class ValueTrackerResult {
private:
  /// Track all sources found by one ValueTracker query.
  SmallVector<RegSubRegPair, 2> RegSrcs;

  /// Instruction using the sources in 'RegSrcs'.
  const MachineInstr *Inst = nullptr;

public:
  ValueTrackerResult() = default;

  ValueTrackerResult(Register Reg, unsigned SubReg) { addSource(Reg, SubReg); }

  bool isValid() const { return getNumSources() > 0; }

  void setInst(const MachineInstr *I) { Inst = I; }
  const MachineInstr *getInst() const { return Inst; }

  void addSource(Register SrcReg, unsigned SrcSubReg) {
    RegSrcs.push_back(RegSubRegPair(SrcReg, SrcSubReg));
  }

  void setSource(int Idx, Register SrcReg, unsigned SrcSubReg) {
    assert(Idx < getNumSources() && "Reg pair source out of index");
    RegSrcs[Idx] = RegSubRegPair(SrcReg, SrcSubReg);
  }

  int getNumSources() const { return RegSrcs.size(); }

  RegSubRegPair getSrc(int Idx) const { return RegSrcs[Idx]; }

  Register getSrcReg(int Idx) const {
    assert(Idx < getNumSources() && "Reg source out of index");
    return RegSrcs[Idx].Reg;
  }

  unsigned getSrcSubReg(int Idx) const {
    assert(Idx < getNumSources() && "SubReg source out of index");
    return RegSrcs[Idx].SubReg;
  }

  bool operator==(const ValueTrackerResult &Other) const {
    if (Other.getInst() != getInst())
      return false;

    if (Other.getNumSources() != getNumSources())
      return false;

    for (int i = 0, e = Other.getNumSources(); i != e; ++i)
      if (Other.getSrcReg(i) != getSrcReg(i) ||
          Other.getSrcSubReg(i) != getSrcSubReg(i))
        return false;
    return true;
  }
};
/// Helper class to track the possible sources of a value defined by a
/// (chain of) copy-related instructions.
class ValueTracker {
  /// The current point into the use-def chain.
  const MachineInstr *Def = nullptr;
  /// The index of the definition in Def.
  unsigned DefIdx = 0;
  /// The sub-register index of the tracked value in its definition.
  unsigned DefSubReg;
  /// The register where the value can be found.
  Register Reg;
  /// MachineRegisterInfo used to perform the tracking.
  const MachineRegisterInfo &MRI;
  /// Optional TargetInstrInfo, used to look through copy-like pseudos.
  const TargetInstrInfo *TII;

  /// Dispatcher to the right underlying implementation of getNextSource.
  ValueTrackerResult getNextSourceImpl();
  ValueTrackerResult getNextSourceFromCopy();
  ValueTrackerResult getNextSourceFromBitcast();
  ValueTrackerResult getNextSourceFromRegSequence();
  ValueTrackerResult getNextSourceFromInsertSubreg();
  ValueTrackerResult getNextSourceFromExtractSubreg();
  ValueTrackerResult getNextSourceFromSubregToReg();
  ValueTrackerResult getNextSourceFromPHI();

public:
  ValueTracker(Register Reg, unsigned DefSubReg,
               const MachineRegisterInfo &MRI,
               const TargetInstrInfo *TII = nullptr)
      : DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) {
    if (!Reg.isPhysical()) {
      Def = MRI.getVRegDef(Reg);
      DefIdx = MRI.def_begin(Reg).getOperandNo();
    }
  }

  /// Following the use-def chain, get the next available source for the
  /// tracked value.
  ValueTrackerResult getNextSource();
};
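// Usage sketch (ours, mirroring the driver loop in findNextSource below):
//   ValueTracker VT(Reg, SubReg, *MRI, TII);
//   ValueTrackerResult Res = VT.getNextSource();
//   while (Res.isValid()) {
//     // inspect Res.getSrcReg(0)/getSrcSubReg(0), decide whether to stop...
//     Res = VT.getNextSource(); // climbs one definition up the chain
//   }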
char PeepholeOptimizerLegacy::ID = 0;

char &llvm::PeepholeOptimizerLegacyID = PeepholeOptimizerLegacy::ID;

INITIALIZE_PASS_BEGIN(PeepholeOptimizerLegacy, DEBUG_TYPE,
                      "Peephole Optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_END(PeepholeOptimizerLegacy, DEBUG_TYPE,
                    "Peephole Optimizations", false, false)
bool PeepholeOptimizer::optimizeExtInstr(
    MachineInstr &MI, MachineBasicBlock &MBB,
    SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
  Register SrcReg, DstReg;
  unsigned SubIdx;
  if (!TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx))
    return false;

  if (DstReg.isPhysical() || SrcReg.isPhysical())
    return false;

  if (MRI->hasOneNonDBGUse(SrcReg))
    // No other uses.
    return false;

  // Ensure DstReg can get a register class that actually supports
  // sub-registers. Don't change the class until we commit.
  const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
  DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx);
  if (!DstRC)
    return false;

  // The ext instr may be operating on a sub-register of SrcReg as well.
  // If UseSrcSubIdx is set, SubIdx also applies to SrcReg.
  bool UseSrcSubIdx =
      TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr;

  // The source has other uses. See if we can replace them with uses of the
  // result of the extension.
  SmallPtrSet<MachineBasicBlock *, 4> ReachedBBs;
  for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
    ReachedBBs.insert(UI.getParent());

  // Uses that are in the same BB as uses of the result of the instruction.
  SmallVector<MachineOperand *, 8> Uses;
  // Uses that the result of the instruction can reach.
  SmallVector<MachineOperand *, 8> ExtendedUses;

  bool ExtendLife = true;
  for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
    MachineInstr *UseMI = UseMO.getParent();
    if (UseMI == &MI)
      continue;

    if (UseMI->isPHI()) {
      ExtendLife = false;
      continue;
    }

    // Only accept uses of SrcReg:SubIdx.
    if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
      continue;

    // SUBREG_TO_REG asserts that an implicit zero-extension occurred; that
    // guarantee does not survive being fed by a plain COPY.
    if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
      return false;

    MachineBasicBlock *UseMBB = UseMI->getParent();
    if (UseMBB == &MBB) {
      // Local uses that come after the extension.
      if (!LocalMIs.count(UseMI))
        Uses.push_back(&UseMO);
    } else if (ReachedBBs.count(UseMBB)) {
      // Non-local uses where the result of the extension is used. Always
      // replace these unless it's a PHI.
      Uses.push_back(&UseMO);
    } else if (Aggressive && DT->dominates(&MBB, UseMBB)) {
      // We may want to extend the live range of the extension result in
      // order to replace these uses.
      ExtendedUses.push_back(&UseMO);
    } else {
      // Both will be live out of the def MBB anyway. Don't extend the live
      // range of SrcReg, which could be undesirable.
      return false;
    }
  }

  if (ExtendLife && !ExtendedUses.empty())
    // Extend the liveness of the extension result.
    Uses.append(ExtendedUses.begin(), ExtendedUses.end());

  // Now replace all uses.
  bool Changed = false;
  if (!Uses.empty()) {
    // Don't extend the liveness of a PHI input; a PHI use is expected to be
    // the kill of its source values.
    SmallPtrSet<MachineBasicBlock *, 4> PHIBBs;
    for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
      if (UI.isPHI())
        PHIBBs.insert(UI.getParent());

    const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
    for (MachineOperand *UseMO : Uses) {
      MachineInstr *UseMI = UseMO->getParent();
      MachineBasicBlock *UseMBB = UseMI->getParent();
      if (PHIBBs.count(UseMBB))
        continue;

      // About to add uses of DstReg, clear DstReg's kill flags.
      if (!Changed) {
        MRI->clearKillFlags(DstReg);
        MRI->constrainRegClass(DstReg, DstRC);
      }

      // SubReg defs are illegal in the machine-SSA phase; use the register
      // class of the use's own def when SubIdx also applies to SrcReg.
      if (UseSrcSubIdx)
        RC = MRI->getRegClass(UseMI->getOperand(0).getReg());

      Register NewVR = MRI->createVirtualRegister(RC);
      BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
              TII->get(TargetOpcode::COPY), NewVR)
          .addReg(DstReg, 0, SubIdx);
      if (UseSrcSubIdx)
        UseMO->setSubReg(0);

      UseMO->setReg(NewVR);
      ++NumReuse;
      Changed = true;
    }
  }

  return Changed;
}
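// Worked example (ours, modeled on the pass's header comment; opcodes are
// x86-64): given
//   %2:gr64 = MOVSX64rr32 %1      ; sign extension of %1
//   ... use of %1 ...
// the later use of %1 is rewritten to read the extension result instead:
//   %3:gr32 = COPY %2.sub_32bit
//   ... use of %3 ...
// so the register allocator can coalesce %3 with %2 rather than keeping
// both %1 and %2 live across the uses.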
bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) {
  // If this instruction is a comparison against zero and isn't comparing a
  // physical register, we can try to optimize it.
  Register SrcReg, SrcReg2;
  int64_t CmpMask, CmpValue;
  if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
      SrcReg.isPhysical() || SrcReg2.isPhysical())
    return false;

  // Attempt to optimize the comparison instruction.
  LLVM_DEBUG(dbgs() << "Attempting to optimize compare: " << MI);
  if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
    ++NumCmps;
    return true;
  }

  return false;
}
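// Typical effect (ours): on a target where arithmetic can set the condition
// flags, a SUB followed by a CMP of the same operands lets
// optimizeCompareInstr() delete the CMP and reuse the flags of the
// (possibly flag-setting variant of the) SUB, e.g. SUB -> SUBS on AArch64.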
bool PeepholeOptimizer::optimizeSelect(
    MachineInstr &MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
  unsigned TrueOp = 0;
  unsigned FalseOp = 0;
  bool Optimizable = false;
  SmallVector<MachineOperand, 4> Cond;
  if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
    return false;
  if (!Optimizable)
    return false;
  if (!TII->optimizeSelect(MI, LocalMIs))
    return false;
  LLVM_DEBUG(dbgs() << "Deleting select: " << MI);
  MI.eraseFromParent();
  ++NumSelects;
  return true;
}
bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) {
  return TII->optimizeCondBranch(MI);
}
bool PeepholeOptimizer::findNextSource(const TargetRegisterClass *DefRC,
                                       unsigned DefSubReg,
                                       RegSubRegPair RegSubReg,
                                       RewriteMapTy &RewriteMap) {
  // Do not try to find a new source for a physical register.
  Register Reg = RegSubReg.Reg;
  if (Reg.isPhysical())
    return false;

  SmallVector<RegSubRegPair, 4> SrcToLook;
  RegSubRegPair CurSrcPair = RegSubReg;
  SrcToLook.push_back(CurSrcPair);

  unsigned PHICount = 0;
  do {
    CurSrcPair = SrcToLook.pop_back_val();
    // As explained above, do not handle physical registers.
    if (CurSrcPair.Reg.isPhysical())
      return false;

    ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII);

    // Follow the chain of copies until we find a more suitable source, a
    // PHI, or have to abort.
    while (true) {
      ValueTrackerResult Res = ValTracker.getNextSource();
      // If the source is invalid, we found the end of the chain.
      if (!Res.isValid())
        break;

      // Insert the Def -> Use entry for the recently found source.
      auto [InsertPt, WasInserted] = RewriteMap.try_emplace(CurSrcPair, Res);
      if (!WasInserted) {
        const ValueTrackerResult &CurSrcRes = InsertPt->second;
        assert(CurSrcRes == Res && "ValueTrackerResult found must match");
        // An existing entry with multiple sources is a PHI cycle we must
        // avoid. Otherwise it is an entry with a valid next source we
        // already found.
        if (CurSrcRes.getNumSources() > 1) {
          LLVM_DEBUG(dbgs()
                     << "findNextSource: found PHI cycle, aborting...\n");
          return false;
        }
        break;
      }

      // ValueTrackerResult usually has one source unless it comes from a
      // PHI instruction. Add found PHI edges to be looked up further.
      unsigned NumSrcs = Res.getNumSources();
      if (NumSrcs > 1) {
        PHICount++;
        if (PHICount >= RewritePHILimit) {
          LLVM_DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
          return false;
        }

        for (unsigned i = 0; i < NumSrcs; ++i)
          SrcToLook.push_back(Res.getSrc(i));
        break;
      }

      CurSrcPair = Res.getSrc(0);
      // Do not extend the live ranges of physical registers: they add
      // constraints to the register allocator, and unlike SSA virtual
      // registers, they could be redefined before the related use.
      if (CurSrcPair.Reg.isPhysical())
        return false;

      // Keep following the chain if the value isn't any better yet.
      const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
      if (!TRI->shouldRewriteCopySrc(DefRC, DefSubReg, SrcRC,
                                     CurSrcPair.SubReg))
        continue;

      // We currently cannot deal with subreg operands on PHI instructions
      // (see insertPHI()).
      if (PHICount > 0 && CurSrcPair.SubReg != 0)
        continue;

      // We found a suitable source, and are done with this chain.
      break;
    }
  } while (!SrcToLook.empty());

  // If we did not find a more suitable source, there is nothing to optimize.
  return CurSrcPair.Reg != Reg;
}
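// Chain-walking example (ours): given
//   %1 = COPY %0.sub0
//   %2 = REG_SEQUENCE %1, sub0, %3, sub1
//   %4 = COPY %2.sub0
// tracking %4 steps through the REG_SEQUENCE and the first COPY back to
// %0.sub0; each hop is recorded in RewriteMap so getNewSource() /
// rewriteSource() can replay the walk when committing the rewrite.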
/// Insert a PHI instruction with incoming edges \p SrcRegs that are
/// guaranteed to have the same register class. This is necessary whenever we
/// successfully traverse a PHI instruction and find suitable sources for
/// each of its edges.
static MachineInstr &insertPHI(MachineRegisterInfo &MRI,
                               const TargetInstrInfo &TII,
                               const SmallVectorImpl<RegSubRegPair> &SrcRegs,
                               MachineInstr &OrigPHI) {
  assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");

  const TargetRegisterClass *NewRC = MRI.getRegClass(SrcRegs[0].Reg);
  // NewRC is only correct if no subregisters are involved. findNextSource()
  // should have rejected those cases already.
  assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand");
  Register NewVR = MRI.createVirtualRegister(NewRC);
  MachineBasicBlock *MBB = OrigPHI.getParent();
  MachineInstrBuilder MIB = BuildMI(*MBB, &OrigPHI, OrigPHI.getDebugLoc(),
                                    TII.get(TargetOpcode::PHI), NewVR);

  unsigned MBBOpIdx = 2;
  for (const RegSubRegPair &RegPair : SrcRegs) {
    MIB.addReg(RegPair.Reg, 0, RegPair.SubReg);
    MIB.addMBB(OrigPHI.getOperand(MBBOpIdx).getMBB());
    // Since we've extended the lifetime of RegPair.Reg, clear the kill flags
    // to account for that and make RegPair.Reg reach the new PHI.
    MRI.clearKillFlags(RegPair.Reg);
    MBBOpIdx += 2;
  }

  return *MIB;
}
/// Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find the new
/// source to use for rewriting.
static RegSubRegPair
getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
             RegSubRegPair Def,
             const PeepholeOptimizer::RewriteMapTy &RewriteMap,
             bool HandleMultipleSources = true) {
  RegSubRegPair LookupSrc(Def.Reg, Def.SubReg);
  while (true) {
    ValueTrackerResult Res = RewriteMap.lookup(LookupSrc);
    // If there are no entries in the map, LookupSrc is the new source.
    if (!Res.isValid())
      return LookupSrc;

    // There's only one source for this definition, keep searching...
    unsigned NumSrcs = Res.getNumSources();
    if (NumSrcs == 1) {
      LookupSrc.Reg = Res.getSrcReg(0);
      LookupSrc.SubReg = Res.getSrcSubReg(0);
      continue;
    }

    // TODO: remove once multiple srcs w/ coalescable copies are supported.
    if (!HandleMultipleSources)
      break;

    // Multiple sources: recurse into each one and merge them with a new PHI.
    MachineInstr &OrigPHI = const_cast<MachineInstr &>(*Res.getInst());
    SmallVector<RegSubRegPair, 4> NewPHISrcs;
    for (unsigned i = 0; i < NumSrcs; ++i) {
      RegSubRegPair PHISrc(Res.getSrcReg(i), Res.getSrcSubReg(i));
      NewPHISrcs.push_back(
          getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources));
    }

    // Build the new PHI and return its def register as the new source.
    MachineInstr &NewPHI = insertPHI(*MRI, *TII, NewPHISrcs, OrigPHI);
    return RegSubRegPair(NewPHI.getOperand(0).getReg(), 0);
  }

  return Def;
}
bool PeepholeOptimizer::optimizeCoalescableCopyImpl(Rewriter &&CpyRewriter) {
  bool Changed = false;
  // Rewrite each rewritable source.
  RegSubRegPair Dst;
  RegSubRegPair TrackPair;
  while (CpyRewriter.getNextRewritableSource(TrackPair, Dst)) {
    if (Dst.Reg.isPhysical()) {
      // Do not try to find a new source for a physical register.
      continue;
    }
    const TargetRegisterClass *DefRC = MRI->getRegClass(Dst.Reg);

    // Keep track of PHI nodes and their incoming edges when looking for
    // sources.
    RewriteMapTy RewriteMap;
    // Try to find a more suitable source. If we failed to do so, or got the
    // actual source, move to the next source.
    if (!findNextSource(DefRC, Dst.SubReg, TrackPair, RewriteMap))
      continue;

    // Get the new source to rewrite. TODO: Only enable handling of multiple
    // sources (PHIs) once we have a motivating example and testcases for it.
    RegSubRegPair NewSrc = getNewSource(MRI, TII, TrackPair, RewriteMap,
                                        /*HandleMultipleSources=*/false);
    assert(TrackPair.Reg != NewSrc.Reg &&
           "should not rewrite source to original value");
    if (!NewSrc.Reg)
      continue;

    // Rewrite source.
    if (CpyRewriter.RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) {
      // We may have extended the live-range of NewSrc, account for that.
      MRI->clearKillFlags(NewSrc.Reg);
      Changed = true;
    }
  }

  NumRewrittenCopies += Changed;
  return Changed;
}
/// Optimize generic copy instructions to avoid cross register bank copies.
bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) {
  assert(isCoalescableCopy(MI) && "Invalid argument");
  assert(MI.getDesc().getNumDefs() == 1 &&
         "Coalescer can understand multiple defs?!");
  const MachineOperand &MODef = MI.getOperand(0);
  // Do not rewrite physical definitions.
  if (MODef.getReg().isPhysical())
    return false;

  switch (MI.getOpcode()) {
  case TargetOpcode::COPY:
    return optimizeCoalescableCopyImpl(CopyRewriter(MI));
  case TargetOpcode::INSERT_SUBREG:
    return optimizeCoalescableCopyImpl(InsertSubregRewriter(MI));
  case TargetOpcode::EXTRACT_SUBREG:
    return optimizeCoalescableCopyImpl(ExtractSubregRewriter(MI, *TII));
  case TargetOpcode::REG_SEQUENCE:
    return optimizeCoalescableCopyImpl(RegSequenceRewriter(MI));
  default:
    // Handle uncoalescable copy-like instructions.
    if (MI.isBitcast() || MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
        MI.isExtractSubregLike())
      return optimizeCoalescableCopyImpl(UncoalescableRewriter(MI));
    return false;
  }
}
/// Rewrite the source found through \p Def, by inserting a new COPY.
MachineInstr &PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,
                                               RegSubRegPair Def,
                                               RewriteMapTy &RewriteMap) {
  assert(!Def.Reg.isPhysical() && "We do not rewrite physical registers");

  // Find the new source to use in the COPY rewrite.
  RegSubRegPair NewSrc = getNewSource(MRI, TII, Def, RewriteMap);

  // Insert the COPY.
  const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
  Register NewVReg = MRI->createVirtualRegister(DefRC);

  MachineInstr *NewCopy =
      BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(),
              TII->get(TargetOpcode::COPY), NewVReg)
          .addReg(NewSrc.Reg, 0, NewSrc.SubReg);

  if (Def.SubReg) {
    NewCopy->getOperand(0).setSubReg(Def.SubReg);
    NewCopy->getOperand(0).setIsUndef();
  }

  MRI->replaceRegWith(Def.Reg, NewVReg);
  MRI->clearKillFlags(NewVReg);

  // We extended the lifetime of NewSrc.Reg, clear the kill flags to account
  // for that.
  MRI->clearKillFlags(NewSrc.Reg);

  return *NewCopy;
}
/// Optimize copy-like instructions to create register-coalescer friendly
/// COPY instructions.
bool PeepholeOptimizer::optimizeUncoalescableCopy(
    MachineInstr &MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
  assert(isUncoalescableCopy(MI) && "Invalid argument");
  UncoalescableRewriter CpyRewriter(MI);

  // Rewrite each rewritable source by generating new COPYs. This works
  // differently from optimizeCoalescableCopy since it first makes sure that
  // all definitions can be rewritten.
  RewriteMapTy RewriteMap;
  RegSubRegPair Src;
  RegSubRegPair Def;
  SmallVector<RegSubRegPair, 4> RewritePairs;
  while (CpyRewriter.getNextRewritableSource(Src, Def)) {
    // If a physical register is here, this is probably for a good reason.
    // Do not rewrite that.
    if (Def.Reg.isPhysical())
      return false;

    // If we do not know how to rewrite this definition, there is no point in
    // trying to kill this instruction.
    const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
    if (!findNextSource(DefRC, Def.SubReg, Def, RewriteMap))
      return false;

    RewritePairs.push_back(Def);
  }

  // The change is possible for all defs, do it.
  for (const RegSubRegPair &Def : RewritePairs) {
    // Rewrite the "copy" in a way the register coalescer understands.
    MachineInstr &NewCopy = rewriteSource(MI, Def, RewriteMap);
    LocalMIs.insert(&NewCopy);
  }

  // MI is now dead.
  LLVM_DEBUG(dbgs() << "Deleting uncoalescable copy: " << MI);
  MI.eraseFromParent();
  ++NumUncoalescableCopies;
  return true;
}
/// Check whether \p MI is a candidate for folding into a later instruction.
bool PeepholeOptimizer::isLoadFoldable(
    MachineInstr &MI, SmallSet<Register, 16> &FoldAsLoadDefCandidates) {
  if (!MI.canFoldAsLoad() || !MI.mayLoad())
    return false;
  const MCInstrDesc &MCID = MI.getDesc();
  if (MCID.getNumDefs() != 1)
    return false;

  Register Reg = MI.getOperand(0).getReg();
  // To reduce compilation time, we check MRI->hasOneNonDBGUser when
  // inserting loads. It should be checked when processing uses of the load,
  // since uses can be removed during peephole.
  if (Reg.isVirtual() && !MI.getOperand(0).getSubReg() &&
      MRI->hasOneNonDBGUser(Reg)) {
    FoldAsLoadDefCandidates.insert(Reg);
    return true;
  }
  return false;
}
bool PeepholeOptimizer::isMoveImmediate(
    MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
    DenseMap<Register, MachineInstr *> &ImmDefMIs) {
  const MCInstrDesc &MCID = MI.getDesc();
  if (MCID.getNumDefs() != 1 || !MI.getOperand(0).isReg())
    return false;
  Register Reg = MI.getOperand(0).getReg();
  if (!Reg.isVirtual())
    return false;

  int64_t ImmVal;
  if (!MI.isMoveImmediate() && !TII->getConstValDefinedInReg(MI, Reg, ImmVal))
    return false;

  ImmDefMIs.insert(std::make_pair(Reg, &MI));
  ImmDefRegs.insert(Reg);
  return true;
}
/// Try folding register operands that are defined by move-immediate
/// instructions, i.e. a trivial constant-folding optimization. If there is a
/// sequence of identical instructions after folding, delete the duplicate
/// and set \p Deleted.
bool PeepholeOptimizer::foldImmediate(
    MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
    DenseMap<Register, MachineInstr *> &ImmDefMIs, bool &Deleted) {
  Deleted = false;
  for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || MO.isDef())
      continue;
    Register Reg = MO.getReg();
    if (!Reg.isVirtual())
      continue;
    if (ImmDefRegs.count(Reg) == 0)
      continue;
    DenseMap<Register, MachineInstr *>::iterator II = ImmDefMIs.find(Reg);
    assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
    if (TII->foldImmediate(MI, *II->second, Reg, MRI)) {
      ++NumImmFold;
      // foldImmediate can delete ImmDefMI if MI was its only user. If
      // ImmDefMI is not deleted, and we happened to get an identical MI, we
      // can delete MI and replace its users.
      if (MRI->getVRegDef(Reg) &&
          MI.isIdenticalTo(*II->second, MachineInstr::IgnoreVRegDefs)) {
        Register DstReg = MI.getOperand(0).getReg();
        if (DstReg.isVirtual() &&
            MRI->getRegClass(DstReg) == MRI->getRegClass(Reg)) {
          MRI->replaceRegWith(DstReg, Reg);
          MI.eraseFromParent();
          Deleted = true;
        }
      }
      return true;
    }
  }
  return false;
}
bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI) {
  assert(MI.isCopy() && "expected a COPY machine instruction");

  RegSubRegPair SrcPair;
  if (!getCopySrc(MI, SrcPair))
    return false;

  Register DstReg = MI.getOperand(0).getReg();
  if (!DstReg.isVirtual())
    return false;

  if (CopySrcMIs.insert(std::make_pair(SrcPair, &MI)).second) {
    // First copy of this register seen.
    return false;
  }

  MachineInstr *PrevCopy = CopySrcMIs.find(SrcPair)->second;

  assert(SrcPair.SubReg == PrevCopy->getOperand(1).getSubReg() &&
         "Unexpected mismatching subreg!");

  Register PrevDstReg = PrevCopy->getOperand(0).getReg();

  // Only replace if the copy register class is the same.
  // TODO: If we have multiple copies to different register classes, we may
  // want to track multiple copies of the same source register.
  if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg))
    return false;

  MRI->replaceRegWith(DstReg, PrevDstReg);

  // The lifetime of the previous copy has been extended.
  MRI->clearKillFlags(PrevDstReg);
  return true;
}
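// Example (ours): with no redefinition of %0 in between,
//   %1 = COPY %0
//   ...
//   %2 = COPY %0
// the second COPY is redundant: uses of %2 are redirected to %1 here, and
// the caller erases the duplicate instruction.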
bool PeepholeOptimizer::isNAPhysCopy(Register Reg) {
  return Reg.isPhysical() && !MRI->isAllocatable(Reg);
}

bool PeepholeOptimizer::foldRedundantNAPhysCopy(
    MachineInstr &MI, DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs) {
  assert(MI.isCopy() && "expected a COPY machine instruction");

  if (DisableNAPhysCopyOpt)
    return false;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  if (isNAPhysCopy(SrcReg) && DstReg.isVirtual()) {
    // %vreg = COPY $physreg
    // Remember the last non-allocatable phys->virt copy for this register.
    NAPhysToVirtMIs.insert({SrcReg, &MI});
    return false;
  }

  if (!(SrcReg.isVirtual() && isNAPhysCopy(DstReg)))
    return false;

  // $physreg = COPY %vreg
  auto PrevCopy = NAPhysToVirtMIs.find(DstReg);
  if (PrevCopy == NAPhysToVirtMIs.end()) {
    // We can't remove the copy: there was an intervening clobber of the
    // non-allocatable physical register after the copy to virtual.
    LLVM_DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing "
                      << MI);
    return false;
  }

  Register PrevDstReg = PrevCopy->second->getOperand(0).getReg();
  if (PrevDstReg == SrcReg) {
    // Remove the virt->phys copy: we saw the virtual register definition,
    // and the non-allocatable physical register's state hasn't changed since
    // then.
    LLVM_DEBUG(dbgs() << "NAPhysCopy: erasing " << MI);
    ++NumNAPhysCopies;
    return true;
  }

  // Potential missed optimization opportunity: we saw a different virtual
  // register get a copy of the non-allocatable physical register, and we
  // only track one such copy. Avoid getting confused again and again.
  LLVM_DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << MI);
  NAPhysToVirtMIs.erase(PrevCopy);
  return false;
}
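// Example (ours): a non-allocatable physical register such as the stack
// pointer copied out and back with no intervening clobber,
//   %0 = COPY $sp
//   ...
//   $sp = COPY %0
// makes the second COPY a no-op; we return true and the caller erases it.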
bool PeepholeOptimizer::findTargetRecurrence(
    Register Reg, const SmallSet<Register, 2> &TargetRegs,
    RecurrenceCycle &RC) {
  // Recurrence found if Reg is in TargetRegs.
  if (TargetRegs.count(Reg))
    return true;

  // TODO: Currently, we only allow the last instruction of the recurrence
  // cycle (the instruction that feeds the PHI instruction) to have more than
  // one use, to guarantee that commuting operands does not tie registers
  // with overlapping live ranges. Once we have actual live-range info for
  // each register, this constraint can be relaxed.
  if (!MRI->hasOneNonDBGUse(Reg))
    return false;

  // Give up if the recurrence chain length is longer than the limit.
  if (RC.size() >= MaxRecurrenceChain)
    return false;

  MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg));
  unsigned Idx = MI.findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr);

  // Only interested in recurrences whose instructions have only one def,
  // which is a virtual register.
  if (MI.getDesc().getNumDefs() != 1)
    return false;

  MachineOperand &DefOp = MI.getOperand(0);
  if (!DefOp.isReg())
    return false;

  // MI and MI's operand Idx form a part of a potential recurrence chain if
  // the operand Idx is tied to the def operand, or can be commuted to an
  // operand that is.
  unsigned TiedUseIdx;
  if (!MI.isRegTiedToUseOperand(0, &TiedUseIdx))
    return false;

  if (Idx == TiedUseIdx) {
    RC.push_back(RecurrenceInstr(&MI));
    return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
  }

  // If Idx is not TiedUseIdx, check whether Idx is commutable with
  // TiedUseIdx.
  unsigned CommIdx = TargetInstrInfo::CommuteAnyOperandIndex;
  if (TII->findCommutedOpIndices(MI, Idx, CommIdx) && CommIdx == TiedUseIdx) {
    RC.push_back(RecurrenceInstr(&MI, Idx, CommIdx));
    return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
  }

  return false;
}
/// Returns true if \p MO is a virtual register operand.
static bool isVirtualRegisterOperand(MachineOperand &MO) {
  return MO.isReg() && MO.getReg().isVirtual();
}

bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
  SmallSet<Register, 2> TargetRegs;
  for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) {
    MachineOperand &MO = PHI.getOperand(Idx);
    assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction");
    TargetRegs.insert(MO.getReg());
  }

  bool Changed = false;
  RecurrenceCycle RC;
  if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) {
    // Commute operands of instructions in RC if necessary so that the copy
    // to be generated from the PHI can be coalesced.
    LLVM_DEBUG(dbgs() << "Optimize recurrence chain from " << PHI);
    for (auto &RI : RC) {
      LLVM_DEBUG(dbgs() << "\tInst: " << *(RI.getMI()));
      auto CP = RI.getCommutePair();
      if (CP) {
        Changed = true;
        TII->commuteInstruction(*(RI.getMI()), false, (*CP).first,
                                (*CP).second);
        LLVM_DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI()));
      }
    }
  }

  return Changed;
}
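// Motivating shape (ours): for a loop accumulator
//   %sum = PHI [%init, %entry], [%sum.next, %loop]
//   %sum.next = ADD %other, %sum    ; %sum feeds the non-tied operand
// commuting the ADD so %sum lands on the operand tied to the def lets the
// allocator coalesce %sum with %sum.next and removes a copy on the loop
// back edge.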
PreservedAnalyses
PeepholeOptimizerPass::run(MachineFunction &MF,
                           MachineFunctionAnalysisManager &MFAM) {
  MFPropsModifier _(*this, MF);
  auto *DT =
      Aggressive ? &MFAM.getResult<MachineDominatorTreeAnalysis>(MF) : nullptr;
  auto *MLI = &MFAM.getResult<MachineLoopAnalysis>(MF);
  PeepholeOptimizer Impl(DT, MLI);
  bool Changed = Impl.run(MF);
  if (!Changed)
    return PreservedAnalyses::all();

  auto PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserve<MachineDominatorTreeAnalysis>();
  PA.preserve<MachineLoopAnalysis>();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

bool PeepholeOptimizerLegacy::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;
  auto *DT = Aggressive
                 ? &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree()
                 : nullptr;
  auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  PeepholeOptimizer Impl(DT, MLI);
  return Impl.run(MF);
}
bool PeepholeOptimizer::run(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
  LLVM_DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');

  if (DisablePeephole)
    return false;

  TII = MF.getSubtarget().getInstrInfo();
  TRI = MF.getSubtarget().getRegisterInfo();
  MRI = &MF.getRegInfo();
  MF.setDelegate(this);

  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {
    bool SeenMoveImm = false;

    // During this forward scan, at some point it needs to answer the
    // question "given a pointer to an MI in the current BB, is it located
    // before or after the current instruction". The following set tracks the
    // MIs already seen during the scan; if an MI is not in the set, it is
    // assumed to be located after. Newly created MIs are inserted as well.
    SmallPtrSet<MachineInstr *, 16> LocalMIs;
    SmallSet<Register, 4> ImmDefRegs;
    DenseMap<Register, MachineInstr *> ImmDefMIs;
    SmallSet<Register, 16> FoldAsLoadDefCandidates;

    // Track when a non-allocatable physical register is copied to a virtual
    // register so that useless moves can be removed.
    DenseMap<Register, MachineInstr *> NAPhysToVirtMIs;

    CopySrcMIs.clear();

    bool IsLoopHeader = MLI->isLoopHeader(&MBB);

    for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
         MII != MIE;) {
      MachineInstr *MI = &*MII;
      // We may be erasing MI below, increment MII now.
      ++MII;
      LocalMIs.insert(MI);

      // Skip debug instructions. They should not affect this peephole
      // optimization.
      if (MI->isDebugInstr())
        continue;

      if (MI->isPosition())
        continue;

      if (IsLoopHeader && MI->isPHI()) {
        if (optimizeRecurrence(*MI)) {
          Changed = true;
          continue;
        }
      }

      if (!MI->isCopy()) {
        for (const MachineOperand &MO : MI->operands()) {
          // Visit all operands: definitions can be implicit or explicit.
          if (MO.isReg()) {
            Register Reg = MO.getReg();
            if (MO.isDef() && isNAPhysCopy(Reg)) {
              const auto &Def = NAPhysToVirtMIs.find(Reg);
              if (Def != NAPhysToVirtMIs.end()) {
                // A new definition of the non-allocatable physical register
                // invalidates previous copies.
                LLVM_DEBUG(dbgs()
                           << "NAPhysCopy: invalidating because of " << *MI);
                NAPhysToVirtMIs.erase(Def);
              }
            }
          } else if (MO.isRegMask()) {
            const uint32_t *RegMask = MO.getRegMask();
            for (auto &RegMI : NAPhysToVirtMIs) {
              Register Def = RegMI.first;
              if (MachineOperand::clobbersPhysReg(RegMask, Def)) {
                LLVM_DEBUG(dbgs()
                           << "NAPhysCopy: invalidating because of " << *MI);
                NAPhysToVirtMIs.erase(Def);
              }
            }
          }
        }
      }

      if (MI->isImplicitDef() || MI->isKill())
        continue;

      if (MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) {
        // Blow away all non-allocatable physical register knowledge since we
        // don't know what's correct anymore.
        LLVM_DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to "
                          << *MI);
        NAPhysToVirtMIs.clear();
      }

      if ((isUncoalescableCopy(*MI) &&
           optimizeUncoalescableCopy(*MI, LocalMIs)) ||
          (MI->isCompare() && optimizeCmpInstr(*MI)) ||
          (MI->isSelect() && optimizeSelect(*MI, LocalMIs))) {
        // MI is deleted.
        LocalMIs.erase(MI);
        Changed = true;
        continue;
      }

      if (MI->isConditionalBranch() && optimizeCondBranch(*MI)) {
        Changed = true;
        continue;
      }

      if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(*MI)) {
        // MI is just rewritten.
        Changed = true;
        continue;
      }

      if (MI->isCopy() && (foldRedundantCopy(*MI) ||
                           foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) {
        LocalMIs.erase(MI);
        LLVM_DEBUG(dbgs() << "Deleting redundant copy: " << *MI << "\n");
        MI->eraseFromParent();
        Changed = true;
        continue;
      }

      if (isMoveImmediate(*MI, ImmDefRegs, ImmDefMIs)) {
        SeenMoveImm = true;
      } else {
        Changed |= optimizeExtInstr(*MI, MBB, LocalMIs);
        // optimizeExtInstr might have created new instructions after MI and
        // before the already incremented MII. Adjust MII so that the next
        // iteration sees the new instructions.
        MII = MI;
        ++MII;
        if (SeenMoveImm) {
          bool Deleted;
          Changed |= foldImmediate(*MI, ImmDefRegs, ImmDefMIs, Deleted);
          if (Deleted) {
            LocalMIs.erase(MI);
            continue;
          }
        }
      }

      // Check whether MI is a load candidate for folding into a later
      // instruction. If MI is not a candidate, check whether we can fold an
      // earlier load into MI.
      if (!isLoadFoldable(*MI, FoldAsLoadDefCandidates) &&
          !FoldAsLoadDefCandidates.empty()) {

        // We visit each operand even after successfully folding a previous
        // one. This allows us to fold multiple loads into a single
        // instruction.
        const MCInstrDesc &MIDesc = MI->getDesc();
        for (unsigned i = MIDesc.getNumDefs(); i != MI->getNumOperands();
             ++i) {
          const MachineOperand &MOp = MI->getOperand(i);
          if (!MOp.isReg())
            continue;
          Register FoldAsLoadDefReg = MOp.getReg();
          if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
            // We need to fold load after optimizeCmpInstr, since
            // optimizeCmpInstr can enable folding by converting SUB to CMP.
            // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it
            // and we need it for markUsesInDebugValueAsUndef().
            Register FoldedReg = FoldAsLoadDefReg;
            MachineInstr *DefMI = nullptr;
            if (MachineInstr *FoldMI =
                    TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg,
                                           DefMI)) {
              // Update LocalMIs since we replaced MI with FoldMI and deleted
              // DefMI.
              LLVM_DEBUG(dbgs() << "Replacing: " << *MI);
              LLVM_DEBUG(dbgs() << "     With: " << *FoldMI);
              LocalMIs.erase(MI);
              LocalMIs.erase(DefMI);
              LocalMIs.insert(FoldMI);
              // Update the call info.
              if (MI->shouldUpdateAdditionalCallInfo())
                MI->getMF()->moveAdditionalCallInfo(MI, FoldMI);
              MI->eraseFromParent();
              DefMI->eraseFromParent();
              MRI->markUsesInDebugValueAsUndef(FoldedReg);
              FoldAsLoadDefCandidates.erase(FoldedReg);
              ++NumLoadFold;

              // MI is replaced with FoldMI so we can continue trying to
              // fold.
              Changed = true;
              MI = FoldMI;
            }
          }
        }
      }

      // If we run into an instruction we can't fold across, discard the load
      // candidates. Note: we might be able to fold *into* this instruction,
      // so this needs to be after the folding logic.
      if (MI->isLoadFoldBarrier()) {
        LLVM_DEBUG(dbgs() << "Encountered load fold barrier on " << *MI);
        FoldAsLoadDefCandidates.clear();
      }
    }
  }

  MF.resetDelegate(this);
  return Changed;
}
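// Ordering note (ours): within the scan above, compare/select/copy rewrites
// run before immediate and load folding on purpose; load folding in
// particular is attempted only after optimizeCmpInstr, since converting a
// SUB into a CMP can itself enable folding a load into the compare.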
ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
  assert(Def->isCopy() && "Invalid definition");
  // Copy instructions are supposed to be: Def = Src.
  // If someone breaks this assumption, bad things will happen everywhere.
  // There may be implicit uses preventing the copy from being moved across
  // some target-specific register definitions.
  assert(Def->getNumOperands() - Def->getNumImplicitOperands() == 2 &&
         "Invalid number of operands");
  assert(!Def->hasImplicitDef() && "Only implicit uses are allowed");

  assert(!Def->getOperand(DefIdx).getSubReg() && "no subregister defs in SSA");

  const MachineOperand &Src = Def->getOperand(1);
  if (Src.isUndef())
    return ValueTrackerResult();
  return ValueTrackerResult(Src.getReg(), Src.getSubReg());
}
ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
  assert(Def->isBitcast() && "Invalid definition");

  // Bail if there are effects that a plain copy will not expose.
  if (Def->mayRaiseFPException() || Def->hasUnmodeledSideEffects())
    return ValueTrackerResult();

  // Bitcasts with more than one def are not supported.
  if (Def->getDesc().getNumDefs() != 1)
    return ValueTrackerResult();

  assert(!Def->getOperand(DefIdx).getSubReg() && "no subregister defs in SSA");

  unsigned SrcIdx = Def->getNumOperands();
  for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
       ++OpIdx) {
    const MachineOperand &MO = Def->getOperand(OpIdx);
    if (!MO.isReg() || !MO.getReg())
      continue;
    // Ignore dead implicit defs.
    if (MO.isImplicit() && MO.isDead())
      continue;
    assert(!MO.isDef() && "We should have skipped all the definitions by now");
    if (SrcIdx != EndOpIdx)
      // Multiple sources?
      return ValueTrackerResult();
    SrcIdx = OpIdx;
  }

  // In some rare cases, Def has no input: SrcIdx is out of bounds and
  // getOperand(SrcIdx) would fail below.
  if (SrcIdx >= Def->getNumOperands())
    return ValueTrackerResult();

  // Stop when any user of the bitcast is a SUBREG_TO_REG, replacing with a
  // COPY would break the assumed guarantees for the upper bits.
  const MachineOperand &DefOp = Def->getOperand(DefIdx);
  for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(DefOp.getReg())) {
    if (UseMI.isSubregToReg())
      return ValueTrackerResult();
  }

  const MachineOperand &Src = Def->getOperand(SrcIdx);
  if (Src.isUndef())
    return ValueTrackerResult();
  return ValueTrackerResult(Src.getReg(), Src.getSubReg());
}
ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
  assert((Def->isRegSequence() || Def->isRegSequenceLike()) &&
         "Invalid definition");

  assert(!Def->getOperand(DefIdx).getSubReg() && "illegal subregister def");

  SmallVector<RegSubRegPairAndIdx, 8> RegSeqInputRegs;
  if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
    return ValueTrackerResult();

  // We are looking at:
  //   Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
  // Check if one of the operands exactly defines the subreg we are
  // interested in.
  for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
    if (RegSeqInput.SubIdx == DefSubReg)
      return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
  }

  // Otherwise, try to compose sub-register indices to look through an
  // operand that fully covers the tracked lanes.
  const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
  for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
    LaneBitmask DefMask = TRI->getSubRegIndexLaneMask(DefSubReg);
    LaneBitmask ThisOpRegMask = TRI->getSubRegIndexLaneMask(RegSeqInput.SubIdx);

    // Check that this operand covers all lanes of the tracked subregister.
    if ((DefMask & ThisOpRegMask) != DefMask)
      continue;

    unsigned ReverseDefCompose =
        TRI->reverseComposeSubRegIndices(RegSeqInput.SubIdx, DefSubReg);
    if (!ReverseDefCompose)
      continue;

    unsigned ComposedDefInSrcReg1 =
        TRI->composeSubRegIndices(RegSeqInput.SubReg, ReverseDefCompose);

    // The source register class must support the composed subregister.
    const TargetRegisterClass *SrcRC = MRI.getRegClass(RegSeqInput.Reg);
    const TargetRegisterClass *SrcWithSubRC =
        TRI->getSubClassWithSubReg(SrcRC, ComposedDefInSrcReg1);
    if (SrcRC != SrcWithSubRC)
      return ValueTrackerResult();

    return ValueTrackerResult(RegSeqInput.Reg, ComposedDefInSrcReg1);
  }

  return ValueTrackerResult();
}
ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() {
  assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) &&
         "Invalid definition");

  assert(!Def->getOperand(DefIdx).getSubReg() && "no subreg defs in SSA");

  RegSubRegPair BaseReg;
  RegSubRegPairAndIdx InsertedReg;
  if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
    return ValueTrackerResult();

  // We are looking at:
  //   Def = INSERT_SUBREG v0, v1, sub1
  // If we are tracking exactly the inserted lanes, the value is v1.
  if (InsertedReg.SubIdx == DefSubReg) {
    return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg);
  }

  // Otherwise the value may come from the base register v0, provided the
  // tracked lanes do not overlap the inserted ones.
  const MachineOperand &MODef = Def->getOperand(DefIdx);
  // If the result register (Def) and the base register (v0) do not have the
  // same register class, or if we have to compose subregisters, bail out.
  if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
      BaseReg.SubReg)
    return ValueTrackerResult();

  // Check that the inserted sub-register does not overlap with the
  // sub-register we are tracking.
  const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
  if ((TRI->getSubRegIndexLaneMask(DefSubReg) &
       TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx))
          .any())
    return ValueTrackerResult();

  // At this point, the value is available in v0 via the same subreg we used
  // for Def.
  return ValueTrackerResult(BaseReg.Reg, DefSubReg);
}
ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() {
  assert((Def->isExtractSubreg() || Def->isExtractSubregLike()) &&
         "Invalid definition");
  // We are looking at:
  //   Def = EXTRACT_SUBREG v0, sub0

  // Bail if we have to compose sub registers.
  // Indeed, if DefSubReg != 0, we would have to compose it with sub0.
  if (DefSubReg)
    return ValueTrackerResult();

  RegSubRegPairAndIdx ExtractSubregInputReg;
  if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
    return ValueTrackerResult();

  // Bail if we have to compose sub registers.
  // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
  if (ExtractSubregInputReg.SubReg)
    return ValueTrackerResult();
  // Otherwise, the value is available in v0.sub0.
  return ValueTrackerResult(ExtractSubregInputReg.Reg,
                            ExtractSubregInputReg.SubIdx);
}
ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() {
  assert(Def->isSubregToReg() && "Invalid definition");
  // We are looking at:
  //   Def = SUBREG_TO_REG Imm, v0, sub0

  // Bail if we have to compose sub registers.
  // If DefSubReg != sub0, we would have to check that all the bits we track
  // are included in sub0 and then compose the subreg indices.
  if (DefSubReg != Def->getOperand(3).getImm())
    return ValueTrackerResult();
  // Bail if we have to compose sub registers.
  // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
  if (Def->getOperand(2).getSubReg())
    return ValueTrackerResult();

  return ValueTrackerResult(Def->getOperand(2).getReg(),
                            Def->getOperand(3).getImm());
}
ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
  assert(Def->isPHI() && "Invalid definition");
  ValueTrackerResult Res;

  // If we look for a different subreg, bail as we do not support composing
  // subregs yet.
  if (Def->getOperand(0).getSubReg() != DefSubReg)
    return ValueTrackerResult();

  // Return all register sources for PHI instructions.
  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) {
    const MachineOperand &MO = Def->getOperand(i);
    assert(MO.isReg() && "Invalid PHI instruction");
    // We have no code to deal with undef operands. They shouldn't happen in
    // normal programs anyway.
    if (MO.isUndef())
      return ValueTrackerResult();
    Res.addSource(MO.getReg(), MO.getSubReg());
  }

  return Res;
}
ValueTrackerResult ValueTracker::getNextSourceImpl() {
  assert(Def && "This method needs a valid definition");

  assert(((Def->getOperand(DefIdx).isDef() &&
           (DefIdx < Def->getDesc().getNumDefs() ||
            Def->getDesc().isVariadic())) ||
          Def->getOperand(DefIdx).isImplicit()) &&
         "Invalid DefIdx");
  if (Def->isCopy())
    return getNextSourceFromCopy();
  if (Def->isBitcast())
    return getNextSourceFromBitcast();
  // All the remaining cases involve "complex" instructions.
  // Bail if we did not ask for advanced tracking.
  if (DisableAdvCopyOpt)
    return ValueTrackerResult();
  if (Def->isRegSequence() || Def->isRegSequenceLike())
    return getNextSourceFromRegSequence();
  if (Def->isInsertSubreg() || Def->isInsertSubregLike())
    return getNextSourceFromInsertSubreg();
  if (Def->isExtractSubreg() || Def->isExtractSubregLike())
    return getNextSourceFromExtractSubreg();
  if (Def->isSubregToReg())
    return getNextSourceFromSubregToReg();
  if (Def->isPHI())
    return getNextSourceFromPHI();
  return ValueTrackerResult();
}
ValueTrackerResult ValueTracker::getNextSource() {
  // If we reach a point where we cannot move up in the use-def chain, there
  // is nothing we can get.
  if (!Def)
    return ValueTrackerResult();

  ValueTrackerResult Res = getNextSourceImpl();
  if (Res.isValid()) {
    // Update the state (Def, DefIdx, DefSubReg) for the next call.
    bool OneRegSrc = Res.getNumSources() == 1;
    if (OneRegSrc)
      Reg = Res.getSrcReg(0);
    // Remember the instruction that produced the found sources.
    Res.setInst(Def);

    // If we can still move up in the use-def chain, move to the next
    // definition.
    if (!Reg.isPhysical() && OneRegSrc) {
      MachineRegisterInfo::def_iterator DI = MRI.def_begin(Reg);
      if (DI != MRI.def_end()) {
        Def = DI->getParent();
        DefIdx = DI.getOperandNo();
        DefSubReg = Res.getSrcSubReg(0);
      } else {
        Def = nullptr;
      }
      return Res;
    }
  }
  // No further tracking is possible.
  Def = nullptr;
  return Res;
}