LLVM  12.0.0git
MachineCSE.cpp
Go to the documentation of this file.
1 //===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs global common subexpression elimination on machine
10 // instructions using a scoped hash table based value numbering scheme. It
11 // must be run while the machine function is still in SSA form.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/SmallPtrSet.h"
18 #include "llvm/ADT/SmallSet.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Analysis/CFG.h"
31 #include "llvm/CodeGen/Passes.h"
36 #include "llvm/InitializePasses.h"
37 #include "llvm/MC/MCInstrDesc.h"
38 #include "llvm/MC/MCRegisterInfo.h"
39 #include "llvm/Pass.h"
40 #include "llvm/Support/Allocator.h"
41 #include "llvm/Support/Debug.h"
44 #include <cassert>
45 #include <iterator>
46 #include <utility>
47 #include <vector>
48 
49 using namespace llvm;
50 
51 #define DEBUG_TYPE "machine-cse"
52 
53 STATISTIC(NumCoalesces, "Number of copies coalesced");
54 STATISTIC(NumCSEs, "Number of common subexpression eliminated");
55 STATISTIC(NumPREs, "Number of partial redundant expression"
56  " transformed to fully redundant");
57 STATISTIC(NumPhysCSEs,
58  "Number of physreg referencing common subexpr eliminated");
59 STATISTIC(NumCrossBBCSEs,
60  "Number of cross-MBB physreg referencing CS eliminated");
61 STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
62 
63 namespace {
64 
65  class MachineCSE : public MachineFunctionPass {
66  const TargetInstrInfo *TII;
67  const TargetRegisterInfo *TRI;
68  AliasAnalysis *AA;
72 
73  public:
74  static char ID; // Pass identification
75 
76  MachineCSE() : MachineFunctionPass(ID) {
78  }
79 
80  bool runOnMachineFunction(MachineFunction &MF) override;
81 
82  void getAnalysisUsage(AnalysisUsage &AU) const override {
83  AU.setPreservesCFG();
91  }
92 
93  void releaseMemory() override {
94  ScopeMap.clear();
95  PREMap.clear();
96  Exps.clear();
97  }
98 
99  private:
100  using AllocatorTy = RecyclingAllocator<BumpPtrAllocator,
102  using ScopedHTType =
104  AllocatorTy>;
105  using ScopeType = ScopedHTType::ScopeTy;
106  using PhysDefVector = SmallVector<std::pair<unsigned, unsigned>, 2>;
107 
108  unsigned LookAheadLimit = 0;
111  PREMap;
112  ScopedHTType VNT;
114  unsigned CurrVN = 0;
115 
116  bool PerformTrivialCopyPropagation(MachineInstr *MI,
118  bool isPhysDefTriviallyDead(unsigned Reg,
121  bool hasLivePhysRegDefUses(const MachineInstr *MI,
122  const MachineBasicBlock *MBB,
123  SmallSet<unsigned, 8> &PhysRefs,
124  PhysDefVector &PhysDefs, bool &PhysUseDef) const;
125  bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
126  SmallSet<unsigned, 8> &PhysRefs,
127  PhysDefVector &PhysDefs, bool &NonLocal) const;
128  bool isCSECandidate(MachineInstr *MI);
129  bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
130  MachineBasicBlock *CSBB, MachineInstr *MI);
131  void EnterScope(MachineBasicBlock *MBB);
132  void ExitScope(MachineBasicBlock *MBB);
133  bool ProcessBlockCSE(MachineBasicBlock *MBB);
134  void ExitScopeIfDone(MachineDomTreeNode *Node,
136  bool PerformCSE(MachineDomTreeNode *Node);
137 
138  bool isPRECandidate(MachineInstr *MI);
139  bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
140  bool PerformSimplePRE(MachineDominatorTree *DT);
141  /// Heuristics to see if it's profitable to move common computations of MBB
142  /// and MBB1 to CandidateBB.
143  bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
145  MachineBasicBlock *MBB1);
146  };
147 
148 } // end anonymous namespace
149 
150 char MachineCSE::ID = 0;
151 
153 
155  "Machine Common Subexpression Elimination", false, false)
159  "Machine Common Subexpression Elimination", false, false)
160 
161 /// The source register of a COPY machine instruction can be propagated to all
162 /// its users, and this propagation could increase the probability of finding
163 /// common subexpressions. If the COPY has only one user, the COPY itself can
164 /// be removed.
165 bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
167  bool Changed = false;
168  for (MachineOperand &MO : MI->operands()) {
169  if (!MO.isReg() || !MO.isUse())
170  continue;
171  Register Reg = MO.getReg();
172  if (!Register::isVirtualRegister(Reg))
173  continue;
174  bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
175  MachineInstr *DefMI = MRI->getVRegDef(Reg);
176  if (!DefMI->isCopy())
177  continue;
178  Register SrcReg = DefMI->getOperand(1).getReg();
179  if (!Register::isVirtualRegister(SrcReg))
180  continue;
181  if (DefMI->getOperand(0).getSubReg())
182  continue;
183  // FIXME: We should trivially coalesce subregister copies to expose CSE
184  // opportunities on instructions with truncated operands (see
185  // cse-add-with-overflow.ll). This can be done here as follows:
186  // if (SrcSubReg)
187  // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
188  // SrcSubReg);
189  // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
190  //
191  // The 2-addr pass has been updated to handle coalesced subregs. However,
192  // some machine-specific code still can't handle it.
193  // To handle it properly we also need a way find a constrained subregister
194  // class given a super-reg class and subreg index.
195  if (DefMI->getOperand(1).getSubReg())
196  continue;
197  if (!MRI->constrainRegAttrs(SrcReg, Reg))
198  continue;
199  LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
200  LLVM_DEBUG(dbgs() << "*** to: " << *MI);
201 
202  // Propagate SrcReg of copies to MI.
203  MO.setReg(SrcReg);
204  MRI->clearKillFlags(SrcReg);
205  // Coalesce single use copies.
206  if (OnlyOneUse) {
207  // If (and only if) we've eliminated all uses of the copy, also
208  // copy-propagate to any debug-users of MI, or they'll be left using
209  // an undefined value.
210  DefMI->changeDebugValuesDefReg(SrcReg);
211 
212  DefMI->eraseFromParent();
213  ++NumCoalesces;
214  }
215  Changed = true;
216  }
217 
218  return Changed;
219 }
220 
221 bool
222 MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
225  unsigned LookAheadLeft = LookAheadLimit;
226  while (LookAheadLeft) {
227  // Skip over dbg_value's.
229 
230  if (I == E)
231  // Reached end of block, we don't know if register is dead or not.
232  return false;
233 
234  bool SeenDef = false;
235  for (const MachineOperand &MO : I->operands()) {
236  if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
237  SeenDef = true;
238  if (!MO.isReg() || !MO.getReg())
239  continue;
240  if (!TRI->regsOverlap(MO.getReg(), Reg))
241  continue;
242  if (MO.isUse())
243  // Found a use!
244  return false;
245  SeenDef = true;
246  }
247  if (SeenDef)
248  // See a def of Reg (or an alias) before encountering any use, it's
249  // trivially dead.
250  return true;
251 
252  --LookAheadLeft;
253  ++I;
254  }
255  return false;
256 }
257 
258 static bool isCallerPreservedOrConstPhysReg(unsigned Reg,
259  const MachineFunction &MF,
260  const TargetRegisterInfo &TRI) {
261  // MachineRegisterInfo::isConstantPhysReg directly called by
262  // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
263  // reserved registers to be frozen. That doesn't cause a problem post-ISel as
264  // most (if not all) targets freeze reserved registers right after ISel.
265  //
266  // It does cause issues mid-GlobalISel, however, hence the additional
267  // reservedRegsFrozen check.
268  const MachineRegisterInfo &MRI = MF.getRegInfo();
269  return TRI.isCallerPreservedPhysReg(Reg, MF) ||
270  (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
271 }
272 
273 /// hasLivePhysRegDefUses - Return true if the specified instruction read/write
274 /// physical registers (except for dead defs of physical registers). It also
275 /// returns the physical register def by reference if it's the only one and the
276 /// instruction does not uses a physical register.
277 bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
278  const MachineBasicBlock *MBB,
279  SmallSet<unsigned, 8> &PhysRefs,
280  PhysDefVector &PhysDefs,
281  bool &PhysUseDef) const {
282  // First, add all uses to PhysRefs.
283  for (const MachineOperand &MO : MI->operands()) {
284  if (!MO.isReg() || MO.isDef())
285  continue;
286  Register Reg = MO.getReg();
287  if (!Reg)
288  continue;
290  continue;
291  // Reading either caller preserved or constant physregs is ok.
292  if (!isCallerPreservedOrConstPhysReg(Reg, *MI->getMF(), *TRI))
293  for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
294  PhysRefs.insert(*AI);
295  }
296 
297  // Next, collect all defs into PhysDefs. If any is already in PhysRefs
298  // (which currently contains only uses), set the PhysUseDef flag.
299  PhysUseDef = false;
300  MachineBasicBlock::const_iterator I = MI; I = std::next(I);
301  for (const auto &MOP : llvm::enumerate(MI->operands())) {
302  const MachineOperand &MO = MOP.value();
303  if (!MO.isReg() || !MO.isDef())
304  continue;
305  Register Reg = MO.getReg();
306  if (!Reg)
307  continue;
309  continue;
310  // Check against PhysRefs even if the def is "dead".
311  if (PhysRefs.count(Reg))
312  PhysUseDef = true;
313  // If the def is dead, it's ok. But the def may not marked "dead". That's
314  // common since this pass is run before livevariables. We can scan
315  // forward a few instructions and check if it is obviously dead.
316  if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
317  PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
318  }
319 
320  // Finally, add all defs to PhysRefs as well.
321  for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
322  for (MCRegAliasIterator AI(PhysDefs[i].second, TRI, true); AI.isValid();
323  ++AI)
324  PhysRefs.insert(*AI);
325 
326  return !PhysRefs.empty();
327 }
328 
329 bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
330  SmallSet<unsigned, 8> &PhysRefs,
331  PhysDefVector &PhysDefs,
332  bool &NonLocal) const {
333  // For now conservatively returns false if the common subexpression is
334  // not in the same basic block as the given instruction. The only exception
335  // is if the common subexpression is in the sole predecessor block.
336  const MachineBasicBlock *MBB = MI->getParent();
337  const MachineBasicBlock *CSMBB = CSMI->getParent();
338 
339  bool CrossMBB = false;
340  if (CSMBB != MBB) {
341  if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
342  return false;
343 
344  for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
345  if (MRI->isAllocatable(PhysDefs[i].second) ||
346  MRI->isReserved(PhysDefs[i].second))
347  // Avoid extending live range of physical registers if they are
348  //allocatable or reserved.
349  return false;
350  }
351  CrossMBB = true;
352  }
353  MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
356  unsigned LookAheadLeft = LookAheadLimit;
357  while (LookAheadLeft) {
358  // Skip over dbg_value's.
359  while (I != E && I != EE && I->isDebugInstr())
360  ++I;
361 
362  if (I == EE) {
363  assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
364  (void)CrossMBB;
365  CrossMBB = false;
366  NonLocal = true;
367  I = MBB->begin();
368  EE = MBB->end();
369  continue;
370  }
371 
372  if (I == E)
373  return true;
374 
375  for (const MachineOperand &MO : I->operands()) {
376  // RegMasks go on instructions like calls that clobber lots of physregs.
377  // Don't attempt to CSE across such an instruction.
378  if (MO.isRegMask())
379  return false;
380  if (!MO.isReg() || !MO.isDef())
381  continue;
382  Register MOReg = MO.getReg();
383  if (Register::isVirtualRegister(MOReg))
384  continue;
385  if (PhysRefs.count(MOReg))
386  return false;
387  }
388 
389  --LookAheadLeft;
390  ++I;
391  }
392 
393  return false;
394 }
395 
396 bool MachineCSE::isCSECandidate(MachineInstr *MI) {
397  if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
398  MI->isInlineAsm() || MI->isDebugInstr())
399  return false;
400 
401  // Ignore copies.
402  if (MI->isCopyLike())
403  return false;
404 
405  // Ignore stuff that we obviously can't move.
406  if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
408  return false;
409 
410  if (MI->mayLoad()) {
411  // Okay, this instruction does a load. As a refinement, we allow the target
412  // to decide whether the loaded value is actually a constant. If so, we can
413  // actually use it as a load.
414  if (!MI->isDereferenceableInvariantLoad(AA))
415  // FIXME: we should be able to hoist loads with no other side effects if
416  // there are no other instructions which can change memory in this loop.
417  // This is a trivial form of alias analysis.
418  return false;
419  }
420 
421  // Ignore stack guard loads, otherwise the register that holds CSEed value may
422  // be spilled and get loaded back with corrupted data.
423  if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
424  return false;
425 
426  return true;
427 }
428 
429 /// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
430 /// common expression that defines Reg. CSBB is basic block where CSReg is
431 /// defined.
432 bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
433  MachineBasicBlock *CSBB, MachineInstr *MI) {
434  // FIXME: Heuristics that works around the lack the live range splitting.
435 
436  // If CSReg is used at all uses of Reg, CSE should not increase register
437  // pressure of CSReg.
438  bool MayIncreasePressure = true;
440  MayIncreasePressure = false;
442  for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
443  CSUses.insert(&MI);
444  }
445  for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
446  if (!CSUses.count(&MI)) {
447  MayIncreasePressure = true;
448  break;
449  }
450  }
451  }
452  if (!MayIncreasePressure) return true;
453 
454  // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
455  // an immediate predecessor. We don't want to increase register pressure and
456  // end up causing other computation to be spilled.
457  if (TII->isAsCheapAsAMove(*MI)) {
458  MachineBasicBlock *BB = MI->getParent();
459  if (CSBB != BB && !CSBB->isSuccessor(BB))
460  return false;
461  }
462 
463  // Heuristics #2: If the expression doesn't not use a vr and the only use
464  // of the redundant computation are copies, do not cse.
465  bool HasVRegUse = false;
466  for (const MachineOperand &MO : MI->operands()) {
467  if (MO.isReg() && MO.isUse() && Register::isVirtualRegister(MO.getReg())) {
468  HasVRegUse = true;
469  break;
470  }
471  }
472  if (!HasVRegUse) {
473  bool HasNonCopyUse = false;
474  for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
475  // Ignore copies.
476  if (!MI.isCopyLike()) {
477  HasNonCopyUse = true;
478  break;
479  }
480  }
481  if (!HasNonCopyUse)
482  return false;
483  }
484 
485  // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
486  // it unless the defined value is already used in the BB of the new use.
487  bool HasPHI = false;
488  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
489  HasPHI |= UseMI.isPHI();
490  if (UseMI.getParent() == MI->getParent())
491  return true;
492  }
493 
494  return !HasPHI;
495 }
496 
497 void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
498  LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
499  ScopeType *Scope = new ScopeType(VNT);
500  ScopeMap[MBB] = Scope;
501 }
502 
503 void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
504  LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
506  assert(SI != ScopeMap.end());
507  delete SI->second;
508  ScopeMap.erase(SI);
509 }
510 
511 bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
512  bool Changed = false;
513 
515  SmallVector<unsigned, 2> ImplicitDefsToUpdate;
516  SmallVector<unsigned, 2> ImplicitDefs;
517  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
518  MachineInstr *MI = &*I;
519  ++I;
520 
521  if (!isCSECandidate(MI))
522  continue;
523 
524  bool FoundCSE = VNT.count(MI);
525  if (!FoundCSE) {
526  // Using trivial copy propagation to find more CSE opportunities.
527  if (PerformTrivialCopyPropagation(MI, MBB)) {
528  Changed = true;
529 
530  // After coalescing MI itself may become a copy.
531  if (MI->isCopyLike())
532  continue;
533 
534  // Try again to see if CSE is possible.
535  FoundCSE = VNT.count(MI);
536  }
537  }
538 
539  // Commute commutable instructions.
540  bool Commuted = false;
541  if (!FoundCSE && MI->isCommutable()) {
542  if (MachineInstr *NewMI = TII->commuteInstruction(*MI)) {
543  Commuted = true;
544  FoundCSE = VNT.count(NewMI);
545  if (NewMI != MI) {
546  // New instruction. It doesn't need to be kept.
547  NewMI->eraseFromParent();
548  Changed = true;
549  } else if (!FoundCSE)
550  // MI was changed but it didn't help, commute it back!
551  (void)TII->commuteInstruction(*MI);
552  }
553  }
554 
555  // If the instruction defines physical registers and the values *may* be
556  // used, then it's not safe to replace it with a common subexpression.
557  // It's also not safe if the instruction uses physical registers.
558  bool CrossMBBPhysDef = false;
559  SmallSet<unsigned, 8> PhysRefs;
560  PhysDefVector PhysDefs;
561  bool PhysUseDef = false;
562  if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
563  PhysDefs, PhysUseDef)) {
564  FoundCSE = false;
565 
566  // ... Unless the CS is local or is in the sole predecessor block
567  // and it also defines the physical register which is not clobbered
568  // in between and the physical register uses were not clobbered.
569  // This can never be the case if the instruction both uses and
570  // defines the same physical register, which was detected above.
571  if (!PhysUseDef) {
572  unsigned CSVN = VNT.lookup(MI);
573  MachineInstr *CSMI = Exps[CSVN];
574  if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
575  FoundCSE = true;
576  }
577  }
578 
579  if (!FoundCSE) {
580  VNT.insert(MI, CurrVN++);
581  Exps.push_back(MI);
582  continue;
583  }
584 
585  // Found a common subexpression, eliminate it.
586  unsigned CSVN = VNT.lookup(MI);
587  MachineInstr *CSMI = Exps[CSVN];
588  LLVM_DEBUG(dbgs() << "Examining: " << *MI);
589  LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
590 
591  // Check if it's profitable to perform this CSE.
592  bool DoCSE = true;
593  unsigned NumDefs = MI->getNumDefs();
594 
595  for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
596  MachineOperand &MO = MI->getOperand(i);
597  if (!MO.isReg() || !MO.isDef())
598  continue;
599  Register OldReg = MO.getReg();
600  Register NewReg = CSMI->getOperand(i).getReg();
601 
602  // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
603  // we should make sure it is not dead at CSMI.
604  if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
605  ImplicitDefsToUpdate.push_back(i);
606 
607  // Keep track of implicit defs of CSMI and MI, to clear possibly
608  // made-redundant kill flags.
609  if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
610  ImplicitDefs.push_back(OldReg);
611 
612  if (OldReg == NewReg) {
613  --NumDefs;
614  continue;
615  }
616 
618  Register::isVirtualRegister(NewReg) &&
619  "Do not CSE physical register defs!");
620 
621  if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), MI)) {
622  LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
623  DoCSE = false;
624  break;
625  }
626 
627  // Don't perform CSE if the result of the new instruction cannot exist
628  // within the constraints (register class, bank, or low-level type) of
629  // the old instruction.
630  if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
631  LLVM_DEBUG(
632  dbgs() << "*** Not the same register constraints, avoid CSE!\n");
633  DoCSE = false;
634  break;
635  }
636 
637  CSEPairs.push_back(std::make_pair(OldReg, NewReg));
638  --NumDefs;
639  }
640 
641  // Actually perform the elimination.
642  if (DoCSE) {
643  for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
644  unsigned OldReg = CSEPair.first;
645  unsigned NewReg = CSEPair.second;
646  // OldReg may have been unused but is used now, clear the Dead flag
647  MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
648  assert(Def != nullptr && "CSEd register has no unique definition?");
649  Def->clearRegisterDeads(NewReg);
650  // Replace with NewReg and clear kill flags which may be wrong now.
651  MRI->replaceRegWith(OldReg, NewReg);
652  MRI->clearKillFlags(NewReg);
653  }
654 
655  // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
656  // we should make sure it is not dead at CSMI.
657  for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
658  CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
659  for (auto PhysDef : PhysDefs)
660  if (!MI->getOperand(PhysDef.first).isDead())
661  CSMI->getOperand(PhysDef.first).setIsDead(false);
662 
663  // Go through implicit defs of CSMI and MI, and clear the kill flags on
664  // their uses in all the instructions between CSMI and MI.
665  // We might have made some of the kill flags redundant, consider:
666  // subs ... implicit-def %nzcv <- CSMI
667  // csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
668  // subs ... implicit-def %nzcv <- MI, to be eliminated
669  // csinc ... implicit killed %nzcv
670  // Since we eliminated MI, and reused a register imp-def'd by CSMI
671  // (here %nzcv), that register, if it was killed before MI, should have
672  // that kill flag removed, because it's lifetime was extended.
673  if (CSMI->getParent() == MI->getParent()) {
674  for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
675  for (auto ImplicitDef : ImplicitDefs)
676  if (MachineOperand *MO = II->findRegisterUseOperand(
677  ImplicitDef, /*isKill=*/true, TRI))
678  MO->setIsKill(false);
679  } else {
680  // If the instructions aren't in the same BB, bail out and clear the
681  // kill flag on all uses of the imp-def'd register.
682  for (auto ImplicitDef : ImplicitDefs)
683  MRI->clearKillFlags(ImplicitDef);
684  }
685 
686  if (CrossMBBPhysDef) {
687  // Add physical register defs now coming in from a predecessor to MBB
688  // livein list.
689  while (!PhysDefs.empty()) {
690  auto LiveIn = PhysDefs.pop_back_val();
691  if (!MBB->isLiveIn(LiveIn.second))
692  MBB->addLiveIn(LiveIn.second);
693  }
694  ++NumCrossBBCSEs;
695  }
696 
697  MI->eraseFromParent();
698  ++NumCSEs;
699  if (!PhysRefs.empty())
700  ++NumPhysCSEs;
701  if (Commuted)
702  ++NumCommutes;
703  Changed = true;
704  } else {
705  VNT.insert(MI, CurrVN++);
706  Exps.push_back(MI);
707  }
708  CSEPairs.clear();
709  ImplicitDefsToUpdate.clear();
710  ImplicitDefs.clear();
711  }
712 
713  return Changed;
714 }
715 
716 /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
717 /// dominator tree node if its a leaf or all of its children are done. Walk
718 /// up the dominator tree to destroy ancestors which are now done.
719 void
720 MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
722  if (OpenChildren[Node])
723  return;
724 
725  // Pop scope.
726  ExitScope(Node->getBlock());
727 
728  // Now traverse upwards to pop ancestors whose offsprings are all done.
729  while (MachineDomTreeNode *Parent = Node->getIDom()) {
730  unsigned Left = --OpenChildren[Parent];
731  if (Left != 0)
732  break;
733  ExitScope(Parent->getBlock());
734  Node = Parent;
735  }
736 }
737 
738 bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
742 
743  CurrVN = 0;
744 
745  // Perform a DFS walk to determine the order of visit.
746  WorkList.push_back(Node);
747  do {
748  Node = WorkList.pop_back_val();
749  Scopes.push_back(Node);
750  OpenChildren[Node] = Node->getNumChildren();
751  for (MachineDomTreeNode *Child : Node->children())
752  WorkList.push_back(Child);
753  } while (!WorkList.empty());
754 
755  // Now perform CSE.
756  bool Changed = false;
757  for (MachineDomTreeNode *Node : Scopes) {
758  MachineBasicBlock *MBB = Node->getBlock();
759  EnterScope(MBB);
760  Changed |= ProcessBlockCSE(MBB);
761  // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
762  ExitScopeIfDone(Node, OpenChildren);
763  }
764 
765  return Changed;
766 }
767 
768 // We use stronger checks for PRE candidate rather than for CSE ones to embrace
769 // checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps
770 // to exclude instrs created by PRE that won't be CSEed later.
771 bool MachineCSE::isPRECandidate(MachineInstr *MI) {
772  if (!isCSECandidate(MI) ||
773  MI->isNotDuplicable() ||
774  MI->mayLoad() ||
775  MI->isAsCheapAsAMove() ||
776  MI->getNumDefs() != 1 ||
777  MI->getNumExplicitDefs() != 1)
778  return false;
779 
780  for (auto def : MI->defs())
781  if (!Register::isVirtualRegister(def.getReg()))
782  return false;
783 
784  for (auto use : MI->uses())
785  if (use.isReg() && !Register::isVirtualRegister(use.getReg()))
786  return false;
787 
788  return true;
789 }
790 
791 bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
792  MachineBasicBlock *MBB) {
793  bool Changed = false;
794  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
795  MachineInstr *MI = &*I;
796  ++I;
797 
798  if (!isPRECandidate(MI))
799  continue;
800 
801  if (!PREMap.count(MI)) {
802  PREMap[MI] = MBB;
803  continue;
804  }
805 
806  auto MBB1 = PREMap[MI];
807  assert(
808  !DT->properlyDominates(MBB, MBB1) &&
809  "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
810  auto CMBB = DT->findNearestCommonDominator(MBB, MBB1);
811  if (!CMBB->isLegalToHoistInto())
812  continue;
813 
814  if (!isProfitableToHoistInto(CMBB, MBB, MBB1))
815  continue;
816 
817  // Two instrs are partial redundant if their basic blocks are reachable
818  // from one to another but one doesn't dominate another.
819  if (CMBB != MBB1) {
820  auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
821  if (BB != nullptr && BB1 != nullptr &&
822  (isPotentiallyReachable(BB1, BB) ||
823  isPotentiallyReachable(BB, BB1))) {
824 
825  assert(MI->getOperand(0).isDef() &&
826  "First operand of instr with one explicit def must be this def");
827  Register VReg = MI->getOperand(0).getReg();
828  Register NewReg = MRI->cloneVirtualRegister(VReg);
829  if (!isProfitableToCSE(NewReg, VReg, CMBB, MI))
830  continue;
831  MachineInstr &NewMI =
832  TII->duplicate(*CMBB, CMBB->getFirstTerminator(), *MI);
833 
834  // When hoisting, make sure we don't carry the debug location of
835  // the original instruction, as that's not correct and can cause
836  // unexpected jumps when debugging optimized code.
837  auto EmptyDL = DebugLoc();
838  NewMI.setDebugLoc(EmptyDL);
839 
840  NewMI.getOperand(0).setReg(NewReg);
841 
842  PREMap[MI] = CMBB;
843  ++NumPREs;
844  Changed = true;
845  }
846  }
847  }
848  return Changed;
849 }
850 
851 // This simple PRE (partial redundancy elimination) pass doesn't actually
852 // eliminate partial redundancy but transforms it to full redundancy,
853 // anticipating that the next CSE step will eliminate this created redundancy.
854 // If CSE doesn't eliminate this, than created instruction will remain dead
855 // and eliminated later by Remove Dead Machine Instructions pass.
856 bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
858 
859  PREMap.clear();
860  bool Changed = false;
861  BBs.push_back(DT->getRootNode());
862  do {
863  auto Node = BBs.pop_back_val();
864  for (MachineDomTreeNode *Child : Node->children())
865  BBs.push_back(Child);
866 
867  MachineBasicBlock *MBB = Node->getBlock();
868  Changed |= ProcessBlockPRE(DT, MBB);
869 
870  } while (!BBs.empty());
871 
872  return Changed;
873 }
874 
875 bool MachineCSE::isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
876  MachineBasicBlock *MBB,
877  MachineBasicBlock *MBB1) {
878  if (CandidateBB->getParent()->getFunction().hasMinSize())
879  return true;
880  assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
881  assert(DT->dominates(CandidateBB, MBB1) &&
882  "CandidateBB should dominate MBB1");
883  return MBFI->getBlockFreq(CandidateBB) <=
884  MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
885 }
886 
887 bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
888  if (skipFunction(MF.getFunction()))
889  return false;
890 
891  TII = MF.getSubtarget().getInstrInfo();
892  TRI = MF.getSubtarget().getRegisterInfo();
893  MRI = &MF.getRegInfo();
894  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
895  DT = &getAnalysis<MachineDominatorTree>();
896  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
897  LookAheadLimit = TII->getMachineCSELookAheadLimit();
898  bool ChangedPRE, ChangedCSE;
899  ChangedPRE = PerformSimplePRE(DT);
900  ChangedCSE = PerformCSE(DT->getRootNode());
901  return ChangedPRE || ChangedCSE;
902 }
Machine Common Subexpression Elimination
Definition: MachineCSE.cpp:158
void changeDebugValuesDefReg(Register Reg)
Find all DBG_VALUEs that point to the register def in this instruction and point them to Reg instead...
bool constrainRegAttrs(Register Reg, Register ConstrainingReg, unsigned MinNumRegs=0)
Constrain the register class or the register bank of the virtual register Reg (and low-level type) to...
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
bool isReserved(Register PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:760
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Definition: MachineInstr.h:603
bool isPotentiallyReachable(const Instruction *From, const Instruction *To, const SmallPtrSetImpl< BasicBlock *> *ExclusionSet=nullptr, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether instruction &#39;To&#39; is reachable from &#39;From&#39;, without passing through any blocks in Ex...
Definition: CFG.cpp:216
INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE, "Machine Common Subexpression Elimination", false, false) INITIALIZE_PASS_END(MachineCSE
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
void push_back(const T &Elt)
Definition: SmallVector.h:246
void initializeMachineCSEPass(PassRegistry &)
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register...
#define DEBUG_TYPE
Definition: MachineCSE.cpp:51
unsigned Reg
virtual MachineInstr & duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const
Clones instruction or the whole instruction bundle Orig and insert into MBB before InsertBefore...
MachineInstr * commuteInstruction(MachineInstr &MI, bool NewMI=false, unsigned OpIdx1=CommuteAnyOperandIndex, unsigned OpIdx2=CommuteAnyOperandIndex) const
This method commutes the operands of the given machine instruction MI.
unsigned getSubReg() const
bool isInlineAsm() const
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:33
void setIsDead(bool Val=true)
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:559
bool isCopyLike() const
Return true if the instruction behaves like a copy.
bool isPHI() const
Special DenseMapInfo traits to compare MachineInstr* by value of the instruction rather than by point...
This file defines the BumpPtrAllocator interface.
void clearRegisterDeads(Register Reg)
Clear all dead flags on operands defining register Reg.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineBasicBlock & MBB
virtual unsigned getMachineCSELookAheadLimit() const
Return the value to use for the MachineCSE&#39;s LookAheadLimit, which is a heuristic used for CSE&#39;ing ph...
AnalysisUsage & addRequired()
LLVM_NODISCARD bool empty() const
Definition: SmallSet.h:155
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:459
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction part of the terminator for a basic block.
Definition: MachineInstr.h:784
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:456
RecyclingAllocator - This class wraps an Allocator, adding the functionality of recycling deleted obj...
bool regsOverlap(Register regA, Register regB) const
Returns true if the two registers are equal or alias each other.
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
Definition: MachineCSE.cpp:152
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
Definition: MachineInstr.h:975
Base class for the actual dominator tree node.
AnalysisUsage & addPreservedID(const void *ID)
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:365
void setReg(Register Reg)
Change the register this operand corresponds to.
virtual const TargetInstrInfo * getInstrInfo() const
MachineBasicBlock * findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B)
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B...
bool isAllocatable(MCRegister PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn&#39;t been...
TargetInstrInfo - Interface to description of machine instruction set.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
BumpPtrAllocatorImpl BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition: Allocator.h:370
NodeT * getBlock() const
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:955
unsigned const MachineRegisterInfo * MRI
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
Move duplicate certain instructions close to their use
Definition: Localizer.cpp:29
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MachineInstrBuilder & UseMI
DomTreeNodeBase * getIDom() const
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
MCRegAliasIterator enumerates all registers aliasing Reg.
Represent the analysis usage information of a pass.
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
Definition: MachineInstr.h:592
constexpr double e
Definition: MathExtras.h:58
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:375
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:180
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
bool isCopy() const
bool isImplicitDef() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool isDebugInstr() const
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
virtual bool isAsCheapAsAMove(const MachineInstr &MI) const
Return true if the instruction is as cheap as a move instruction.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:439
bool isDereferenceableInvariantLoad(AAResults *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn&#39;t ...
MachineDomTreeNode * getRootNode() const
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:883
size_t getNumChildren() const
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
MachineInstrBuilder MachineInstrBuilder & DefMI
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:420
bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
iterator_range< iterator > children()
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
unsigned pred_size() const
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
void setDebugLoc(DebugLoc dl)
Replace current source information with new such.
IterT skipDebugInstructionsForward(IterT It, IterT End)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator...
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:280
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Representation of each machine instruction.
Definition: MachineInstr.h:62
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
#define I(x, y, z)
Definition: MD5.cpp:59
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:644
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
unsigned getNumDefs() const
Returns the total number of definitions.
Definition: MachineInstr.h:508
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
bool isKill() const
unsigned getNumExplicitDefs() const
Returns the number of non-implicit definitions.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
virtual bool isCallerPreservedPhysReg(MCRegister PhysReg, const MachineFunction &MF) const
Physical registers that may be modified within a function but are guaranteed to be restored before an...
static bool isCallerPreservedOrConstPhysReg(unsigned Reg, const MachineFunction &MF, const TargetRegisterInfo &TRI)
Definition: MachineCSE.cpp:258
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:942
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
bool isPosition() const
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore...
IRTranslator LLVM IR MI
Register getReg() const
getReg - Returns the register number.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:466
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool isAsCheapAsAMove(QueryType Type=AllInBundle) const
Returns true if this instruction has the same cost (or less) than a move instruction.
bool isCommutable(QueryType Type=IgnoreBundle) const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z, ..."), which produces the same result if Y and Z are exchanged.
Definition: MachineInstr.h:994
bool isConstantPhysReg(MCRegister PhysReg) const
Returns true if PhysReg is unallocatable and constant throughout the function.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are are pair (A,B) such that A is the 0-based ...
Definition: STLExtras.h:1886
bool isNotDuplicable(QueryType Type=AnyInBundle) const
Return true if this instruction cannot be safely duplicated.
Definition: MachineInstr.h:858
bool isImplicit() const
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164