LLVM  10.0.0svn
MachineCSE.cpp
Go to the documentation of this file.
1 //===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs global common subexpression elimination on machine
10 // instructions using a scoped hash table based value numbering scheme. It
11 // must be run while the machine function is still in SSA form.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/SmallPtrSet.h"
18 #include "llvm/ADT/SmallSet.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Analysis/CFG.h"
31 #include "llvm/CodeGen/Passes.h"
36 #include "llvm/MC/MCInstrDesc.h"
37 #include "llvm/MC/MCRegisterInfo.h"
38 #include "llvm/Pass.h"
39 #include "llvm/Support/Allocator.h"
40 #include "llvm/Support/Debug.h"
43 #include <cassert>
44 #include <iterator>
45 #include <utility>
46 #include <vector>
47 
48 using namespace llvm;
49 
50 #define DEBUG_TYPE "machine-cse"
51 
52 STATISTIC(NumCoalesces, "Number of copies coalesced");
53 STATISTIC(NumCSEs, "Number of common subexpression eliminated");
54 STATISTIC(NumPREs, "Number of partial redundant expression"
55  " transformed to fully redundant");
56 STATISTIC(NumPhysCSEs,
57  "Number of physreg referencing common subexpr eliminated");
58 STATISTIC(NumCrossBBCSEs,
59  "Number of cross-MBB physreg referencing CS eliminated");
60 STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
61 
62 namespace {
63 
64  class MachineCSE : public MachineFunctionPass {
65  const TargetInstrInfo *TII;
66  const TargetRegisterInfo *TRI;
67  AliasAnalysis *AA;
71 
72  public:
73  static char ID; // Pass identification
74 
75  MachineCSE() : MachineFunctionPass(ID) {
77  }
78 
79  bool runOnMachineFunction(MachineFunction &MF) override;
80 
81  void getAnalysisUsage(AnalysisUsage &AU) const override {
82  AU.setPreservesCFG();
90  }
91 
92  void releaseMemory() override {
93  ScopeMap.clear();
94  PREMap.clear();
95  Exps.clear();
96  }
97 
98  private:
99  using AllocatorTy = RecyclingAllocator<BumpPtrAllocator,
101  using ScopedHTType =
103  AllocatorTy>;
104  using ScopeType = ScopedHTType::ScopeTy;
105  using PhysDefVector = SmallVector<std::pair<unsigned, unsigned>, 2>;
106 
107  unsigned LookAheadLimit = 0;
110  PREMap;
111  ScopedHTType VNT;
113  unsigned CurrVN = 0;
114 
115  bool PerformTrivialCopyPropagation(MachineInstr *MI,
116  MachineBasicBlock *MBB);
117  bool isPhysDefTriviallyDead(unsigned Reg,
120  bool hasLivePhysRegDefUses(const MachineInstr *MI,
121  const MachineBasicBlock *MBB,
122  SmallSet<unsigned, 8> &PhysRefs,
123  PhysDefVector &PhysDefs, bool &PhysUseDef) const;
124  bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
125  SmallSet<unsigned, 8> &PhysRefs,
126  PhysDefVector &PhysDefs, bool &NonLocal) const;
127  bool isCSECandidate(MachineInstr *MI);
128  bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
129  MachineBasicBlock *CSBB, MachineInstr *MI);
130  void EnterScope(MachineBasicBlock *MBB);
131  void ExitScope(MachineBasicBlock *MBB);
132  bool ProcessBlockCSE(MachineBasicBlock *MBB);
133  void ExitScopeIfDone(MachineDomTreeNode *Node,
135  bool PerformCSE(MachineDomTreeNode *Node);
136 
137  bool isPRECandidate(MachineInstr *MI);
138  bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
139  bool PerformSimplePRE(MachineDominatorTree *DT);
140  /// Heuristics to see if it's profitable to move common computations of MBB
141  /// and MBB1 to CandidateBB.
142  bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
143  MachineBasicBlock *MBB,
144  MachineBasicBlock *MBB1);
145  };
146 
147 } // end anonymous namespace
148 
149 char MachineCSE::ID = 0;
150 
152 
154  "Machine Common Subexpression Elimination", false, false)
158  "Machine Common Subexpression Elimination", false, false)
159 
160 /// The source register of a COPY machine instruction can be propagated to all
161 /// its users, and this propagation could increase the probability of finding
162 /// common subexpressions. If the COPY has only one user, the COPY itself can
163 /// be removed.
164 bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
165  MachineBasicBlock *MBB) {
166  bool Changed = false;
167  for (MachineOperand &MO : MI->operands()) {
168  if (!MO.isReg() || !MO.isUse())
169  continue;
170  Register Reg = MO.getReg();
171  if (!Register::isVirtualRegister(Reg))
172  continue;
173  bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
174  MachineInstr *DefMI = MRI->getVRegDef(Reg);
175  if (!DefMI->isCopy())
176  continue;
177  Register SrcReg = DefMI->getOperand(1).getReg();
178  if (!Register::isVirtualRegister(SrcReg))
179  continue;
180  if (DefMI->getOperand(0).getSubReg())
181  continue;
182  // FIXME: We should trivially coalesce subregister copies to expose CSE
183  // opportunities on instructions with truncated operands (see
184  // cse-add-with-overflow.ll). This can be done here as follows:
185  // if (SrcSubReg)
186  // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
187  // SrcSubReg);
188  // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
189  //
190  // The 2-addr pass has been updated to handle coalesced subregs. However,
191  // some machine-specific code still can't handle it.
192  // To handle it properly we also need a way find a constrained subregister
193  // class given a super-reg class and subreg index.
194  if (DefMI->getOperand(1).getSubReg())
195  continue;
196  if (!MRI->constrainRegAttrs(SrcReg, Reg))
197  continue;
198  LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
199  LLVM_DEBUG(dbgs() << "*** to: " << *MI);
200 
201  // Update matching debug values.
202  DefMI->changeDebugValuesDefReg(SrcReg);
203 
204  // Propagate SrcReg of copies to MI.
205  MO.setReg(SrcReg);
206  MRI->clearKillFlags(SrcReg);
207  // Coalesce single use copies.
208  if (OnlyOneUse) {
209  DefMI->eraseFromParent();
210  ++NumCoalesces;
211  }
212  Changed = true;
213  }
214 
215  return Changed;
216 }
217 
218 bool
219 MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
222  unsigned LookAheadLeft = LookAheadLimit;
223  while (LookAheadLeft) {
224  // Skip over dbg_value's.
226 
227  if (I == E)
228  // Reached end of block, we don't know if register is dead or not.
229  return false;
230 
231  bool SeenDef = false;
232  for (const MachineOperand &MO : I->operands()) {
233  if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
234  SeenDef = true;
235  if (!MO.isReg() || !MO.getReg())
236  continue;
237  if (!TRI->regsOverlap(MO.getReg(), Reg))
238  continue;
239  if (MO.isUse())
240  // Found a use!
241  return false;
242  SeenDef = true;
243  }
244  if (SeenDef)
245  // See a def of Reg (or an alias) before encountering any use, it's
246  // trivially dead.
247  return true;
248 
249  --LookAheadLeft;
250  ++I;
251  }
252  return false;
253 }
254 
255 static bool isCallerPreservedOrConstPhysReg(unsigned Reg,
256  const MachineFunction &MF,
257  const TargetRegisterInfo &TRI) {
258  // MachineRegisterInfo::isConstantPhysReg directly called by
259  // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
260  // reserved registers to be frozen. That doesn't cause a problem post-ISel as
261  // most (if not all) targets freeze reserved registers right after ISel.
262  //
263  // It does cause issues mid-GlobalISel, however, hence the additional
264  // reservedRegsFrozen check.
265  const MachineRegisterInfo &MRI = MF.getRegInfo();
266  return TRI.isCallerPreservedPhysReg(Reg, MF) ||
267  (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
268 }
269 
270 /// hasLivePhysRegDefUses - Return true if the specified instruction read/write
271 /// physical registers (except for dead defs of physical registers). It also
272 /// returns the physical register def by reference if it's the only one and the
273 /// instruction does not uses a physical register.
274 bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
275  const MachineBasicBlock *MBB,
276  SmallSet<unsigned, 8> &PhysRefs,
277  PhysDefVector &PhysDefs,
278  bool &PhysUseDef) const {
279  // First, add all uses to PhysRefs.
280  for (const MachineOperand &MO : MI->operands()) {
281  if (!MO.isReg() || MO.isDef())
282  continue;
283  Register Reg = MO.getReg();
284  if (!Reg)
285  continue;
287  continue;
288  // Reading either caller preserved or constant physregs is ok.
289  if (!isCallerPreservedOrConstPhysReg(Reg, *MI->getMF(), *TRI))
290  for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
291  PhysRefs.insert(*AI);
292  }
293 
294  // Next, collect all defs into PhysDefs. If any is already in PhysRefs
295  // (which currently contains only uses), set the PhysUseDef flag.
296  PhysUseDef = false;
297  MachineBasicBlock::const_iterator I = MI; I = std::next(I);
298  for (const auto &MOP : llvm::enumerate(MI->operands())) {
299  const MachineOperand &MO = MOP.value();
300  if (!MO.isReg() || !MO.isDef())
301  continue;
302  Register Reg = MO.getReg();
303  if (!Reg)
304  continue;
306  continue;
307  // Check against PhysRefs even if the def is "dead".
308  if (PhysRefs.count(Reg))
309  PhysUseDef = true;
310  // If the def is dead, it's ok. But the def may not marked "dead". That's
311  // common since this pass is run before livevariables. We can scan
312  // forward a few instructions and check if it is obviously dead.
313  if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
314  PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
315  }
316 
317  // Finally, add all defs to PhysRefs as well.
318  for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
319  for (MCRegAliasIterator AI(PhysDefs[i].second, TRI, true); AI.isValid();
320  ++AI)
321  PhysRefs.insert(*AI);
322 
323  return !PhysRefs.empty();
324 }
325 
326 bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
327  SmallSet<unsigned, 8> &PhysRefs,
328  PhysDefVector &PhysDefs,
329  bool &NonLocal) const {
330  // For now conservatively returns false if the common subexpression is
331  // not in the same basic block as the given instruction. The only exception
332  // is if the common subexpression is in the sole predecessor block.
333  const MachineBasicBlock *MBB = MI->getParent();
334  const MachineBasicBlock *CSMBB = CSMI->getParent();
335 
336  bool CrossMBB = false;
337  if (CSMBB != MBB) {
338  if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
339  return false;
340 
341  for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
342  if (MRI->isAllocatable(PhysDefs[i].second) ||
343  MRI->isReserved(PhysDefs[i].second))
344  // Avoid extending live range of physical registers if they are
345  //allocatable or reserved.
346  return false;
347  }
348  CrossMBB = true;
349  }
350  MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
353  unsigned LookAheadLeft = LookAheadLimit;
354  while (LookAheadLeft) {
355  // Skip over dbg_value's.
356  while (I != E && I != EE && I->isDebugInstr())
357  ++I;
358 
359  if (I == EE) {
360  assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
361  (void)CrossMBB;
362  CrossMBB = false;
363  NonLocal = true;
364  I = MBB->begin();
365  EE = MBB->end();
366  continue;
367  }
368 
369  if (I == E)
370  return true;
371 
372  for (const MachineOperand &MO : I->operands()) {
373  // RegMasks go on instructions like calls that clobber lots of physregs.
374  // Don't attempt to CSE across such an instruction.
375  if (MO.isRegMask())
376  return false;
377  if (!MO.isReg() || !MO.isDef())
378  continue;
379  Register MOReg = MO.getReg();
380  if (Register::isVirtualRegister(MOReg))
381  continue;
382  if (PhysRefs.count(MOReg))
383  return false;
384  }
385 
386  --LookAheadLeft;
387  ++I;
388  }
389 
390  return false;
391 }
392 
393 bool MachineCSE::isCSECandidate(MachineInstr *MI) {
394  if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
395  MI->isInlineAsm() || MI->isDebugInstr())
396  return false;
397 
398  // Ignore copies.
399  if (MI->isCopyLike())
400  return false;
401 
402  // Ignore stuff that we obviously can't move.
403  if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
405  return false;
406 
407  if (MI->mayLoad()) {
408  // Okay, this instruction does a load. As a refinement, we allow the target
409  // to decide whether the loaded value is actually a constant. If so, we can
410  // actually use it as a load.
411  if (!MI->isDereferenceableInvariantLoad(AA))
412  // FIXME: we should be able to hoist loads with no other side effects if
413  // there are no other instructions which can change memory in this loop.
414  // This is a trivial form of alias analysis.
415  return false;
416  }
417 
418  // Ignore stack guard loads, otherwise the register that holds CSEed value may
419  // be spilled and get loaded back with corrupted data.
420  if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
421  return false;
422 
423  return true;
424 }
425 
426 /// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
427 /// common expression that defines Reg. CSBB is basic block where CSReg is
428 /// defined.
429 bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
430  MachineBasicBlock *CSBB, MachineInstr *MI) {
431  // FIXME: Heuristics that works around the lack the live range splitting.
432 
433  // If CSReg is used at all uses of Reg, CSE should not increase register
434  // pressure of CSReg.
435  bool MayIncreasePressure = true;
437  MayIncreasePressure = false;
439  for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
440  CSUses.insert(&MI);
441  }
442  for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
443  if (!CSUses.count(&MI)) {
444  MayIncreasePressure = true;
445  break;
446  }
447  }
448  }
449  if (!MayIncreasePressure) return true;
450 
451  // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
452  // an immediate predecessor. We don't want to increase register pressure and
453  // end up causing other computation to be spilled.
454  if (TII->isAsCheapAsAMove(*MI)) {
455  MachineBasicBlock *BB = MI->getParent();
456  if (CSBB != BB && !CSBB->isSuccessor(BB))
457  return false;
458  }
459 
460  // Heuristics #2: If the expression doesn't not use a vr and the only use
461  // of the redundant computation are copies, do not cse.
462  bool HasVRegUse = false;
463  for (const MachineOperand &MO : MI->operands()) {
464  if (MO.isReg() && MO.isUse() && Register::isVirtualRegister(MO.getReg())) {
465  HasVRegUse = true;
466  break;
467  }
468  }
469  if (!HasVRegUse) {
470  bool HasNonCopyUse = false;
471  for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
472  // Ignore copies.
473  if (!MI.isCopyLike()) {
474  HasNonCopyUse = true;
475  break;
476  }
477  }
478  if (!HasNonCopyUse)
479  return false;
480  }
481 
482  // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
483  // it unless the defined value is already used in the BB of the new use.
484  bool HasPHI = false;
485  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
486  HasPHI |= UseMI.isPHI();
487  if (UseMI.getParent() == MI->getParent())
488  return true;
489  }
490 
491  return !HasPHI;
492 }
493 
494 void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
495  LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
496  ScopeType *Scope = new ScopeType(VNT);
497  ScopeMap[MBB] = Scope;
498 }
499 
500 void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
501  LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
503  assert(SI != ScopeMap.end());
504  delete SI->second;
505  ScopeMap.erase(SI);
506 }
507 
508 bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
509  bool Changed = false;
510 
512  SmallVector<unsigned, 2> ImplicitDefsToUpdate;
513  SmallVector<unsigned, 2> ImplicitDefs;
514  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
515  MachineInstr *MI = &*I;
516  ++I;
517 
518  if (!isCSECandidate(MI))
519  continue;
520 
521  bool FoundCSE = VNT.count(MI);
522  if (!FoundCSE) {
523  // Using trivial copy propagation to find more CSE opportunities.
524  if (PerformTrivialCopyPropagation(MI, MBB)) {
525  Changed = true;
526 
527  // After coalescing MI itself may become a copy.
528  if (MI->isCopyLike())
529  continue;
530 
531  // Try again to see if CSE is possible.
532  FoundCSE = VNT.count(MI);
533  }
534  }
535 
536  // Commute commutable instructions.
537  bool Commuted = false;
538  if (!FoundCSE && MI->isCommutable()) {
539  if (MachineInstr *NewMI = TII->commuteInstruction(*MI)) {
540  Commuted = true;
541  FoundCSE = VNT.count(NewMI);
542  if (NewMI != MI) {
543  // New instruction. It doesn't need to be kept.
544  NewMI->eraseFromParent();
545  Changed = true;
546  } else if (!FoundCSE)
547  // MI was changed but it didn't help, commute it back!
548  (void)TII->commuteInstruction(*MI);
549  }
550  }
551 
552  // If the instruction defines physical registers and the values *may* be
553  // used, then it's not safe to replace it with a common subexpression.
554  // It's also not safe if the instruction uses physical registers.
555  bool CrossMBBPhysDef = false;
556  SmallSet<unsigned, 8> PhysRefs;
557  PhysDefVector PhysDefs;
558  bool PhysUseDef = false;
559  if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
560  PhysDefs, PhysUseDef)) {
561  FoundCSE = false;
562 
563  // ... Unless the CS is local or is in the sole predecessor block
564  // and it also defines the physical register which is not clobbered
565  // in between and the physical register uses were not clobbered.
566  // This can never be the case if the instruction both uses and
567  // defines the same physical register, which was detected above.
568  if (!PhysUseDef) {
569  unsigned CSVN = VNT.lookup(MI);
570  MachineInstr *CSMI = Exps[CSVN];
571  if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
572  FoundCSE = true;
573  }
574  }
575 
576  if (!FoundCSE) {
577  VNT.insert(MI, CurrVN++);
578  Exps.push_back(MI);
579  continue;
580  }
581 
582  // Found a common subexpression, eliminate it.
583  unsigned CSVN = VNT.lookup(MI);
584  MachineInstr *CSMI = Exps[CSVN];
585  LLVM_DEBUG(dbgs() << "Examining: " << *MI);
586  LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
587 
588  // Check if it's profitable to perform this CSE.
589  bool DoCSE = true;
590  unsigned NumDefs = MI->getNumDefs();
591 
592  for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
593  MachineOperand &MO = MI->getOperand(i);
594  if (!MO.isReg() || !MO.isDef())
595  continue;
596  Register OldReg = MO.getReg();
597  Register NewReg = CSMI->getOperand(i).getReg();
598 
599  // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
600  // we should make sure it is not dead at CSMI.
601  if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
602  ImplicitDefsToUpdate.push_back(i);
603 
604  // Keep track of implicit defs of CSMI and MI, to clear possibly
605  // made-redundant kill flags.
606  if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
607  ImplicitDefs.push_back(OldReg);
608 
609  if (OldReg == NewReg) {
610  --NumDefs;
611  continue;
612  }
613 
615  Register::isVirtualRegister(NewReg) &&
616  "Do not CSE physical register defs!");
617 
618  if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), MI)) {
619  LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
620  DoCSE = false;
621  break;
622  }
623 
624  // Don't perform CSE if the result of the new instruction cannot exist
625  // within the constraints (register class, bank, or low-level type) of
626  // the old instruction.
627  if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
628  LLVM_DEBUG(
629  dbgs() << "*** Not the same register constraints, avoid CSE!\n");
630  DoCSE = false;
631  break;
632  }
633 
634  CSEPairs.push_back(std::make_pair(OldReg, NewReg));
635  --NumDefs;
636  }
637 
638  // Actually perform the elimination.
639  if (DoCSE) {
640  for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
641  unsigned OldReg = CSEPair.first;
642  unsigned NewReg = CSEPair.second;
643  // OldReg may have been unused but is used now, clear the Dead flag
644  MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
645  assert(Def != nullptr && "CSEd register has no unique definition?");
646  Def->clearRegisterDeads(NewReg);
647  // Replace with NewReg and clear kill flags which may be wrong now.
648  MRI->replaceRegWith(OldReg, NewReg);
649  MRI->clearKillFlags(NewReg);
650  }
651 
652  // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
653  // we should make sure it is not dead at CSMI.
654  for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
655  CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
656  for (auto PhysDef : PhysDefs)
657  if (!MI->getOperand(PhysDef.first).isDead())
658  CSMI->getOperand(PhysDef.first).setIsDead(false);
659 
660  // Go through implicit defs of CSMI and MI, and clear the kill flags on
661  // their uses in all the instructions between CSMI and MI.
662  // We might have made some of the kill flags redundant, consider:
663  // subs ... implicit-def %nzcv <- CSMI
664  // csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
665  // subs ... implicit-def %nzcv <- MI, to be eliminated
666  // csinc ... implicit killed %nzcv
667  // Since we eliminated MI, and reused a register imp-def'd by CSMI
668  // (here %nzcv), that register, if it was killed before MI, should have
669  // that kill flag removed, because it's lifetime was extended.
670  if (CSMI->getParent() == MI->getParent()) {
671  for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
672  for (auto ImplicitDef : ImplicitDefs)
673  if (MachineOperand *MO = II->findRegisterUseOperand(
674  ImplicitDef, /*isKill=*/true, TRI))
675  MO->setIsKill(false);
676  } else {
677  // If the instructions aren't in the same BB, bail out and clear the
678  // kill flag on all uses of the imp-def'd register.
679  for (auto ImplicitDef : ImplicitDefs)
680  MRI->clearKillFlags(ImplicitDef);
681  }
682 
683  if (CrossMBBPhysDef) {
684  // Add physical register defs now coming in from a predecessor to MBB
685  // livein list.
686  while (!PhysDefs.empty()) {
687  auto LiveIn = PhysDefs.pop_back_val();
688  if (!MBB->isLiveIn(LiveIn.second))
689  MBB->addLiveIn(LiveIn.second);
690  }
691  ++NumCrossBBCSEs;
692  }
693 
694  MI->eraseFromParent();
695  ++NumCSEs;
696  if (!PhysRefs.empty())
697  ++NumPhysCSEs;
698  if (Commuted)
699  ++NumCommutes;
700  Changed = true;
701  } else {
702  VNT.insert(MI, CurrVN++);
703  Exps.push_back(MI);
704  }
705  CSEPairs.clear();
706  ImplicitDefsToUpdate.clear();
707  ImplicitDefs.clear();
708  }
709 
710  return Changed;
711 }
712 
713 /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
714 /// dominator tree node if its a leaf or all of its children are done. Walk
715 /// up the dominator tree to destroy ancestors which are now done.
716 void
717 MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
719  if (OpenChildren[Node])
720  return;
721 
722  // Pop scope.
723  ExitScope(Node->getBlock());
724 
725  // Now traverse upwards to pop ancestors whose offsprings are all done.
726  while (MachineDomTreeNode *Parent = Node->getIDom()) {
727  unsigned Left = --OpenChildren[Parent];
728  if (Left != 0)
729  break;
730  ExitScope(Parent->getBlock());
731  Node = Parent;
732  }
733 }
734 
735 bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
739 
740  CurrVN = 0;
741 
742  // Perform a DFS walk to determine the order of visit.
743  WorkList.push_back(Node);
744  do {
745  Node = WorkList.pop_back_val();
746  Scopes.push_back(Node);
747  const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
748  OpenChildren[Node] = Children.size();
749  for (MachineDomTreeNode *Child : Children)
750  WorkList.push_back(Child);
751  } while (!WorkList.empty());
752 
753  // Now perform CSE.
754  bool Changed = false;
755  for (MachineDomTreeNode *Node : Scopes) {
756  MachineBasicBlock *MBB = Node->getBlock();
757  EnterScope(MBB);
758  Changed |= ProcessBlockCSE(MBB);
759  // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
760  ExitScopeIfDone(Node, OpenChildren);
761  }
762 
763  return Changed;
764 }
765 
766 // We use stronger checks for PRE candidate rather than for CSE ones to embrace
767 // checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps
768 // to exclude instrs created by PRE that won't be CSEed later.
769 bool MachineCSE::isPRECandidate(MachineInstr *MI) {
770  if (!isCSECandidate(MI) ||
771  MI->isNotDuplicable() ||
772  MI->mayLoad() ||
773  MI->isAsCheapAsAMove() ||
774  MI->getNumDefs() != 1 ||
775  MI->getNumExplicitDefs() != 1)
776  return false;
777 
778  for (auto def : MI->defs())
779  if (!Register::isVirtualRegister(def.getReg()))
780  return false;
781 
782  for (auto use : MI->uses())
783  if (use.isReg() && !Register::isVirtualRegister(use.getReg()))
784  return false;
785 
786  return true;
787 }
788 
789 bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
790  MachineBasicBlock *MBB) {
791  bool Changed = false;
792  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
793  MachineInstr *MI = &*I;
794  ++I;
795 
796  if (!isPRECandidate(MI))
797  continue;
798 
799  if (!PREMap.count(MI)) {
800  PREMap[MI] = MBB;
801  continue;
802  }
803 
804  auto MBB1 = PREMap[MI];
805  assert(
806  !DT->properlyDominates(MBB, MBB1) &&
807  "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
808  auto CMBB = DT->findNearestCommonDominator(MBB, MBB1);
809  if (!CMBB->isLegalToHoistInto())
810  continue;
811 
812  if (!isProfitableToHoistInto(CMBB, MBB, MBB1))
813  continue;
814 
815  // Two instrs are partial redundant if their basic blocks are reachable
816  // from one to another but one doesn't dominate another.
817  if (CMBB != MBB1) {
818  auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
819  if (BB != nullptr && BB1 != nullptr &&
820  (isPotentiallyReachable(BB1, BB) ||
821  isPotentiallyReachable(BB, BB1))) {
822 
823  assert(MI->getOperand(0).isDef() &&
824  "First operand of instr with one explicit def must be this def");
825  Register VReg = MI->getOperand(0).getReg();
826  Register NewReg = MRI->cloneVirtualRegister(VReg);
827  if (!isProfitableToCSE(NewReg, VReg, CMBB, MI))
828  continue;
829  MachineInstr &NewMI =
830  TII->duplicate(*CMBB, CMBB->getFirstTerminator(), *MI);
831  NewMI.getOperand(0).setReg(NewReg);
832 
833  PREMap[MI] = CMBB;
834  ++NumPREs;
835  Changed = true;
836  }
837  }
838  }
839  return Changed;
840 }
841 
842 // This simple PRE (partial redundancy elimination) pass doesn't actually
843 // eliminate partial redundancy but transforms it to full redundancy,
844 // anticipating that the next CSE step will eliminate this created redundancy.
845 // If CSE doesn't eliminate this, than created instruction will remain dead
846 // and eliminated later by Remove Dead Machine Instructions pass.
847 bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
849 
850  PREMap.clear();
851  bool Changed = false;
852  BBs.push_back(DT->getRootNode());
853  do {
854  auto Node = BBs.pop_back_val();
855  const std::vector<MachineDomTreeNode *> &Children = Node->getChildren();
856  for (MachineDomTreeNode *Child : Children)
857  BBs.push_back(Child);
858 
859  MachineBasicBlock *MBB = Node->getBlock();
860  Changed |= ProcessBlockPRE(DT, MBB);
861 
862  } while (!BBs.empty());
863 
864  return Changed;
865 }
866 
867 bool MachineCSE::isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
868  MachineBasicBlock *MBB,
869  MachineBasicBlock *MBB1) {
870  if (CandidateBB->getParent()->getFunction().hasMinSize())
871  return true;
872  assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
873  assert(DT->dominates(CandidateBB, MBB1) &&
874  "CandidateBB should dominate MBB1");
875  return MBFI->getBlockFreq(CandidateBB) <=
876  MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
877 }
878 
879 bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
880  if (skipFunction(MF.getFunction()))
881  return false;
882 
883  TII = MF.getSubtarget().getInstrInfo();
884  TRI = MF.getSubtarget().getRegisterInfo();
885  MRI = &MF.getRegInfo();
886  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
887  DT = &getAnalysis<MachineDominatorTree>();
888  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
889  LookAheadLimit = TII->getMachineCSELookAheadLimit();
890  bool ChangedPRE, ChangedCSE;
891  ChangedPRE = PerformSimplePRE(DT);
892  ChangedCSE = PerformCSE(DT->getRootNode());
893  return ChangedPRE || ChangedCSE;
894 }
Machine Common Subexpression Elimination
Definition: MachineCSE.cpp:157
void changeDebugValuesDefReg(Register Reg)
Find all DBG_VALUEs immediately following this instruction that point to a register def in this instr...
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
bool isReserved(Register PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:651
bool isAllocatable(unsigned PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn&#39;t been...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Definition: MachineInstr.h:510
bool isPotentiallyReachable(const Instruction *From, const Instruction *To, const SmallPtrSetImpl< BasicBlock *> *ExclusionSet=nullptr, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether instruction &#39;To&#39; is reachable from &#39;From&#39;, without passing through any blocks in Ex...
Definition: CFG.cpp:218
INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE, "Machine Common Subexpression Elimination", false, false) INITIALIZE_PASS_END(MachineCSE
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
void push_back(const T &Elt)
Definition: SmallVector.h:211
void initializeMachineCSEPass(PassRegistry &)
#define DEBUG_TYPE
Definition: MachineCSE.cpp:50
unsigned Reg
virtual MachineInstr & duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const
Clones instruction or the whole instruction bundle Orig and insert into MBB before InsertBefore...
MachineInstr * commuteInstruction(MachineInstr &MI, bool NewMI=false, unsigned OpIdx1=CommuteAnyOperandIndex, unsigned OpIdx2=CommuteAnyOperandIndex) const
This method commutes the operands of the given machine instruction MI.
unsigned getSubReg() const
bool isInlineAsm() const
bool constrainRegAttrs(unsigned Reg, unsigned ConstrainingReg, unsigned MinNumRegs=0)
Constrain the register class or the register bank of the virtual register Reg (and low-level type) to...
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
unsigned second
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
void setIsDead(bool Val=true)
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:477
bool isCopyLike() const
Return true if the instruction behaves like a copy.
bool isPHI() const
Special DenseMapInfo traits to compare MachineInstr* by value of the instruction rather than by point...
This file defines the MallocAllocator and BumpPtrAllocator interfaces.
void clearRegisterDeads(Register Reg)
Clear all dead flags on operands defining register Reg.
virtual unsigned getMachineCSELookAheadLimit() const
Return the value to use for the MachineCSE&#39;s LookAheadLimit, which is a heuristic used for CSE&#39;ing ph...
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
LLVM_NODISCARD bool empty() const
Definition: SmallSet.h:155
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:414
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction part of the terminator for a basic block.
Definition: MachineInstr.h:667
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:411
RecyclingAllocator - This class wraps an Allocator, adding the functionality of recycling deleted obj...
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool regsOverlap(Register regA, Register regB) const
Returns true if the two registers are equal or alias each other.
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
Definition: MachineCSE.cpp:151
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
Definition: MachineInstr.h:858
void clearKillFlags(unsigned Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
Base class for the actual dominator tree node.
const std::vector< DomTreeNodeBase * > & getChildren() const
AnalysisUsage & addPreservedID(const void *ID)
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:365
void setReg(Register Reg)
Change the register this operand corresponds to.
virtual const TargetInstrInfo * getInstrInfo() const
MachineBasicBlock * findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B)
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B...
bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn&#39;t ...
TargetInstrInfo - Interface to description of machine instruction set.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
BumpPtrAllocatorImpl BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition: Allocator.h:434
NodeT * getBlock() const
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:838
unsigned const MachineRegisterInfo * MRI
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
Move duplicate certain instructions close to their use
Definition: Localizer.cpp:27
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MachineInstrBuilder & UseMI
DomTreeNodeBase * getIDom() const
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
MCRegAliasIterator enumerates all registers aliasing Reg.
Represent the analysis usage information of a pass.
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
Definition: MachineInstr.h:499
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:180
bool isCopy() const
bool isImplicitDef() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool isDebugInstr() const
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
bool isConstantPhysReg(unsigned PhysReg) const
Returns true if PhysReg is unallocatable and constant throughout the function.
virtual bool isAsCheapAsAMove(const MachineInstr &MI) const
Return true if the instruction is as cheap as a move instruction.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
MachineDomTreeNode * getRootNode() const
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
MachineInstrBuilder MachineInstrBuilder & DefMI
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:301
unsigned pred_size() const
MachineInstr * getUniqueVRegDef(unsigned Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
void replaceRegWith(unsigned FromReg, unsigned ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
IterT skipDebugInstructionsForward(IterT It, IterT End)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator...
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:256
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Representation of each machine instruction.
Definition: MachineInstr.h:64
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
#define I(x, y, z)
Definition: MD5.cpp:58
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:619
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
unsigned getNumDefs() const
Returns the total number of definitions.
Definition: MachineInstr.h:426
virtual bool isCallerPreservedPhysReg(unsigned PhysReg, const MachineFunction &MF) const
Physical registers that may be modified within a function but are guaranteed to be restored before an...
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
bool hasOneNonDBGUse(unsigned RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register...
bool isKill() const
unsigned getNumExplicitDefs() const
Returns the number of non-implicit definitions.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
static bool isCallerPreservedOrConstPhysReg(unsigned Reg, const MachineFunction &MF, const TargetRegisterInfo &TRI)
Definition: MachineCSE.cpp:255
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:825
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:69
bool isPosition() const
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore...
IRTranslator LLVM IR MI
Register getReg() const
getReg - Returns the register number.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:416
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(unsigned Reg) const
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool isAsCheapAsAMove(QueryType Type=AllInBundle) const
Returns true if this instruction has the same cost (or less) than a move instruction.
Definition: MachineInstr.h:935
bool isCommutable(QueryType Type=IgnoreBundle) const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z, ..."), which produces the same result if Y and Z are exchanged.
Definition: MachineInstr.h:877
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are are pair (A,B) such that A is the 0-based ...
Definition: STLExtras.h:1500
bool isNotDuplicable(QueryType Type=AnyInBundle) const
Return true if this instruction cannot be safely duplicated.
Definition: MachineInstr.h:741
bool isImplicit() const
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164