51#define DEBUG_TYPE "machine-cse"
53STATISTIC(NumCoalesces,
"Number of copies coalesced");
54STATISTIC(NumCSEs,
"Number of common subexpression eliminated");
55STATISTIC(NumPREs,
"Number of partial redundant expression"
56 " transformed to fully redundant");
58 "Number of physreg referencing common subexpr eliminated");
60 "Number of cross-MBB physreg referencing CS eliminated");
61STATISTIC(NumCommutes,
"Number of copies coalesced after commuting");
66 cl::desc(
"Threshold for the size of CSUses"));
70 cl::desc(
"Override the profitability heuristics for Machine CSE"));
104 .
set(MachineFunctionProperties::Property::IsSSA);
122 unsigned LookAheadLimit = 0;
138 PhysDefVector &PhysDefs,
bool &PhysUseDef)
const;
141 PhysDefVector &PhysDefs,
bool &NonLocal)
const;
164char MachineCSE::ID = 0;
169 "Machine Common Subexpression Elimination",
false,
false)
181 bool Changed =
false;
184 if (!Reg.isVirtual())
186 bool OnlyOneUse =
MRI->hasOneNonDBGUse(Reg);
209 if (!
MRI->constrainRegAttrs(SrcReg, Reg))
216 MRI->clearKillFlags(SrcReg);
233bool MachineCSE::isPhysDefTriviallyDead(
236 unsigned LookAheadLeft = LookAheadLimit;
237 while (LookAheadLeft) {
245 bool SeenDef =
false;
247 if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
249 if (!MO.isReg() || !MO.getReg())
251 if (!
TRI->regsOverlap(MO.getReg(), Reg))
282 return TRI.isCallerPreservedPhysReg(Reg, MF) ||
TII.isIgnorableUse(MO) ||
283 (
MRI.reservedRegsFrozen() &&
MRI.isConstantPhysReg(Reg));
293 PhysDefVector &PhysDefs,
294 bool &PhysUseDef)
const {
329 PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
333 for (
unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
338 return !PhysRefs.
empty();
343 PhysDefVector &PhysDefs,
344 bool &NonLocal)
const {
351 bool CrossMBB =
false;
356 for (
unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
357 if (
MRI->isAllocatable(PhysDefs[i].second) ||
358 MRI->isReserved(PhysDefs[i].second))
368 unsigned LookAheadLeft = LookAheadLimit;
369 while (LookAheadLeft) {
371 while (
I !=
E &&
I != EE &&
I->isDebugInstr())
375 assert(CrossMBB &&
"Reaching end-of-MBB without finding MI?");
409 if (
MI->isPosition() ||
MI->isPHI() ||
MI->isImplicitDef() ||
MI->isKill() ||
410 MI->isInlineAsm() ||
MI->isDebugInstr() ||
MI->isJumpTableDebugInfo())
414 if (
MI->isCopyLike())
418 if (
MI->mayStore() ||
MI->isCall() ||
MI->isTerminator() ||
419 MI->mayRaiseFPException() ||
MI->hasUnmodeledSideEffects())
426 if (!
MI->isDereferenceableInvariantLoad())
435 if (
MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
453 bool MayIncreasePressure =
true;
455 MayIncreasePressure =
false;
463 MayIncreasePressure =
true;
467 if (!MayIncreasePressure)
470 MayIncreasePressure =
true;
475 if (!MayIncreasePressure)
return true;
488 bool HasVRegUse =
false;
496 bool HasNonCopyUse =
false;
499 if (!
MI.isCopyLike()) {
500 HasNonCopyUse =
true;
512 HasPHI |=
UseMI.isPHI();
513 if (
UseMI.getParent() ==
MI->getParent())
522 ScopeType *
Scope =
new ScopeType(VNT);
529 assert(SI != ScopeMap.end());
535 bool Changed =
false;
541 if (!isCSECandidate(&
MI))
544 bool FoundCSE = VNT.count(&
MI);
547 if (PerformTrivialCopyPropagation(&
MI,
MBB)) {
555 FoundCSE = VNT.count(&
MI);
560 bool Commuted =
false;
561 if (!FoundCSE &&
MI.isCommutable()) {
564 FoundCSE = VNT.count(NewMI);
567 NewMI->eraseFromParent();
569 }
else if (!FoundCSE)
571 (void)
TII->commuteInstruction(
MI);
578 bool CrossMBBPhysDef =
false;
580 PhysDefVector PhysDefs;
581 bool PhysUseDef =
false;
583 hasLivePhysRegDefUses(&
MI,
MBB, PhysRefs, PhysDefs, PhysUseDef)) {
592 unsigned CSVN = VNT.lookup(&
MI);
594 if (PhysRegDefsReach(CSMI, &
MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
600 VNT.insert(&
MI, CurrVN++);
606 unsigned CSVN = VNT.lookup(&
MI);
609 LLVM_DEBUG(
dbgs() <<
"*** Found a common subexpression: " << *CSMI);
620 if (
MI.isConvergent() &&
MI.getParent() != CSMI->
getParent()) {
621 LLVM_DEBUG(
dbgs() <<
"*** Convergent MI and subexpression exist in "
622 "different BBs, avoid CSE!\n");
623 VNT.insert(&
MI, CurrVN++);
630 unsigned NumDefs =
MI.getNumDefs();
632 for (
unsigned i = 0, e =
MI.getNumOperands(); NumDefs && i != e; ++i) {
649 if (OldReg == NewReg) {
655 "Do not CSE physical register defs!");
657 if (!isProfitableToCSE(NewReg, OldReg, CSMI->
getParent(), &
MI)) {
666 if (!
MRI->constrainRegAttrs(NewReg, OldReg)) {
668 dbgs() <<
"*** Not the same register constraints, avoid CSE!\n");
673 CSEPairs.
push_back(std::make_pair(OldReg, NewReg));
679 for (
const std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
680 unsigned OldReg = CSEPair.first;
681 unsigned NewReg = CSEPair.second;
684 assert(Def !=
nullptr &&
"CSEd register has no unique definition?");
685 Def->clearRegisterDeads(NewReg);
687 MRI->replaceRegWith(OldReg, NewReg);
688 MRI->clearKillFlags(NewReg);
693 for (
unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
695 for (
const auto &PhysDef : PhysDefs)
696 if (!
MI.getOperand(PhysDef.first).isDead())
711 for (
auto ImplicitDef : ImplicitDefs)
713 ImplicitDef,
true,
TRI))
718 for (
auto ImplicitDef : ImplicitDefs)
719 MRI->clearKillFlags(ImplicitDef);
722 if (CrossMBBPhysDef) {
725 while (!PhysDefs.empty()) {
726 auto LiveIn = PhysDefs.pop_back_val();
733 MI.eraseFromParent();
735 if (!PhysRefs.
empty())
741 VNT.insert(&
MI, CurrVN++);
745 ImplicitDefsToUpdate.clear();
746 ImplicitDefs.clear();
758 if (OpenChildren[
Node])
762 ExitScope(
Node->getBlock());
766 unsigned Left = --OpenChildren[Parent];
769 ExitScope(Parent->getBlock());
786 OpenChildren[
Node] =
Node->getNumChildren();
788 }
while (!WorkList.
empty());
791 bool Changed =
false;
795 Changed |= ProcessBlockCSE(
MBB);
797 ExitScopeIfDone(
Node, OpenChildren);
808 if (!isCSECandidate(
MI) ||
809 MI->isNotDuplicable() ||
812 MI->getNumDefs() != 1 ||
813 MI->getNumExplicitDefs() != 1)
830 bool Changed =
false;
833 if (!isPRECandidate(&
MI, PhysRefs))
836 if (!PREMap.count(&
MI)) {
841 auto MBB1 = PREMap[&
MI];
844 "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
846 if (!CMBB->isLegalToHoistInto())
849 if (!isProfitableToHoistInto(CMBB,
MBB, MBB1))
856 if (BB !=
nullptr && BB1 !=
nullptr &&
866 if (
MI.isConvergent() && CMBB !=
MBB)
872 PhysDefVector PhysDefs;
873 if (!PhysRefs.
empty() &&
874 !PhysRegDefsReach(&*(CMBB->getFirstTerminator()), &
MI, PhysRefs,
879 "First operand of instr with one explicit def must be this def");
882 if (!isProfitableToCSE(NewReg, VReg, CMBB, &
MI))
885 TII->duplicate(*CMBB, CMBB->getFirstTerminator(),
MI);
913 bool Changed =
false;
920 Changed |= ProcessBlockPRE(DT,
MBB);
922 }
while (!BBs.
empty());
934 "CandidateBB should dominate MBB1");
946 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
947 DT = &getAnalysis<MachineDominatorTree>();
948 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
949 LookAheadLimit =
TII->getMachineCSELookAheadLimit();
950 bool ChangedPRE, ChangedCSE;
951 ChangedPRE = PerformSimplePRE(DT);
953 return ChangedPRE || ChangedCSE;
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
This file defines the BumpPtrAllocator interface.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file defines the DenseMap class.
const HexagonInstrInfo * TII
static bool isCallerPreservedOrConstPhysReg(MCRegister Reg, const MachineOperand &MO, const MachineFunction &MF, const TargetRegisterInfo &TRI, const TargetInstrInfo &TII)
Machine Common Subexpression Elimination
static cl::opt< int > CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024), cl::desc("Threshold for the size of CSUses"))
static cl::opt< bool > AggressiveMachineCSE("aggressive-machine-cse", cl::Hidden, cl::init(false), cl::desc("Override the profitability heuristics for Machine CSE"))
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Base class for the actual dominator tree node.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
bool isAsCheapAsAMove(const MachineInstr &MI) const override
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
unsigned pred_size() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
pred_iterator pred_begin()
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineBasicBlock * findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B)
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B.
MachineDomTreeNode * getRootNode() const
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
void changeDebugValuesDefReg(Register Reg)
Find all DBG_VALUEs that point to the register def in this instruction and point them to Reg instead.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
void setDebugLoc(DebugLoc DL)
Replace current source information with new such.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void releaseMemory()
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
RecyclingAllocator - This class wraps an Allocator, adding the functionality of recycling deleted obj...
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
ScopedHashTableScope< K, V, KInfo, AllocatorTy > ScopeTy
ScopeTy - This is a helpful typedef that allows clients to get easy access to the name of the scope f...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Scope
Defines the scope in which this symbol should be visible: Default – Visible in the public interface o...
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are are tuples (A,...
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
BumpPtrAllocatorImpl BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
void initializeMachineCSEPass(PassRegistry &)
bool isPotentiallyReachable(const Instruction *From, const Instruction *To, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet=nullptr, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether instruction 'To' is reachable from 'From', without passing through any blocks in Ex...
Special DenseMapInfo traits to compare MachineInstr* by value of the instruction rather than by point...