37#define DEBUG_TYPE "machine-combiner"
39STATISTIC(NumInstCombined,
"Number of machineinst combined");
43 cl::desc(
"Incremental depth computation will be used for basic "
44 "blocks with more instructions."),
cl::init(500));
47 cl::desc(
"Dump all substituted intrs"),
50#ifdef EXPENSIVE_CHECKS
52 "machine-combiner-verify-pattern-order",
cl::Hidden,
54 "Verify that the generated patterns are ordered by increasing latency"),
58 "machine-combiner-verify-pattern-order",
cl::Hidden,
60 "Verify that the generated patterns are ordered by increasing latency"),
83 void getAnalysisUsage(AnalysisUsage &AU)
const override;
84 bool runOnMachineFunction(MachineFunction &MF)
override;
85 StringRef getPassName()
const override {
return "Machine InstCombiner"; }
88 bool combineInstructions(MachineBasicBlock *);
89 MachineInstr *getOperandDef(
const MachineOperand &MO);
90 bool isTransientMI(
const MachineInstr *
MI);
91 unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
92 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
94 const MachineBasicBlock &
MBB);
95 unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
97 bool improvesCriticalPathLen(MachineBasicBlock *
MBB, MachineInstr *Root,
99 SmallVectorImpl<MachineInstr *> &InsInstrs,
100 SmallVectorImpl<MachineInstr *> &DelInstrs,
101 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
102 unsigned Pattern,
bool SlackIsAccurate);
103 bool reduceRegisterPressure(MachineInstr &Root, MachineBasicBlock *
MBB,
104 SmallVectorImpl<MachineInstr *> &InsInstrs,
105 SmallVectorImpl<MachineInstr *> &DelInstrs,
107 bool preservesResourceLen(MachineBasicBlock *
MBB,
109 SmallVectorImpl<MachineInstr *> &InsInstrs,
110 SmallVectorImpl<MachineInstr *> &DelInstrs);
111 void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,
112 SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC);
113 std::pair<unsigned, unsigned>
114 getLatenciesForInstrSequences(MachineInstr &
MI,
115 SmallVectorImpl<MachineInstr *> &InsInstrs,
116 SmallVectorImpl<MachineInstr *> &DelInstrs,
123char MachineCombiner::ID = 0;
127 "Machine InstCombiner",
false,
false)
133void MachineCombiner::getAnalysisUsage(
AnalysisUsage &AU)
const {
134 AU.setPreservesCFG();
147 MachineInstr *DefInstr =
nullptr;
150 DefInstr =
MRI->getUniqueVRegDef(MO.
getReg());
155bool MachineCombiner::isTransientMI(
const MachineInstr *
MI) {
157 return MI->isTransient();
163 if (!
MI->isFullCopy()) {
165 if (
MI->getOperand(0).getSubReg() || Src.isPhysical() || Dst.isPhysical())
168 auto SrcSub =
MI->getOperand(1).getSubReg();
169 auto SrcRC =
MRI->getRegClass(Src);
170 auto DstRC =
MRI->getRegClass(Dst);
171 return TRI->getMatchingSuperRegClass(SrcRC, DstRC, SrcSub) !=
nullptr;
174 if (Src.isPhysical() && Dst.isPhysical())
177 if (Src.isVirtual() && Dst.isVirtual()) {
178 auto SrcRC =
MRI->getRegClass(Src);
179 auto DstRC =
MRI->getRegClass(Dst);
180 return SrcRC->hasSuperClassEq(DstRC) || SrcRC->hasSubClassEq(DstRC);
187 auto DstRC =
MRI->getRegClass(Dst);
188 return DstRC->contains(Src);
200MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
201 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
203 const MachineBasicBlock &
MBB) {
204 SmallVector<unsigned, 16> InstrDepth;
208 for (
auto *InstrPtr : InsInstrs) {
210 for (
const MachineOperand &MO : InstrPtr->all_uses()) {
214 unsigned DepthOp = 0;
215 unsigned LatencyOp = 0;
216 DenseMap<Register, unsigned>::iterator
II =
218 if (
II != InstrIdxForVirtReg.
end()) {
221 MachineInstr *DefInstr = InsInstrs[
II->second];
223 "There must be a definition for a new virtual register");
224 DepthOp = InstrDepth[
II->second];
228 InstrPtr->findRegisterUseOperandIdx(MO.
getReg(),
nullptr);
232 MachineInstr *DefInstr = getOperandDef(MO);
233 if (DefInstr && (
TII->getMachineCombinerTraceStrategy() !=
234 MachineTraceStrategy::TS_Local ||
237 if (!isTransientMI(DefInstr))
243 InstrPtr->findRegisterUseOperandIdx(MO.
getReg(),
247 IDepth = std::max(IDepth, DepthOp + LatencyOp);
251 unsigned NewRootIdx = InsInstrs.size() - 1;
252 return InstrDepth[NewRootIdx];
264unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
267 unsigned NewRootLatency = 0;
269 for (
const MachineOperand &MO : NewRoot->
all_defs()) {
276 if (RI ==
MRI->reg_end())
279 unsigned LatencyOp = 0;
287 LatencyOp = TSchedModel.computeInstrLatency(NewRoot);
289 NewRootLatency = std::max(NewRootLatency, LatencyOp);
291 return NewRootLatency;
298 case MachineCombinerPattern::REASSOC_AX_BY:
299 case MachineCombinerPattern::REASSOC_AX_YB:
300 case MachineCombinerPattern::REASSOC_XA_BY:
301 case MachineCombinerPattern::REASSOC_XA_YB:
302 return CombinerObjective::MustReduceDepth;
304 return TII->getCombinerObjective(Pattern);
312std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences(
313 MachineInstr &
MI, SmallVectorImpl<MachineInstr *> &InsInstrs,
314 SmallVectorImpl<MachineInstr *> &DelInstrs,
316 assert(!InsInstrs.
empty() &&
"Only support sequences that insert instrs.");
317 unsigned NewRootLatency = 0;
319 MachineInstr *NewRoot = InsInstrs.
back();
320 for (
unsigned i = 0; i < InsInstrs.
size() - 1; i++)
321 NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[i]);
322 NewRootLatency += getLatency(&
MI, NewRoot, BlockTrace);
324 unsigned RootLatency = 0;
325 for (
auto *
I : DelInstrs)
326 RootLatency += TSchedModel.computeInstrLatency(
I);
328 return {NewRootLatency, RootLatency};
331bool MachineCombiner::reduceRegisterPressure(
332 MachineInstr &Root, MachineBasicBlock *
MBB,
333 SmallVectorImpl<MachineInstr *> &InsInstrs,
334 SmallVectorImpl<MachineInstr *> &DelInstrs,
unsigned Pattern) {
347bool MachineCombiner::improvesCriticalPathLen(
348 MachineBasicBlock *
MBB, MachineInstr *Root,
350 SmallVectorImpl<MachineInstr *> &InsInstrs,
351 SmallVectorImpl<MachineInstr *> &DelInstrs,
352 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
unsigned Pattern,
353 bool SlackIsAccurate) {
355 unsigned NewRootDepth =
356 getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace, *
MBB);
359 LLVM_DEBUG(
dbgs() <<
" Dependence data for " << *Root <<
"\tNewRootDepth: "
360 << NewRootDepth <<
"\tRootDepth: " << RootDepth);
367 if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) {
370 ?
dbgs() <<
"\t and it does it\n"
371 :
dbgs() <<
"\t but it does NOT do it\n");
372 return NewRootDepth < RootDepth;
380 unsigned NewRootLatency, RootLatency;
381 if (
TII->accumulateInstrSeqToRootLatency(*Root)) {
382 std::tie(NewRootLatency, RootLatency) =
383 getLatenciesForInstrSequences(*Root, InsInstrs, DelInstrs, BlockTrace);
385 NewRootLatency = TSchedModel.computeInstrLatency(InsInstrs.
back());
386 RootLatency = TSchedModel.computeInstrLatency(Root);
390 unsigned NewCycleCount = NewRootDepth + NewRootLatency;
391 unsigned OldCycleCount =
392 RootDepth + RootLatency + (SlackIsAccurate ? RootSlack : 0);
394 <<
"\tRootLatency: " << RootLatency <<
"\n\tRootSlack: "
395 << RootSlack <<
" SlackIsAccurate=" << SlackIsAccurate
396 <<
"\n\tNewRootDepth + NewRootLatency = " << NewCycleCount
397 <<
"\n\tRootDepth + RootLatency + RootSlack = "
400 ?
dbgs() <<
"\n\t It IMPROVES PathLen because"
401 :
dbgs() <<
"\n\t It DOES NOT improve PathLen because");
403 <<
", OldCycleCount = " << OldCycleCount <<
"\n");
405 return NewCycleCount <= OldCycleCount;
409void MachineCombiner::instr2instrSC(
410 SmallVectorImpl<MachineInstr *> &Instrs,
411 SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC) {
412 for (
auto *InstrPtr : Instrs) {
413 unsigned Opc = InstrPtr->getOpcode();
414 unsigned Idx =
TII->get(
Opc).getSchedClass();
421bool MachineCombiner::preservesResourceLen(
423 SmallVectorImpl<MachineInstr *> &InsInstrs,
424 SmallVectorImpl<MachineInstr *> &DelInstrs) {
439 instr2instrSC(InsInstrs, InsInstrsSC);
440 instr2instrSC(DelInstrs, DelInstrsSC);
446 unsigned ResLenAfterCombine =
450 << ResLenBeforeCombine
451 <<
" and after: " << ResLenAfterCombine <<
"\n");
453 ResLenAfterCombine <=
454 ResLenBeforeCombine +
TII->getExtendResourceLenLimit()
455 ?
dbgs() <<
"\t\t As result it IMPROVES/PRESERVES Resource Length\n"
456 :
dbgs() <<
"\t\t As result it DOES NOT improve/preserve Resource "
459 return ResLenAfterCombine <=
460 ResLenBeforeCombine +
TII->getExtendResourceLenLimit();
482 unsigned Pattern,
bool IncrementalUpdate) {
492 for (
auto *InstrPtr : InsInstrs)
495 for (
auto *InstrPtr : DelInstrs) {
496 InstrPtr->eraseFromParent();
498 for (
auto *
I = RegUnits.
begin();
I != RegUnits.
end();) {
499 if (
I->MI == InstrPtr)
506 if (IncrementalUpdate)
507 for (
auto *InstrPtr : InsInstrs)
522bool MachineCombiner::combineInstructions(MachineBasicBlock *
MBB) {
526 bool IncrementalUpdate =
false;
528 decltype(BlockIter) LastUpdate;
532 TraceEnsemble = Traces->
getEnsemble(
TII->getMachineCombinerTraceStrategy());
539 bool DoRegPressureReduce =
540 TII->shouldReduceRegisterPressure(
MBB, &RegClassInfo);
542 while (BlockIter !=
MBB->
end()) {
543 auto &
MI = *BlockIter++;
544 SmallVector<unsigned, 16> Patterns;
572 if (!
TII->getMachineCombinerPatterns(
MI, Patterns, DoRegPressureReduce))
576 [[maybe_unused]]
long PrevLatencyDiff = std::numeric_limits<long>::max();
578 for (
const auto P : Patterns) {
581 DenseMap<Register, unsigned> InstrIdxForVirtReg;
582 TII->genAlternativeCodeSequence(
MI,
P, InsInstrs, DelInstrs,
587 if (InsInstrs.
empty())
591 dbgs() <<
"\tFor the Pattern (" << (int)
P
592 <<
") these instructions could be removed\n";
593 for (
auto const *InstrPtr : DelInstrs)
594 InstrPtr->print(
dbgs(),
false,
false,
596 dbgs() <<
"\tThese instructions could replace the removed ones\n";
597 for (
auto const *InstrPtr : InsInstrs)
598 InstrPtr->print(
dbgs(),
false,
false,
606 auto [NewRootLatency, RootLatency] = getLatenciesForInstrSequences(
608 long CurrentLatencyDiff = ((long)RootLatency) - ((
long)NewRootLatency);
609 assert(CurrentLatencyDiff <= PrevLatencyDiff &&
610 "Current pattern is expected to be better than the previous "
612 PrevLatencyDiff = CurrentLatencyDiff;
615 if (IncrementalUpdate && LastUpdate != BlockIter) {
617 TraceEnsemble->
updateDepths(LastUpdate, BlockIter, RegUnits);
618 LastUpdate = BlockIter;
621 if (DoRegPressureReduce &&
622 getCombinerObjective(
P) ==
623 CombinerObjective::MustReduceRegisterPressure) {
626 IncrementalUpdate =
true;
627 LastUpdate = BlockIter;
629 if (reduceRegisterPressure(
MI,
MBB, InsInstrs, DelInstrs,
P)) {
632 RegUnits,
TII,
P, IncrementalUpdate);
642 if (
ML &&
TII->isThroughputPattern(
P)) {
643 LLVM_DEBUG(
dbgs() <<
"\t Replacing due to throughput pattern in loop\n");
645 RegUnits,
TII,
P, IncrementalUpdate);
649 }
else if (OptForSize && InsInstrs.size() < DelInstrs.size()) {
651 << InsInstrs.size() <<
" < "
652 << DelInstrs.size() <<
")\n");
654 RegUnits,
TII,
P, IncrementalUpdate);
666 if (improvesCriticalPathLen(
MBB, &
MI, BlockTrace, InsInstrs, DelInstrs,
667 InstrIdxForVirtReg,
P,
668 !IncrementalUpdate) &&
669 preservesResourceLen(
MBB, BlockTrace, InsInstrs, DelInstrs)) {
672 IncrementalUpdate =
true;
673 LastUpdate = BlockIter;
677 RegUnits,
TII,
P, IncrementalUpdate);
686 for (
auto *InstrPtr : InsInstrs)
687 MF->deleteMachineInstr(InstrPtr);
689 InstrIdxForVirtReg.
clear();
693 if (
Changed && IncrementalUpdate)
698bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
703 TSchedModel.
init(STI);
705 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
706 Traces = &getAnalysis<MachineTraceMetricsWrapperPass>().getMTM();
707 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
709 &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
711 TraceEnsemble =
nullptr;
718 <<
" Skipping pass: Target does not support machine combiner\n");
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the DenseMap class.
const HexagonInstrInfo * TII
===- LazyMachineBlockFrequencyInfo.h - Lazy Block Frequency -*- C++ -*-===//
static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, MachineTraceMetrics::Ensemble *TraceEnsemble, LiveRegUnitSet &RegUnits, const TargetInstrInfo *TII, unsigned Pattern, bool IncrementalUpdate)
Inserts InsInstrs and deletes DelInstrs.
static cl::opt< bool > VerifyPatternOrder("machine-combiner-verify-pattern-order", cl::Hidden, cl::desc("Verify that the generated patterns are ordered by increasing latency"), cl::init(false))
static cl::opt< unsigned > inc_threshold("machine-combiner-inc-threshold", cl::Hidden, cl::desc("Incremental depth computation will be used for basic " "blocks with more instructions."), cl::init(500))
static cl::opt< bool > dump_intrs("machine-combiner-dump-subst-intrs", cl::Hidden, cl::desc("Dump all substituted intrs"), cl::init(false))
Register const TargetRegisterInfo * TRI
Promote Memory to Register
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Represent the analysis usage information of a pass.
iterator find(const_arg_type_t< KeyT > Val)
bool useMachineCombiner() const override
This is an alternative analysis pass to MachineBlockFrequencyInfo.
LoopT * getLoopFor(const BlockT *BB) const
Return the innermost loop that BB lives in.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
Analysis pass which computes a MachineDominatorTree.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
filtered_mop_range all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
LLVM_ABI int findRegisterUseOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false) const
Returns the operand index that is a use of the specific register or -1 if it is not found.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
defusechain_iterator< true, true, false, true, false > reg_iterator
reg_iterator/reg_begin/reg_end - Walk all defs and uses of the specified register.
A trace ensemble is a collection of traces selected using the same strategy, for example 'minimum res...
void invalidate(const MachineBasicBlock *MBB)
Invalidate traces through BadMBB.
void updateDepth(TraceBlockInfo &TBI, const MachineInstr &, LiveRegUnitSet &RegUnits)
Updates the depth of a machine instruction, given RegUnits.
void updateDepths(MachineBasicBlock::iterator Start, MachineBasicBlock::iterator End, LiveRegUnitSet &RegUnits)
Updates the depth of the instructions from Start to End.
Trace getTrace(const MachineBasicBlock *MBB)
Get the trace that passes through MBB.
unsigned getResourceLength(ArrayRef< const MachineBasicBlock * > Extrablocks={}, ArrayRef< const MCSchedClassDesc * > ExtraInstrs={}, ArrayRef< const MCSchedClassDesc * > RemoveInstrs={}) const
Return the resource length of the trace.
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
unsigned getInstrSlack(const MachineInstr &MI) const
Return the slack of MI.
bool isDepInTrace(const MachineInstr &DefMI, const MachineInstr &UseMI) const
A dependence is useful if the basic block of the defining instruction is part of the trace of the use...
Ensemble * getEnsemble(MachineTraceStrategy)
Get the trace ensemble representing the given trace selection strategy.
void verifyAnalysis() const
void invalidate(const MachineBasicBlock *MBB)
Invalidate cached information about MBB.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
const_iterator begin() const
const_iterator end() const
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
LLVM_ABI bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
LLVM_ABI void init(const TargetSubtargetInfo *TSInfo, bool EnableSModel=true, bool EnableSItins=true)
Initialize the machine model for instruction scheduling.
LLVM_ABI unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
bool hasInstrSchedModelOrItineraries() const
Return true if this machine model includes an instruction-level scheduling model or cycle-to-cycle it...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
LLVM_ABI char & MachineCombinerID
This pass performs instruction combining using trace metrics to estimate critical-path and resource d...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
SparseSet< LiveRegUnit, MCRegUnit, MCRegUnitToIndex > LiveRegUnitSet
ArrayRef(const T &OneElt) -> ArrayRef< T >
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Machine model for scheduling, bundling, and heuristics.
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...