37#define DEBUG_TYPE "machine-combiner"
39STATISTIC(NumInstCombined,
"Number of machineinst combined");
43 cl::desc(
"Incremental depth computation will be used for basic "
44 "blocks with more instructions."),
cl::init(500));
47 cl::desc(
"Dump all substituted intrs"),
50#ifdef EXPENSIVE_CHECKS
52 "machine-combiner-verify-pattern-order",
cl::Hidden,
54 "Verify that the generated patterns are ordered by increasing latency"),
58 "machine-combiner-verify-pattern-order",
cl::Hidden,
60 "Verify that the generated patterns are ordered by increasing latency"),
83 void getAnalysisUsage(AnalysisUsage &AU)
const override;
84 bool runOnMachineFunction(MachineFunction &MF)
override;
85 StringRef getPassName()
const override {
return "Machine InstCombiner"; }
88 bool combineInstructions(MachineBasicBlock *);
89 MachineInstr *getOperandDef(
const MachineOperand &MO);
90 bool isTransientMI(
const MachineInstr *
MI);
91 unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
92 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
94 const MachineBasicBlock &
MBB);
95 unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
97 bool improvesCriticalPathLen(MachineBasicBlock *
MBB, MachineInstr *Root,
99 SmallVectorImpl<MachineInstr *> &InsInstrs,
100 SmallVectorImpl<MachineInstr *> &DelInstrs,
101 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
102 unsigned Pattern,
bool SlackIsAccurate);
103 bool reduceRegisterPressure(MachineInstr &Root, MachineBasicBlock *
MBB,
104 SmallVectorImpl<MachineInstr *> &InsInstrs,
105 SmallVectorImpl<MachineInstr *> &DelInstrs,
107 bool preservesResourceLen(MachineBasicBlock *
MBB,
109 SmallVectorImpl<MachineInstr *> &InsInstrs,
110 SmallVectorImpl<MachineInstr *> &DelInstrs);
111 void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,
112 SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC);
113 std::pair<unsigned, unsigned>
114 getLatenciesForInstrSequences(MachineInstr &
MI,
115 SmallVectorImpl<MachineInstr *> &InsInstrs,
116 SmallVectorImpl<MachineInstr *> &DelInstrs,
123char MachineCombiner::ID = 0;
127 "Machine InstCombiner",
false,
false)
133void MachineCombiner::getAnalysisUsage(
AnalysisUsage &AU)
const {
134 AU.setPreservesCFG();
147 MachineInstr *DefInstr =
nullptr;
155bool MachineCombiner::isTransientMI(
const MachineInstr *
MI) {
157 return MI->isTransient();
163 if (!
MI->isFullCopy()) {
165 if (
MI->getOperand(0).getSubReg() || Src.isPhysical() || Dst.isPhysical())
168 auto SrcSub =
MI->getOperand(1).getSubReg();
171 return TRI->getMatchingSuperRegClass(SrcRC, DstRC, SrcSub) !=
nullptr;
174 if (Src.isPhysical() && Dst.isPhysical())
177 if (Src.isVirtual() && Dst.isVirtual()) {
200MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
201 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
203 const MachineBasicBlock &
MBB) {
204 SmallVector<unsigned, 16> InstrDepth;
208 for (
auto *InstrPtr : InsInstrs) {
210 for (
const MachineOperand &MO : InstrPtr->all_uses()) {
214 unsigned DepthOp = 0;
215 unsigned LatencyOp = 0;
217 if (
II != InstrIdxForVirtReg.
end()) {
220 MachineInstr *DefInstr = InsInstrs[
II->second];
222 "There must be a definition for a new virtual register");
223 DepthOp = InstrDepth[
II->second];
227 InstrPtr->findRegisterUseOperandIdx(MO.
getReg(),
nullptr);
231 MachineInstr *DefInstr = getOperandDef(MO);
232 if (DefInstr && (
TII->getMachineCombinerTraceStrategy() !=
233 MachineTraceStrategy::TS_Local ||
236 if (!isTransientMI(DefInstr))
242 InstrPtr->findRegisterUseOperandIdx(MO.
getReg(),
246 IDepth = std::max(IDepth, DepthOp + LatencyOp);
250 unsigned NewRootIdx = InsInstrs.size() - 1;
251 return InstrDepth[NewRootIdx];
263unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
266 unsigned NewRootLatency = 0;
268 for (
const MachineOperand &MO : NewRoot->
all_defs()) {
278 unsigned LatencyOp = 0;
286 LatencyOp = TSchedModel.computeInstrLatency(NewRoot);
288 NewRootLatency = std::max(NewRootLatency, LatencyOp);
290 return NewRootLatency;
297 case MachineCombinerPattern::REASSOC_AX_BY:
298 case MachineCombinerPattern::REASSOC_AX_YB:
299 case MachineCombinerPattern::REASSOC_XA_BY:
300 case MachineCombinerPattern::REASSOC_XA_YB:
301 return CombinerObjective::MustReduceDepth;
303 return TII->getCombinerObjective(Pattern);
311std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences(
312 MachineInstr &
MI, SmallVectorImpl<MachineInstr *> &InsInstrs,
313 SmallVectorImpl<MachineInstr *> &DelInstrs,
315 assert(!InsInstrs.
empty() &&
"Only support sequences that insert instrs.");
316 unsigned NewRootLatency = 0;
318 MachineInstr *NewRoot = InsInstrs.
back();
319 for (
unsigned i = 0; i < InsInstrs.
size() - 1; i++)
320 NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[i]);
321 NewRootLatency += getLatency(&
MI, NewRoot, BlockTrace);
323 unsigned RootLatency = 0;
324 for (
auto *
I : DelInstrs)
325 RootLatency += TSchedModel.computeInstrLatency(
I);
327 return {NewRootLatency, RootLatency};
330bool MachineCombiner::reduceRegisterPressure(
331 MachineInstr &Root, MachineBasicBlock *
MBB,
332 SmallVectorImpl<MachineInstr *> &InsInstrs,
333 SmallVectorImpl<MachineInstr *> &DelInstrs,
unsigned Pattern) {
346bool MachineCombiner::improvesCriticalPathLen(
347 MachineBasicBlock *
MBB, MachineInstr *Root,
349 SmallVectorImpl<MachineInstr *> &InsInstrs,
350 SmallVectorImpl<MachineInstr *> &DelInstrs,
351 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
unsigned Pattern,
352 bool SlackIsAccurate) {
354 unsigned NewRootDepth =
355 getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace, *
MBB);
358 LLVM_DEBUG(
dbgs() <<
" Dependence data for " << *Root <<
"\tNewRootDepth: "
359 << NewRootDepth <<
"\tRootDepth: " << RootDepth);
366 if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) {
369 ?
dbgs() <<
"\t and it does it\n"
370 :
dbgs() <<
"\t but it does NOT do it\n");
371 return NewRootDepth < RootDepth;
379 unsigned NewRootLatency, RootLatency;
380 if (
TII->accumulateInstrSeqToRootLatency(*Root)) {
381 std::tie(NewRootLatency, RootLatency) =
382 getLatenciesForInstrSequences(*Root, InsInstrs, DelInstrs, BlockTrace);
384 NewRootLatency = TSchedModel.computeInstrLatency(InsInstrs.
back());
385 RootLatency = TSchedModel.computeInstrLatency(Root);
389 unsigned NewCycleCount = NewRootDepth + NewRootLatency;
390 unsigned OldCycleCount =
391 RootDepth + RootLatency + (SlackIsAccurate ? RootSlack : 0);
393 <<
"\tRootLatency: " << RootLatency <<
"\n\tRootSlack: "
394 << RootSlack <<
" SlackIsAccurate=" << SlackIsAccurate
395 <<
"\n\tNewRootDepth + NewRootLatency = " << NewCycleCount
396 <<
"\n\tRootDepth + RootLatency + RootSlack = "
399 ?
dbgs() <<
"\n\t It IMPROVES PathLen because"
400 :
dbgs() <<
"\n\t It DOES NOT improve PathLen because");
402 <<
", OldCycleCount = " << OldCycleCount <<
"\n");
404 return NewCycleCount <= OldCycleCount;
408void MachineCombiner::instr2instrSC(
409 SmallVectorImpl<MachineInstr *> &Instrs,
410 SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC) {
411 for (
auto *InstrPtr : Instrs) {
412 unsigned Opc = InstrPtr->getOpcode();
413 unsigned Idx =
TII->get(
Opc).getSchedClass();
420bool MachineCombiner::preservesResourceLen(
422 SmallVectorImpl<MachineInstr *> &InsInstrs,
423 SmallVectorImpl<MachineInstr *> &DelInstrs) {
438 instr2instrSC(InsInstrs, InsInstrsSC);
439 instr2instrSC(DelInstrs, DelInstrsSC);
445 unsigned ResLenAfterCombine =
449 << ResLenBeforeCombine
450 <<
" and after: " << ResLenAfterCombine <<
"\n");
452 ResLenAfterCombine <=
453 ResLenBeforeCombine +
TII->getExtendResourceLenLimit()
454 ?
dbgs() <<
"\t\t As result it IMPROVES/PRESERVES Resource Length\n"
455 :
dbgs() <<
"\t\t As result it DOES NOT improve/preserve Resource "
458 return ResLenAfterCombine <=
459 ResLenBeforeCombine +
TII->getExtendResourceLenLimit();
481 unsigned Pattern,
bool IncrementalUpdate) {
491 for (
auto *InstrPtr : InsInstrs)
494 for (
auto *InstrPtr : DelInstrs) {
495 InstrPtr->eraseFromParent();
497 for (
auto *
I = RegUnits.
begin();
I != RegUnits.
end();) {
498 if (
I->MI == InstrPtr)
505 if (IncrementalUpdate)
506 for (
auto *InstrPtr : InsInstrs)
521bool MachineCombiner::combineInstructions(MachineBasicBlock *
MBB) {
525 bool IncrementalUpdate =
false;
527 decltype(BlockIter) LastUpdate;
531 TraceEnsemble = Traces->
getEnsemble(
TII->getMachineCombinerTraceStrategy());
538 bool DoRegPressureReduce =
539 TII->shouldReduceRegisterPressure(
MBB, &RegClassInfo);
541 while (BlockIter !=
MBB->
end()) {
542 auto &
MI = *BlockIter++;
543 SmallVector<unsigned, 16> Patterns;
571 if (!
TII->getMachineCombinerPatterns(
MI, Patterns, DoRegPressureReduce))
575 [[maybe_unused]]
long PrevLatencyDiff = std::numeric_limits<long>::max();
577 for (
const auto P : Patterns) {
580 DenseMap<Register, unsigned> InstrIdxForVirtReg;
581 TII->genAlternativeCodeSequence(
MI,
P, InsInstrs, DelInstrs,
586 if (InsInstrs.
empty())
590 dbgs() <<
"\tFor the Pattern (" << (int)
P
591 <<
") these instructions could be removed\n";
592 for (
auto const *InstrPtr : DelInstrs)
593 InstrPtr->print(
dbgs(),
false,
false,
595 dbgs() <<
"\tThese instructions could replace the removed ones\n";
596 for (
auto const *InstrPtr : InsInstrs)
597 InstrPtr->print(
dbgs(),
false,
false,
605 auto [NewRootLatency, RootLatency] = getLatenciesForInstrSequences(
607 long CurrentLatencyDiff = ((long)RootLatency) - ((
long)NewRootLatency);
608 assert(CurrentLatencyDiff <= PrevLatencyDiff &&
609 "Current pattern is expected to be better than the previous "
611 PrevLatencyDiff = CurrentLatencyDiff;
614 if (IncrementalUpdate && LastUpdate != BlockIter) {
616 TraceEnsemble->
updateDepths(LastUpdate, BlockIter, RegUnits);
617 LastUpdate = BlockIter;
620 if (DoRegPressureReduce &&
621 getCombinerObjective(
P) ==
622 CombinerObjective::MustReduceRegisterPressure) {
625 IncrementalUpdate =
true;
626 LastUpdate = BlockIter;
628 if (reduceRegisterPressure(
MI,
MBB, InsInstrs, DelInstrs,
P)) {
631 RegUnits,
TII,
P, IncrementalUpdate);
641 if (
ML &&
TII->isThroughputPattern(
P)) {
642 LLVM_DEBUG(
dbgs() <<
"\t Replacing due to throughput pattern in loop\n");
644 RegUnits,
TII,
P, IncrementalUpdate);
648 }
else if (OptForSize && InsInstrs.size() < DelInstrs.size()) {
650 << InsInstrs.size() <<
" < "
651 << DelInstrs.size() <<
")\n");
653 RegUnits,
TII,
P, IncrementalUpdate);
665 if (improvesCriticalPathLen(
MBB, &
MI, BlockTrace, InsInstrs, DelInstrs,
666 InstrIdxForVirtReg,
P,
667 !IncrementalUpdate) &&
668 preservesResourceLen(
MBB, BlockTrace, InsInstrs, DelInstrs)) {
671 IncrementalUpdate =
true;
672 LastUpdate = BlockIter;
676 RegUnits,
TII,
P, IncrementalUpdate);
685 for (
auto *InstrPtr : InsInstrs)
686 MF->deleteMachineInstr(InstrPtr);
688 InstrIdxForVirtReg.
clear();
692 if (
Changed && IncrementalUpdate)
697bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
702 TSchedModel.
init(STI);
704 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
705 Traces = &getAnalysis<MachineTraceMetricsWrapperPass>().getMTM();
706 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
708 &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
710 TraceEnsemble =
nullptr;
717 <<
" Skipping pass: Target does not support machine combiner\n");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the DenseMap class.
const HexagonInstrInfo * TII
===- LazyMachineBlockFrequencyInfo.h - Lazy Block Frequency -*- C++ -*–===//
static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, MachineTraceMetrics::Ensemble *TraceEnsemble, LiveRegUnitSet &RegUnits, const TargetInstrInfo *TII, unsigned Pattern, bool IncrementalUpdate)
Inserts InsInstrs and deletes DelInstrs.
static cl::opt< bool > VerifyPatternOrder("machine-combiner-verify-pattern-order", cl::Hidden, cl::desc("Verify that the generated patterns are ordered by increasing latency"), cl::init(false))
static cl::opt< unsigned > inc_threshold("machine-combiner-inc-threshold", cl::Hidden, cl::desc("Incremental depth computation will be used for basic " "blocks with more instructions."), cl::init(500))
static cl::opt< bool > dump_intrs("machine-combiner-dump-subst-intrs", cl::Hidden, cl::desc("Dump all substituted intrs"), cl::init(false))
Register const TargetRegisterInfo * TRI
Promote Memory to Register
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Represent the analysis usage information of a pass.
iterator find(const_arg_type_t< KeyT > Val)
bool useMachineCombiner() const override
This is an alternative analysis pass to MachineBlockFrequencyInfo.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
Analysis pass which computes a MachineDominatorTree.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
filtered_mop_range all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
LLVM_ABI int findRegisterUseOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false) const
Returns the operand index that is a use of the specific register or -1 if it is not found.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static reg_iterator reg_end()
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
reg_iterator reg_begin(Register RegNo) const
defusechain_iterator< true, true, false, true, false > reg_iterator
reg_iterator/reg_begin/reg_end - Walk all defs and uses of the specified register.
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
A trace ensemble is a collection of traces selected using the same strategy, for example 'minimum res...
void invalidate(const MachineBasicBlock *MBB)
Invalidate traces through BadMBB.
void updateDepth(TraceBlockInfo &TBI, const MachineInstr &, LiveRegUnitSet &RegUnits)
Updates the depth of an machine instruction, given RegUnits.
void updateDepths(MachineBasicBlock::iterator Start, MachineBasicBlock::iterator End, LiveRegUnitSet &RegUnits)
Updates the depth of the instructions from Start to End.
Trace getTrace(const MachineBasicBlock *MBB)
Get the trace that passes through MBB.
unsigned getResourceLength(ArrayRef< const MachineBasicBlock * > Extrablocks={}, ArrayRef< const MCSchedClassDesc * > ExtraInstrs={}, ArrayRef< const MCSchedClassDesc * > RemoveInstrs={}) const
Return the resource length of the trace.
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
unsigned getInstrSlack(const MachineInstr &MI) const
Return the slack of MI.
bool isDepInTrace(const MachineInstr &DefMI, const MachineInstr &UseMI) const
A dependence is useful if the basic block of the defining instruction is part of the trace of the use...
Ensemble * getEnsemble(MachineTraceStrategy)
Get the trace ensemble representing the given trace selection strategy.
void verifyAnalysis() const
void invalidate(const MachineBasicBlock *MBB)
Invalidate cached information about MBB.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
const_iterator begin() const
const_iterator end() const
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
TargetInstrInfo - Interface to description of machine instruction set.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
LLVM_ABI bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
LLVM_ABI void init(const TargetSubtargetInfo *TSInfo, bool EnableSModel=true, bool EnableSItins=true)
Initialize the machine model for instruction scheduling.
LLVM_ABI unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
bool hasInstrSchedModelOrItineraries() const
Return true if this machine model includes an instruction-level scheduling model or cycle-to-cycle it...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
LLVM_ABI char & MachineCombinerID
This pass performs instruction combining using trace metrics to estimate critical-path and resource d...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
SparseSet< LiveRegUnit, MCRegUnit, MCRegUnitToIndex > LiveRegUnitSet
ArrayRef(const T &OneElt) -> ArrayRef< T >
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Machine model for scheduling, bundling, and heuristics.
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...