LLVM 19.0.0git
MachineTraceMetrics.h
Go to the documentation of this file.
1//===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interface for the MachineTraceMetrics analysis pass
10// that estimates CPU resource usage and critical data dependency paths through
11// preferred traces. This is useful for super-scalar CPUs where execution speed
12// can be limited both by data dependencies and by limited execution resources.
13//
14// Out-of-order CPUs will often be executing instructions from multiple basic
15// blocks at the same time. This makes it difficult to estimate the resource
16// usage accurately in a single basic block. Resources can be estimated better
17// by looking at a trace through the current basic block.
18//
19// For every block, the MachineTraceMetrics pass will pick a preferred trace
20// that passes through the block. The trace is chosen based on loop structure,
21// branch probabilities, and resource usage. The intention is to pick likely
22// traces that would be the most affected by code transformations.
23//
24// It is expensive to compute a full arbitrary trace for every block, so to
25// save some computations, traces are chosen to be convergent. This means that
26// if the traces through basic blocks A and B ever cross when moving away from
27// A and B, they never diverge again. This applies in both directions - If the
28// traces meet above A and B, they won't diverge when going further back.
29//
30// Traces tend to align with loops. The trace through a block in an inner loop
31// will begin at the loop entry block and end at a back edge. If there are
32// nested loops, the trace may begin and end at those instead.
33//
34// For each trace, we compute the critical path length, which is the number of
35// cycles required to execute the trace when execution is limited by data
36// dependencies only. We also compute the resource height, which is the number
37// of cycles required to execute all instructions in the trace when ignoring
38// data dependencies.
39//
40// Every instruction in the current block has a slack - the number of cycles
41// execution of the instruction can be delayed without extending the critical
42// path.
43//
44//===----------------------------------------------------------------------===//
45
46#ifndef LLVM_CODEGEN_MACHINETRACEMETRICS_H
47#define LLVM_CODEGEN_MACHINETRACEMETRICS_H
48
49#include "llvm/ADT/SparseSet.h"
50#include "llvm/ADT/ArrayRef.h"
51#include "llvm/ADT/DenseMap.h"
56
57namespace llvm {
58
59class AnalysisUsage;
60class MachineFunction;
61class MachineInstr;
62class MachineLoop;
63class MachineLoopInfo;
64class MachineRegisterInfo;
65struct MCSchedClassDesc;
66class raw_ostream;
67class TargetInstrInfo;
68class TargetRegisterInfo;
69
70// Keep track of physreg data dependencies by recording each live register unit.
71// Associate each regunit with an instruction operand. Depending on the
72// direction instructions are scanned, it could be the operand that defined the
73// regunit, or the highest operand to read the regunit.
75 unsigned RegUnit;
76 unsigned Cycle = 0;
77 const MachineInstr *MI = nullptr;
78 unsigned Op = 0;
79
80 unsigned getSparseSetIndex() const { return RegUnit; }
81
82 LiveRegUnit(unsigned RU) : RegUnit(RU) {}
83};
84
85/// Strategies for selecting traces.
87 /// Select the trace through a block that has the fewest instructions.
89 /// Select the trace that contains only the current basic block. For instance,
90 /// this strategy can be used by MachineCombiner to make better decisions when
91 /// we estimate critical path for in-order cores.
94};
95
97 const MachineFunction *MF = nullptr;
98 const TargetInstrInfo *TII = nullptr;
99 const TargetRegisterInfo *TRI = nullptr;
100 const MachineRegisterInfo *MRI = nullptr;
101 const MachineLoopInfo *Loops = nullptr;
102 TargetSchedModel SchedModel;
103
104public:
105 friend class Ensemble;
106 friend class Trace;
107
108 class Ensemble;
109
110 static char ID;
111
113
114 void getAnalysisUsage(AnalysisUsage&) const override;
116 void releaseMemory() override;
117 void verifyAnalysis() const override;
118
119 /// Per-basic block information that doesn't depend on the trace through the
120 /// block.
122 /// The number of non-trivial instructions in the block.
123 /// Doesn't count PHI and COPY instructions that are likely to be removed.
124 unsigned InstrCount = ~0u;
125
126 /// True when the block contains calls.
127 bool HasCalls = false;
128
129 FixedBlockInfo() = default;
130
131 /// Returns true when resource information for this block has been computed.
132 bool hasResources() const { return InstrCount != ~0u; }
133
134 /// Invalidate resource information.
135 void invalidate() { InstrCount = ~0u; }
136 };
137
138 /// Get the fixed resource information about MBB. Compute it on demand.
139 const FixedBlockInfo *getResources(const MachineBasicBlock*);
140
141 /// Get the scaled number of cycles used per processor resource in MBB.
142 /// This is an array with SchedModel.getNumProcResourceKinds() entries.
143 /// The getResources() function above must have been called first.
144 ///
145 /// These numbers have already been scaled by SchedModel.getResourceFactor().
146 ArrayRef<unsigned> getProcReleaseAtCycles(unsigned MBBNum) const;
147
148 /// A virtual register or regunit required by a basic block or its trace
149 /// successors.
150 struct LiveInReg {
151 /// The virtual register required, or a register unit.
153
154 /// For virtual registers: Minimum height of the defining instruction.
155 /// For regunits: Height of the highest user in the trace.
156 unsigned Height;
157
159 };
160
161 /// Per-basic block information that relates to a specific trace through the
162 /// block. Convergent traces means that only one of these is required per
163 /// block in a trace ensemble.
165 /// Trace predecessor, or NULL for the first block in the trace.
166 /// Valid when hasValidDepth().
167 const MachineBasicBlock *Pred = nullptr;
168
169 /// Trace successor, or NULL for the last block in the trace.
170 /// Valid when hasValidHeight().
171 const MachineBasicBlock *Succ = nullptr;
172
173 /// The block number of the head of the trace. (When hasValidDepth()).
174 unsigned Head;
175
176 /// The block number of the tail of the trace. (When hasValidHeight()).
177 unsigned Tail;
178
179 /// Accumulated number of instructions in the trace above this block.
180 /// Does not include instructions in this block.
181 unsigned InstrDepth = ~0u;
182
183 /// Accumulated number of instructions in the trace below this block.
184 /// Includes instructions in this block.
185 unsigned InstrHeight = ~0u;
186
187 TraceBlockInfo() = default;
188
189 /// Returns true if the depth resources have been computed from the trace
190 /// above this block.
191 bool hasValidDepth() const { return InstrDepth != ~0u; }
192
193 /// Returns true if the height resources have been computed from the trace
194 /// below this block.
195 bool hasValidHeight() const { return InstrHeight != ~0u; }
196
197 /// Invalidate depth resources when some block above this one has changed.
199
200 /// Invalidate height resources when a block below this one has changed.
202
203 /// Assuming that this is a dominator of TBI, determine if it contains
204 /// useful instruction depths. A dominating block can be above the current
205 /// trace head, and any dependencies from such a far away dominator are not
206 /// expected to affect the critical path.
207 ///
208 /// Also returns true when TBI == this.
209 bool isUsefulDominator(const TraceBlockInfo &TBI) const {
210 // The trace for TBI may not even be calculated yet.
211 if (!hasValidDepth() || !TBI.hasValidDepth())
212 return false;
213 // Instruction depths are only comparable if the traces share a head.
214 if (Head != TBI.Head)
215 return false;
216 // It is almost always the case that TBI belongs to the same trace as
217 // this block, but rare convoluted cases involving irreducible control
218 // flow, a dominator may share a trace head without actually being on the
219 // same trace as TBI. This is not a big problem as long as it doesn't
220 // increase the instruction depth.
222 }
223
224 // Data-dependency-related information. Per-instruction depth and height
225 // are computed from data dependencies in the current trace, using
226 // itinerary data.
227
228 /// Instruction depths have been computed. This implies hasValidDepth().
230
231 /// Instruction heights have been computed. This implies hasValidHeight().
233
234 /// Critical path length. This is the number of cycles in the longest data
235 /// dependency chain through the trace. This is only valid when both
236 /// HasValidInstrDepths and HasValidInstrHeights are set.
237 unsigned CriticalPath;
238
239 /// Live-in registers. These registers are defined above the current block
240 /// and used by this block or a block below it.
241 /// This does not include PHI uses in the current block, but it does
242 /// include PHI uses in deeper blocks.
244
245 void print(raw_ostream&) const;
246 };
247
248 /// InstrCycles represents the cycle height and depth of an instruction in a
249 /// trace.
250 struct InstrCycles {
251 /// Earliest issue cycle as determined by data dependencies and instruction
252 /// latencies from the beginning of the trace. Data dependencies from
253 /// before the trace are not included.
254 unsigned Depth;
255
256 /// Minimum number of cycles from this instruction is issued to the of the
257 /// trace, as determined by data dependencies and instruction latencies.
258 unsigned Height;
259 };
260
261 /// A trace represents a plausible sequence of executed basic blocks that
262 /// passes through the current basic block one. The Trace class serves as a
263 /// handle to internal cached data structures.
264 class Trace {
265 Ensemble &TE;
266 TraceBlockInfo &TBI;
267
268 unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; }
269
270 public:
271 explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
272
273 void print(raw_ostream&) const;
274
275 /// Compute the total number of instructions in the trace.
276 unsigned getInstrCount() const {
277 return TBI.InstrDepth + TBI.InstrHeight;
278 }
279
280 /// Return the resource depth of the top/bottom of the trace center block.
281 /// This is the number of cycles required to execute all instructions from
282 /// the trace head to the trace center block. The resource depth only
283 /// considers execution resources, it ignores data dependencies.
284 /// When Bottom is set, instructions in the trace center block are included.
285 unsigned getResourceDepth(bool Bottom) const;
286
287 /// Return the resource length of the trace. This is the number of cycles
288 /// required to execute the instructions in the trace if they were all
289 /// independent, exposing the maximum instruction-level parallelism.
290 ///
291 /// Any blocks in Extrablocks are included as if they were part of the
292 /// trace. Likewise, extra resources required by the specified scheduling
293 /// classes are included. For the caller to account for extra machine
294 /// instructions, it must first resolve each instruction's scheduling class.
295 unsigned getResourceLength(
296 ArrayRef<const MachineBasicBlock *> Extrablocks = std::nullopt,
297 ArrayRef<const MCSchedClassDesc *> ExtraInstrs = std::nullopt,
298 ArrayRef<const MCSchedClassDesc *> RemoveInstrs = std::nullopt) const;
299
300 /// Return the length of the (data dependency) critical path through the
301 /// trace.
302 unsigned getCriticalPath() const { return TBI.CriticalPath; }
303
304 /// Return the depth and height of MI. The depth is only valid for
305 /// instructions in or above the trace center block. The height is only
306 /// valid for instructions in or below the trace center block.
308 return TE.Cycles.lookup(&MI);
309 }
310
311 /// Return the slack of MI. This is the number of cycles MI can be delayed
312 /// before the critical path becomes longer.
313 /// MI must be an instruction in the trace center block.
314 unsigned getInstrSlack(const MachineInstr &MI) const;
315
316 /// Return the Depth of a PHI instruction in a trace center block successor.
317 /// The PHI does not have to be part of the trace.
318 unsigned getPHIDepth(const MachineInstr &PHI) const;
319
320 /// A dependence is useful if the basic block of the defining instruction
321 /// is part of the trace of the user instruction. It is assumed that DefMI
322 /// dominates UseMI (see also isUsefulDominator).
323 bool isDepInTrace(const MachineInstr &DefMI,
324 const MachineInstr &UseMI) const;
325 };
326
327 /// A trace ensemble is a collection of traces selected using the same
328 /// strategy, for example 'minimum resource height'. There is one trace for
329 /// every block in the function.
330 class Ensemble {
331 friend class Trace;
332
335 SmallVector<unsigned, 0> ProcResourceDepths;
336 SmallVector<unsigned, 0> ProcResourceHeights;
337
338 void computeTrace(const MachineBasicBlock*);
339 void computeDepthResources(const MachineBasicBlock*);
340 void computeHeightResources(const MachineBasicBlock*);
341 unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
342 void computeInstrDepths(const MachineBasicBlock*);
343 void computeInstrHeights(const MachineBasicBlock*);
344 void addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
346
347 protected:
349
350 explicit Ensemble(MachineTraceMetrics*);
351
354 const MachineLoop *getLoopFor(const MachineBasicBlock*) const;
357 ArrayRef<unsigned> getProcResourceDepths(unsigned MBBNum) const;
358 ArrayRef<unsigned> getProcResourceHeights(unsigned MBBNum) const;
359
360 public:
361 virtual ~Ensemble();
362
363 virtual const char *getName() const = 0;
364 void print(raw_ostream&) const;
365 void invalidate(const MachineBasicBlock *MBB);
366 void verify() const;
367
368 /// Get the trace that passes through MBB.
369 /// The trace is computed on demand.
371
372 /// Updates the depth of an machine instruction, given RegUnits.
373 void updateDepth(TraceBlockInfo &TBI, const MachineInstr&,
374 SparseSet<LiveRegUnit> &RegUnits);
375 void updateDepth(const MachineBasicBlock *, const MachineInstr&,
376 SparseSet<LiveRegUnit> &RegUnits);
377
378 /// Updates the depth of the instructions from Start to End.
381 SparseSet<LiveRegUnit> &RegUnits);
382
383 };
384
385 /// Get the trace ensemble representing the given trace selection strategy.
386 /// The returned Ensemble object is owned by the MachineTraceMetrics analysis,
387 /// and valid for the lifetime of the analysis pass.
389
390 /// Invalidate cached information about MBB. This must be called *before* MBB
391 /// is erased, or the CFG is otherwise changed.
392 ///
393 /// This invalidates per-block information about resource usage for MBB only,
394 /// and it invalidates per-trace information for any trace that passes
395 /// through MBB.
396 ///
397 /// Call Ensemble::getTrace() again to update any trace handles.
398 void invalidate(const MachineBasicBlock *MBB);
399
400private:
401 // One entry per basic block, indexed by block number.
403
404 // Cycles consumed on each processor resource per block.
405 // The number of processor resource kinds is constant for a given subtarget,
406 // but it is not known at compile time. The number of cycles consumed by
407 // block B on processor resource R is at ProcReleaseAtCycles[B*Kinds + R]
408 // where Kinds = SchedModel.getNumProcResourceKinds().
409 SmallVector<unsigned, 0> ProcReleaseAtCycles;
410
411 // One ensemble per strategy.
413 *Ensembles[static_cast<size_t>(MachineTraceStrategy::TS_NumStrategies)];
414
415 // Convert scaled resource usage to a cycle count that can be compared with
416 // latencies.
417 unsigned getCycles(unsigned Scaled) {
418 unsigned Factor = SchedModel.getLatencyFactor();
419 return (Scaled + Factor - 1) / Factor;
420 }
421};
422
424 const MachineTraceMetrics::Trace &Tr) {
425 Tr.print(OS);
426 return OS;
427}
428
431 En.print(OS);
432 return OS;
433}
434
435} // end namespace llvm
436
437#endif // LLVM_CODEGEN_MACHINETRACEMETRICS_H
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
Rewrite undef for PHI
@ Scaled
This file defines the DenseMap class.
bool End
Definition: ELF_riscv.cpp:480
IRTranslator LLVM IR MI
LiveInRegUnits addLiveIns(MBB)
raw_pwrite_stream & OS
This file defines the SmallVector class.
This file defines the SparseSet class derived from the version described in Briggs,...
Represent the analysis usage information of a pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
This class represents an Operation in the Expression.
A possibly irreducible generalization of a Loop.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Representation of each machine instruction.
Definition: MachineInstr.h:68
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A trace ensemble is a collection of traces selected using the same strategy, for example 'minimum res...
void invalidate(const MachineBasicBlock *MBB)
Invalidate traces through BadMBB.
ArrayRef< unsigned > getProcResourceHeights(unsigned MBBNum) const
Get an array of processor resource heights for MBB.
virtual const MachineBasicBlock * pickTracePred(const MachineBasicBlock *)=0
void updateDepth(TraceBlockInfo &TBI, const MachineInstr &, SparseSet< LiveRegUnit > &RegUnits)
Updates the depth of an machine instruction, given RegUnits.
const MachineLoop * getLoopFor(const MachineBasicBlock *) const
const TraceBlockInfo * getHeightResources(const MachineBasicBlock *) const
void updateDepths(MachineBasicBlock::iterator Start, MachineBasicBlock::iterator End, SparseSet< LiveRegUnit > &RegUnits)
Updates the depth of the instructions from Start to End.
const TraceBlockInfo * getDepthResources(const MachineBasicBlock *) const
virtual const char * getName() const =0
ArrayRef< unsigned > getProcResourceDepths(unsigned MBBNum) const
Get an array of processor resource depths for MBB.
virtual const MachineBasicBlock * pickTraceSucc(const MachineBasicBlock *)=0
Trace getTrace(const MachineBasicBlock *MBB)
Get the trace that passes through MBB.
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
unsigned getInstrCount() const
Compute the total number of instructions in the trace.
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
unsigned getInstrSlack(const MachineInstr &MI) const
Return the slack of MI.
bool isDepInTrace(const MachineInstr &DefMI, const MachineInstr &UseMI) const
A dependence is useful if the basic block of the defining instruction is part of the trace of the use...
unsigned getResourceLength(ArrayRef< const MachineBasicBlock * > Extrablocks=std::nullopt, ArrayRef< const MCSchedClassDesc * > ExtraInstrs=std::nullopt, ArrayRef< const MCSchedClassDesc * > RemoveInstrs=std::nullopt) const
Return the resource length of the trace.
Trace(Ensemble &te, TraceBlockInfo &tbi)
unsigned getCriticalPath() const
Return the length of the (data dependency) critical path through the trace.
unsigned getPHIDepth(const MachineInstr &PHI) const
Return the Depth of a PHI instruction in a trace center block successor.
unsigned getResourceDepth(bool Bottom) const
Return the resource depth of the top/bottom of the trace center block.
Ensemble * getEnsemble(MachineTraceStrategy)
Get the trace ensemble representing the given trace selection strategy.
bool runOnMachineFunction(MachineFunction &) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void invalidate(const MachineBasicBlock *MBB)
Invalidate cached information about MBB.
void getAnalysisUsage(AnalysisUsage &) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
void releaseMemory() override
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
void verifyAnalysis() const override
verifyAnalysis() - This member can be implemented by a analysis pass to check state of analysis infor...
const FixedBlockInfo * getResources(const MachineBasicBlock *)
Get the fixed resource information about MBB. Compute it on demand.
ArrayRef< unsigned > getProcReleaseAtCycles(unsigned MBBNum) const
Get the scaled number of cycles used per processor resource in MBB.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
SparseSet - Fast set implementation for objects that can be identified by small unsigned keys.
Definition: SparseSet.h:124
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
unsigned getLatencyFactor() const
Multiply cycle count by this factor to normalize it relative to other resources.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineTraceStrategy
Strategies for selecting traces.
@ TS_MinInstrCount
Select the trace through a block that has the fewest instructions.
@ TS_Local
Select the trace that contains only the current basic block.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:293
unsigned getSparseSetIndex() const
const MachineInstr * MI
LiveRegUnit(unsigned RU)
Per-basic block information that doesn't depend on the trace through the block.
bool hasResources() const
Returns true when resource information for this block has been computed.
unsigned InstrCount
The number of non-trivial instructions in the block.
void invalidate()
Invalidate resource information.
bool HasCalls
True when the block contains calls.
InstrCycles represents the cycle height and depth of an instruction in a trace.
unsigned Height
Minimum number of cycles from this instruction is issued to the of the trace, as determined by data d...
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...
A virtual register or regunit required by a basic block or its trace successors.
unsigned Height
For virtual registers: Minimum height of the defining instruction.
Register Reg
The virtual register required, or a register unit.
LiveInReg(Register Reg, unsigned Height=0)
Per-basic block information that relates to a specific trace through the block.
unsigned InstrDepth
Accumulated number of instructions in the trace above this block.
void invalidateDepth()
Invalidate depth resources when some block above this one has changed.
const MachineBasicBlock * Pred
Trace predecessor, or NULL for the first block in the trace.
unsigned InstrHeight
Accumulated number of instructions in the trace below this block.
SmallVector< LiveInReg, 4 > LiveIns
Live-in registers.
const MachineBasicBlock * Succ
Trace successor, or NULL for the last block in the trace.
bool hasValidDepth() const
Returns true if the depth resources have been computed from the trace above this block.
bool isUsefulDominator(const TraceBlockInfo &TBI) const
Assuming that this is a dominator of TBI, determine if it contains useful instruction depths.
void invalidateHeight()
Invalidate height resources when a block below this one has changed.
unsigned CriticalPath
Critical path length.
unsigned Head
The block number of the head of the trace. (When hasValidDepth()).
bool HasValidInstrDepths
Instruction depths have been computed. This implies hasValidDepth().
bool hasValidHeight() const
Returns true if the height resources have been computed from the trace below this block.
unsigned Tail
The block number of the tail of the trace. (When hasValidHeight()).
bool HasValidInstrHeights
Instruction heights have been computed. This implies hasValidHeight().