LLVM 17.0.0git
TargetSchedule.cpp
Go to the documentation of this file.
1//===- llvm/Target/TargetSchedule.cpp - Sched Machine Model ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a wrapper around MCSchedModel that allows the interface
10// to benefit from information currently only available in TargetInstrInfo.
11//
12//===----------------------------------------------------------------------===//
13
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <numeric>
30
31using namespace llvm;
32
34 cl::desc("Use TargetSchedModel for latency lookup"));
35
37 cl::desc("Use InstrItineraryData for latency lookup"));
38
40 return EnableSchedModel && SchedModel.hasInstrSchedModel();
41}
42
44 return EnableSchedItins && !InstrItins.isEmpty();
45}
46
48 STI = TSInfo;
49 SchedModel = TSInfo->getSchedModel();
50 TII = TSInfo->getInstrInfo();
51 STI->initInstrItins(InstrItins);
52
53 unsigned NumRes = SchedModel.getNumProcResourceKinds();
54 ResourceFactors.resize(NumRes);
55 ResourceLCM = SchedModel.IssueWidth;
56 for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
57 unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
58 if (NumUnits > 0)
59 ResourceLCM = std::lcm(ResourceLCM, NumUnits);
60 }
61 MicroOpFactor = ResourceLCM / SchedModel.IssueWidth;
62 for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
63 unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
64 ResourceFactors[Idx] = NumUnits ? (ResourceLCM / NumUnits) : 0;
65 }
66}
67
68/// Returns true only if instruction is specified as single issue.
70 const MCSchedClassDesc *SC) const {
71 if (hasInstrSchedModel()) {
72 if (!SC)
74 if (SC->isValid())
75 return SC->BeginGroup;
76 }
77 return false;
78}
79
81 const MCSchedClassDesc *SC) const {
82 if (hasInstrSchedModel()) {
83 if (!SC)
85 if (SC->isValid())
86 return SC->EndGroup;
87 }
88 return false;
89}
90
92 const MCSchedClassDesc *SC) const {
93 if (hasInstrItineraries()) {
94 int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass());
95 return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, *MI);
96 }
97 if (hasInstrSchedModel()) {
98 if (!SC)
100 if (SC->isValid())
101 return SC->NumMicroOps;
102 }
103 return MI->isTransient() ? 0 : 1;
104}
105
// The machine model may explicitly specify an invalid latency, which
// effectively means infinite latency. Since users of the TargetSchedule API
// don't know how to handle this, we convert it to a very large latency that is
// easy to distinguish when debugging the DAG but won't induce overflow.
static unsigned capLatency(int Cycles) {
  if (Cycles < 0)
    return 1000;
  return static_cast<unsigned>(Cycles);
}
113
114/// Return the MCSchedClassDesc for this instruction. Some SchedClasses require
115/// evaluation of predicates that depend on instruction operands or flags.
117resolveSchedClass(const MachineInstr *MI) const {
118 // Get the definition's scheduling class descriptor from this machine model.
119 unsigned SchedClass = MI->getDesc().getSchedClass();
120 const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
121 if (!SCDesc->isValid())
122 return SCDesc;
123
124#ifndef NDEBUG
125 unsigned NIter = 0;
126#endif
127 while (SCDesc->isVariant()) {
128 assert(++NIter < 6 && "Variants are nested deeper than the magic number");
129
130 SchedClass = STI->resolveSchedClass(SchedClass, MI, this);
131 SCDesc = SchedModel.getSchedClassDesc(SchedClass);
132 }
133 return SCDesc;
134}
135
136/// Find the def index of this operand. This index maps to the machine model and
137/// is independent of use operands. Def operands may be reordered with uses or
138/// merged with uses without affecting the def index (e.g. before/after
139/// regalloc). However, an instruction's def operands must never be reordered
140/// with respect to each other.
141static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) {
142 unsigned DefIdx = 0;
143 for (unsigned i = 0; i != DefOperIdx; ++i) {
144 const MachineOperand &MO = MI->getOperand(i);
145 if (MO.isReg() && MO.isDef())
146 ++DefIdx;
147 }
148 return DefIdx;
149}
150
151/// Find the use index of this operand. This is independent of the instruction's
152/// def operands.
153///
154/// Note that uses are not determined by the operand's isUse property, which
155/// is simply the inverse of isDef. Here we consider any readsReg operand to be
156/// a "use". The machine model allows an operand to be both a Def and Use.
157static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) {
158 unsigned UseIdx = 0;
159 for (unsigned i = 0; i != UseOperIdx; ++i) {
160 const MachineOperand &MO = MI->getOperand(i);
161 if (MO.isReg() && MO.readsReg() && !MO.isDef())
162 ++UseIdx;
163 }
164 return UseIdx;
165}
166
167// Top-level API for clients that know the operand indices.
169 const MachineInstr *DefMI, unsigned DefOperIdx,
170 const MachineInstr *UseMI, unsigned UseOperIdx) const {
171
173 return TII->defaultDefLatency(SchedModel, *DefMI);
174
175 if (hasInstrItineraries()) {
176 int OperLatency = 0;
177 if (UseMI) {
178 OperLatency = TII->getOperandLatency(&InstrItins, *DefMI, DefOperIdx,
179 *UseMI, UseOperIdx);
180 }
181 else {
182 unsigned DefClass = DefMI->getDesc().getSchedClass();
183 OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx);
184 }
185 if (OperLatency >= 0)
186 return OperLatency;
187
188 // No operand latency was found.
189 unsigned InstrLatency = TII->getInstrLatency(&InstrItins, *DefMI);
190
191 // Expected latency is the max of the stage latency and itinerary props.
192 // Rather than directly querying InstrItins stage latency, we call a TII
193 // hook to allow subtargets to specialize latency. This hook is only
194 // applicable to the InstrItins model. InstrSchedModel should model all
195 // special cases without TII hooks.
196 InstrLatency =
197 std::max(InstrLatency, TII->defaultDefLatency(SchedModel, *DefMI));
198 return InstrLatency;
199 }
200 // hasInstrSchedModel()
202 unsigned DefIdx = findDefIdx(DefMI, DefOperIdx);
203 if (DefIdx < SCDesc->NumWriteLatencyEntries) {
204 // Lookup the definition's write latency in SubtargetInfo.
205 const MCWriteLatencyEntry *WLEntry =
206 STI->getWriteLatencyEntry(SCDesc, DefIdx);
207 unsigned WriteID = WLEntry->WriteResourceID;
208 unsigned Latency = capLatency(WLEntry->Cycles);
209 if (!UseMI)
210 return Latency;
211
212 // Lookup the use's latency adjustment in SubtargetInfo.
213 const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI);
214 if (UseDesc->NumReadAdvanceEntries == 0)
215 return Latency;
216 unsigned UseIdx = findUseIdx(UseMI, UseOperIdx);
217 int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
218 if (Advance > 0 && (unsigned)Advance > Latency) // unsigned wrap
219 return 0;
220 return Latency - Advance;
221 }
222 // If DefIdx does not exist in the model (e.g. implicit defs), then return
223 // unit latency (defaultDefLatency may be too conservative).
224#ifndef NDEBUG
225 if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() &&
226 !DefMI->getDesc().operands()[DefOperIdx].isOptionalDef() &&
227 SchedModel.isComplete()) {
228 errs() << "DefIdx " << DefIdx << " exceeds machine model writes for "
229 << *DefMI << " (Try with MCSchedModel.CompleteModel set to false)";
230 llvm_unreachable("incomplete machine model");
231 }
232#endif
233 // FIXME: Automatically giving all implicit defs defaultDefLatency is
234 // undesirable. We should only do it for defs that are known to the MC
235 // desc like flags. Truly implicit defs should get 1 cycle latency.
236 return DefMI->isTransient() ? 0 : TII->defaultDefLatency(SchedModel, *DefMI);
237}
238
239unsigned
240TargetSchedModel::computeInstrLatency(const MCSchedClassDesc &SCDesc) const {
241 return capLatency(MCSchedModel::computeInstrLatency(*STI, SCDesc));
242}
243
244unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const {
245 assert(hasInstrSchedModel() && "Only call this function with a SchedModel");
246 unsigned SCIdx = TII->get(Opcode).getSchedClass();
247 return capLatency(SchedModel.computeInstrLatency(*STI, SCIdx));
248}
249
250unsigned TargetSchedModel::computeInstrLatency(const MCInst &Inst) const {
251 if (hasInstrSchedModel())
252 return capLatency(SchedModel.computeInstrLatency(*STI, *TII, Inst));
253 return computeInstrLatency(Inst.getOpcode());
254}
255
256unsigned
257TargetSchedModel::computeInstrLatency(const MachineInstr *MI,
258 bool UseDefaultDefLatency) const {
259 // For the itinerary model, fall back to the old subtarget hook.
260 // Allow subtargets to compute Bundle latencies outside the machine model.
261 if (hasInstrItineraries() || MI->isBundle() ||
262 (!hasInstrSchedModel() && !UseDefaultDefLatency))
263 return TII->getInstrLatency(&InstrItins, *MI);
264
265 if (hasInstrSchedModel()) {
266 const MCSchedClassDesc *SCDesc = resolveSchedClass(MI);
267 if (SCDesc->isValid())
268 return computeInstrLatency(*SCDesc);
269 }
270 return TII->defaultDefLatency(SchedModel, *MI);
271}
272
274computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
275 const MachineInstr *DepMI) const {
276 if (!SchedModel.isOutOfOrder())
277 return 1;
278
279 // Out-of-order processor can dispatch WAW dependencies in the same cycle.
280
281 // Treat predication as a data dependency for out-of-order cpus. In-order
282 // cpus do not need to treat predicated writes specially.
283 //
284 // TODO: The following hack exists because predication passes do not
285 // correctly append imp-use operands, and readsReg() strangely returns false
286 // for predicated defs.
287 Register Reg = DefMI->getOperand(DefOperIdx).getReg();
288 const MachineFunction &MF = *DefMI->getMF();
290 if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(*DepMI))
291 return computeInstrLatency(DefMI);
292
293 // If we have a per operand scheduling model, check if this def is writing
294 // an unbuffered resource. If so, it treated like an in-order cpu.
295 if (hasInstrSchedModel()) {
297 if (SCDesc->isValid()) {
298 for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc),
299 *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) {
300 if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->BufferSize)
301 return 1;
302 }
303 }
304 }
305 return 0;
306}
307
308double
310 if (hasInstrItineraries()) {
311 unsigned SchedClass = MI->getDesc().getSchedClass();
314 }
315
316 if (hasInstrSchedModel())
318
319 return 0.0;
320}
321
322double
324 unsigned SchedClass = TII->get(Opcode).getSchedClass();
328 if (hasInstrSchedModel()) {
329 const MCSchedClassDesc &SCDesc = *SchedModel.getSchedClassDesc(SchedClass);
330 if (SCDesc.isValid() && !SCDesc.isVariant())
331 return MCSchedModel::getReciprocalThroughput(*STI, SCDesc);
332 }
333
334 return 0.0;
335}
336
337double
339 if (hasInstrSchedModel())
340 return SchedModel.getReciprocalThroughput(*STI, *TII, MI);
341 return computeReciprocalThroughput(MI.getOpcode());
342}
343
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
IRTranslator LLVM IR MI
unsigned const TargetRegisterInfo * TRI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx)
Find the use index of this operand.
static unsigned capLatency(int Cycles)
static cl::opt< bool > EnableSchedModel("schedmodel", cl::Hidden, cl::init(true), cl::desc("Use TargetSchedModel for latency lookup"))
static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx)
Find the def index of this operand.
static cl::opt< bool > EnableSchedItins("scheditins", cl::Hidden, cl::init(true), cl::desc("Use InstrItineraryData for latency lookup"))
int getNumMicroOps(unsigned ItinClassIndx) const
Return the number of micro-ops that the given class decodes to.
int getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
bool isEmpty() const
Returns true if there are no itineraries.
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
unsigned getOpcode() const
Definition: MCInst.h:198
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:596
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
int getReadAdvanceCycles(const MCSchedClassDesc *SC, unsigned UseIdx, unsigned WriteResID) const
const MCWriteLatencyEntry * getWriteLatencyEntry(const MCSchedClassDesc *SC, unsigned DefIdx) const
void initInstrItins(InstrItineraryData &InstrItins) const
Initialize an InstrItineraryData instance.
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Representation of each machine instruction.
Definition: MachineInstr.h:68
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:513
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
bool isTransient() const
Return true if this is a transient instruction that is either very likely to be eliminated during reg...
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
MachineOperand class - Representation of each machine instruction operand.
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
void resize(size_type N)
Definition: SmallVector.h:642
virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData, const MachineInstr &MI) const
Return the number of u-operations the given machine instruction will be decoded to on the target cpu.
virtual int getOperandLatency(const InstrItineraryData *ItinData, SDNode *DefNode, unsigned DefIdx, SDNode *UseNode, unsigned UseIdx) const
virtual bool isPredicated(const MachineInstr &MI) const
Returns true if the instruction is already predicated.
virtual unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const
Compute the instruction latency of a given instruction.
unsigned defaultDefLatency(const MCSchedModel &SchedModel, const MachineInstr &DefMI) const
Return the default expected latency for a def based on its opcode.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool mustEndGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if current group must end.
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
unsigned computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *DepMI) const
Output dependency latency of a pair of defs of the same register.
bool mustBeginGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if new group must begin.
const MCSchedClassDesc * resolveSchedClass(const MachineInstr *MI) const
Return the MCSchedClassDesc for this instruction.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
double computeReciprocalThroughput(const MachineInstr *MI) const
Compute the reciprocal throughput of the given instruction.
unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return the number of issue slots required for this MI.
const InstrItineraryData * getInstrItineraries() const
bool hasInstrItineraries() const
Return true if this machine model includes cycle-to-cycle itinerary data.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
virtual unsigned resolveSchedClass(unsigned SchedClass, const MachineInstr *MI, const TargetSchedModel *SchedModel) const
Resolve a SchedClass at runtime, where SchedClass identifies an MCSchedClassDesc with the isVariant p...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:109
bool isValid() const
Definition: MCSchedule.h:127
bool isVariant() const
Definition: MCSchedule.h:130
uint16_t NumReadAdvanceEntries
Definition: MCSchedule.h:125
bool isOutOfOrder() const
Return true if machine supports out of order execution.
Definition: MCSchedule.h:333
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
Definition: MCSchedule.h:346
unsigned getNumProcResourceKinds() const
Definition: MCSchedule.h:335
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
Definition: MCSchedule.h:320
static int computeInstrLatency(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Returns the latency value for the scheduling class.
Definition: MCSchedule.cpp:41
unsigned IssueWidth
Definition: MCSchedule.h:256
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Definition: MCSchedule.h:339
static double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Definition: MCSchedule.cpp:89
bool isComplete() const
Return true if this machine model data for all instructions with a scheduling class (itinerary class ...
Definition: MCSchedule.h:330
Specify the latency in cpu cycles for a particular scheduling class and def index.
Definition: MCSchedule.h:77
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:63