LLVM 22.0.0git
SystemZHazardRecognizer.cpp
Go to the documentation of this file.
//=-- SystemZHazardRecognizer.cpp - SystemZ Hazard Recognizer ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a hazard recognizer for the SystemZ scheduler.
10//
11// This class is used by the SystemZ scheduling strategy to maintain
12// the state during scheduling, and provide cost functions for
13// scheduling candidates. This includes:
14//
15// * Decoder grouping. A decoder group can maximally hold 3 uops, and
16// instructions that always begin a new group should be scheduled when
17// the current decoder group is empty.
18// * Processor resources usage. It is beneficial to balance the use of
19// resources.
20//
21// A goal is to consider all instructions, also those outside of any
22// scheduling region. Such instructions are "advanced" past and include
23// single instructions before a scheduling region, branches etc.
24//
// A block that has only one predecessor continues scheduling with that
// predecessor's state (which may be updated by emitting branches).
27//
28// ===---------------------------------------------------------------------===//
29
31#include "llvm/ADT/Statistic.h"
32
33using namespace llvm;
34
35#define DEBUG_TYPE "machine-scheduler"
36
37// This is the limit of processor resource usage at which the
38// scheduler should try to look for other instructions (not using the
39// critical resource).
40static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,
41 cl::desc("The OOO window for processor "
42 "resources during scheduling."),
43 cl::init(8));
44
45unsigned SystemZHazardRecognizer::
46getNumDecoderSlots(SUnit *SU) const {
47 const MCSchedClassDesc *SC = getSchedClass(SU);
48 if (!SC->isValid())
49 return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
50
51 assert((SC->NumMicroOps != 2 || (SC->BeginGroup && !SC->EndGroup)) &&
52 "Only cracked instruction can have 2 uops.");
53 assert((SC->NumMicroOps < 3 || (SC->BeginGroup && SC->EndGroup)) &&
54 "Expanded instructions always group alone.");
55 assert((SC->NumMicroOps < 3 || (SC->NumMicroOps % 3 == 0)) &&
56 "Expanded instructions fill the group(s).");
57
58 return SC->NumMicroOps;
59}
60
61unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const {
62 unsigned Idx = CurrGroupSize;
63 if (GrpCount % 2)
64 Idx += 3;
65
66 if (SU != nullptr && !fitsIntoCurrentGroup(SU)) {
67 if (Idx == 1 || Idx == 2)
68 Idx = 3;
69 else if (Idx == 4 || Idx == 5)
70 Idx = 0;
71 }
72
73 return Idx;
74}
75
77getHazardType(SUnit *SU, int Stalls) {
78 return (fitsIntoCurrentGroup(SU) ? NoHazard : Hazard);
79}
80
82 CurrGroupSize = 0;
83 CurrGroupHas4RegOps = false;
84 clearProcResCounters();
85 GrpCount = 0;
86 LastFPdOpCycleIdx = UINT_MAX;
87 LastEmittedMI = nullptr;
89}
90
91bool
92SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
93 const MCSchedClassDesc *SC = getSchedClass(SU);
94 if (!SC->isValid())
95 return true;
96
97 // A cracked instruction only fits into schedule if the current
98 // group is empty.
99 if (SC->BeginGroup)
100 return (CurrGroupSize == 0);
101
102 // An instruction with 4 register operands will not fit in last slot.
103 assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) &&
104 "Current decoder group is already full!");
105 if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
106 return false;
107
108 // Since a full group is handled immediately in EmitInstruction(),
109 // SU should fit into current group. NumSlots should be 1 or 0,
110 // since it is not a cracked or expanded instruction.
111 assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) &&
112 "Expected normal instruction to fit in non-full group!");
113
114 return true;
115}
116
117bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const {
118 const MCInstrDesc &MID = MI->getDesc();
119 unsigned Count = 0;
120 for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) {
121 const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx);
122 if (RC == nullptr)
123 continue;
124 if (OpIdx >= MID.getNumDefs() &&
126 continue;
127 Count++;
128 }
129 return Count >= 4;
130}
131
132void SystemZHazardRecognizer::nextGroup() {
133 if (CurrGroupSize == 0)
134 return;
135
136 LLVM_DEBUG(dumpCurrGroup("Completed decode group"));
137 LLVM_DEBUG(CurGroupDbg = "";);
138
139 int NumGroups = ((CurrGroupSize > 3) ? (CurrGroupSize / 3) : 1);
140 assert((CurrGroupSize <= 3 || CurrGroupSize % 3 == 0) &&
141 "Current decoder group bad.");
142
143 // Reset counter for next group.
144 CurrGroupSize = 0;
145 CurrGroupHas4RegOps = false;
146
147 GrpCount += ((unsigned) NumGroups);
148
149 // Decrease counters for execution units.
150 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
151 ProcResourceCounters[i] = ((ProcResourceCounters[i] > NumGroups)
152 ? (ProcResourceCounters[i] - NumGroups)
153 : 0);
154
155 // Clear CriticalResourceIdx if it is now below the threshold.
156 if (CriticalResourceIdx != UINT_MAX &&
157 (ProcResourceCounters[CriticalResourceIdx] <=
159 CriticalResourceIdx = UINT_MAX;
160
162}
163
164#ifndef NDEBUG // Debug output
166 OS << "SU(" << SU->NodeNum << "):";
167 OS << TII->getName(SU->getInstr()->getOpcode());
168
169 const MCSchedClassDesc *SC = getSchedClass(SU);
170 if (!SC->isValid())
171 return;
172
174 PI = SchedModel->getWriteProcResBegin(SC),
175 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
176 const MCProcResourceDesc &PRD =
177 *SchedModel->getProcResource(PI->ProcResourceIdx);
178 std::string FU(PRD.Name);
179 // trim e.g. Z13_FXaUnit -> FXa
180 FU = FU.substr(FU.find('_') + 1);
181 size_t Pos = FU.find("Unit");
182 if (Pos != std::string::npos)
183 FU.resize(Pos);
184 if (FU == "LS") // LSUnit -> LSU
185 FU = "LSU";
186 OS << "/" << FU;
187
188 if (PI->ReleaseAtCycle> 1)
189 OS << "(" << PI->ReleaseAtCycle << "cyc)";
190 }
191
192 if (SC->NumMicroOps > 1)
193 OS << "/" << SC->NumMicroOps << "uops";
194 if (SC->BeginGroup && SC->EndGroup)
195 OS << "/GroupsAlone";
196 else if (SC->BeginGroup)
197 OS << "/BeginsGroup";
198 else if (SC->EndGroup)
199 OS << "/EndsGroup";
200 if (SU->isUnbuffered)
201 OS << "/Unbuffered";
202 if (has4RegOps(SU->getInstr()))
203 OS << "/4RegOps";
204}
205
206void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
207 dbgs() << "++ " << Msg;
208 dbgs() << ": ";
209
210 if (CurGroupDbg.empty())
211 dbgs() << " <empty>\n";
212 else {
213 dbgs() << "{ " << CurGroupDbg << " }";
214 dbgs() << " (" << CurrGroupSize << " decoder slot"
215 << (CurrGroupSize > 1 ? "s":"")
216 << (CurrGroupHas4RegOps ? ", 4RegOps" : "")
217 << ")\n";
218 }
219}
220
222 bool any = false;
223
224 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
225 if (ProcResourceCounters[i] > 0) {
226 any = true;
227 break;
228 }
229
230 if (!any)
231 return;
232
233 dbgs() << "++ | Resource counters: ";
234 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
235 if (ProcResourceCounters[i] > 0)
236 dbgs() << SchedModel->getProcResource(i)->Name
237 << ":" << ProcResourceCounters[i] << " ";
238 dbgs() << "\n";
239
240 if (CriticalResourceIdx != UINT_MAX)
241 dbgs() << "++ | Critical resource: "
242 << SchedModel->getProcResource(CriticalResourceIdx)->Name
243 << "\n";
244}
245
247 dumpCurrGroup("| Current decoder group");
248 dbgs() << "++ | Current cycle index: "
249 << getCurrCycleIdx() << "\n";
251 if (LastFPdOpCycleIdx != UINT_MAX)
252 dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n";
253}
254
255#endif //NDEBUG
256
257void SystemZHazardRecognizer::clearProcResCounters() {
258 ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0);
259 CriticalResourceIdx = UINT_MAX;
260}
261
262static inline bool isBranchRetTrap(MachineInstr *MI) {
263 return (MI->isBranch() || MI->isReturn() ||
264 MI->getOpcode() == SystemZ::CondTrap);
265}
266
267// Update state with SU as the next scheduled unit.
270 const MCSchedClassDesc *SC = getSchedClass(SU);
271 LLVM_DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU, dbgs());
272 dbgs() << "\n";);
273 LLVM_DEBUG(dumpCurrGroup("Decode group before emission"););
274
275 // If scheduling an SU that must begin a new decoder group, move on
276 // to next group.
277 if (!fitsIntoCurrentGroup(SU))
278 nextGroup();
279
281 if (CurGroupDbg.length()) cgd << ", "; dumpSU(SU, cgd););
282
283 LastEmittedMI = SU->getInstr();
284
285 // After returning from a call, we don't know much about the state.
286 if (SU->isCall) {
287 LLVM_DEBUG(dbgs() << "++ Clearing state after call.\n";);
288 Reset();
289 LastEmittedMI = SU->getInstr();
290 return;
291 }
292
293 // Increase counter for execution unit(s).
295 PI = SchedModel->getWriteProcResBegin(SC),
296 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
297 // Don't handle FPd together with the other resources.
298 if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
299 continue;
300 int &CurrCounter =
301 ProcResourceCounters[PI->ProcResourceIdx];
302 CurrCounter += PI->ReleaseAtCycle;
303 // Check if this is now the new critical resource.
304 if ((CurrCounter > ProcResCostLim) &&
305 (CriticalResourceIdx == UINT_MAX ||
306 (PI->ProcResourceIdx != CriticalResourceIdx &&
307 CurrCounter >
308 ProcResourceCounters[CriticalResourceIdx]))) {
310 dbgs() << "++ New critical resource: "
311 << SchedModel->getProcResource(PI->ProcResourceIdx)->Name
312 << "\n";);
313 CriticalResourceIdx = PI->ProcResourceIdx;
314 }
315 }
316
317 // Make note of an instruction that uses a blocking resource (FPd).
318 if (SU->isUnbuffered) {
319 LastFPdOpCycleIdx = getCurrCycleIdx(SU);
320 LLVM_DEBUG(dbgs() << "++ Last FPd cycle index: " << LastFPdOpCycleIdx
321 << "\n";);
322 }
323
324 // Insert SU into current group by increasing number of slots used
325 // in current group.
326 CurrGroupSize += getNumDecoderSlots(SU);
327 CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());
328 unsigned GroupLim = (CurrGroupHas4RegOps ? 2 : 3);
329 assert((CurrGroupSize <= GroupLim || CurrGroupSize == getNumDecoderSlots(SU))
330 && "SU does not fit into decoder group!");
331
332 // Check if current group is now full/ended. If so, move on to next
333 // group to be ready to evaluate more candidates.
334 if (CurrGroupSize >= GroupLim || SC->EndGroup)
335 nextGroup();
336}
337
339 const MCSchedClassDesc *SC = getSchedClass(SU);
340 if (!SC->isValid())
341 return 0;
342
343 // If SU begins new group, it can either break a current group early
344 // or fit naturally if current group is empty (negative cost).
345 if (SC->BeginGroup) {
346 if (CurrGroupSize)
347 return 3 - CurrGroupSize;
348 return -1;
349 }
350
351 // Similarly, a group-ending SU may either fit well (last in group), or
352 // end the group prematurely.
353 if (SC->EndGroup) {
354 unsigned ResultingGroupSize = (CurrGroupSize + getNumDecoderSlots(SU));
355 if (ResultingGroupSize < 3)
356 return (3 - ResultingGroupSize);
357 return -1;
358 }
359
360 // An instruction with 4 register operands will not fit in last slot.
361 if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
362 return 1;
363
364 // Most instructions can be placed in any decoder slot.
365 return 0;
366}
367
368bool SystemZHazardRecognizer::isFPdOpPreferred_distance(SUnit *SU) const {
369 assert (SU->isUnbuffered);
370 // If this is the first FPd op, it should be scheduled high.
371 if (LastFPdOpCycleIdx == UINT_MAX)
372 return true;
373 // If this is not the first PFd op, it should go into the other side
374 // of the processor to use the other FPd unit there. This should
375 // generally happen if two FPd ops are placed with 2 other
376 // instructions between them (modulo 6).
377 unsigned SUCycleIdx = getCurrCycleIdx(SU);
378 if (LastFPdOpCycleIdx > SUCycleIdx)
379 return ((LastFPdOpCycleIdx - SUCycleIdx) == 3);
380 return ((SUCycleIdx - LastFPdOpCycleIdx) == 3);
381}
382
384resourcesCost(SUnit *SU) {
385 int Cost = 0;
386
387 const MCSchedClassDesc *SC = getSchedClass(SU);
388 if (!SC->isValid())
389 return 0;
390
391 // For a FPd op, either return min or max value as indicated by the
392 // distance to any prior FPd op.
393 if (SU->isUnbuffered)
394 Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX);
395 // For other instructions, give a cost to the use of the critical resource.
396 else if (CriticalResourceIdx != UINT_MAX) {
398 PI = SchedModel->getWriteProcResBegin(SC),
399 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)
400 if (PI->ProcResourceIdx == CriticalResourceIdx)
401 Cost = PI->ReleaseAtCycle;
402 }
403
404 return Cost;
405}
406
408 bool TakenBranch) {
409 // Make a temporary SUnit.
410 SUnit SU(MI, 0);
411
412 // Set interesting flags.
413 SU.isCall = MI->isCall();
414
415 const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI);
416 for (const MCWriteProcResEntry &PRE :
417 make_range(SchedModel->getWriteProcResBegin(SC),
418 SchedModel->getWriteProcResEnd(SC))) {
419 switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) {
420 case 0:
421 SU.hasReservedResource = true;
422 break;
423 case 1:
424 SU.isUnbuffered = true;
425 break;
426 default:
427 break;
428 }
429 }
430
431 unsigned GroupSizeBeforeEmit = CurrGroupSize;
432 EmitInstruction(&SU);
433
434 if (!TakenBranch && isBranchRetTrap(MI)) {
435 // NT Branch on second slot ends group.
436 if (GroupSizeBeforeEmit == 1)
437 nextGroup();
438 }
439
440 if (TakenBranch && CurrGroupSize > 0)
441 nextGroup();
442
443 assert ((!MI->isTerminator() || isBranchRetTrap(MI)) &&
444 "Scheduler: unhandled terminator!");
445}
446
449 // Current decoder group
450 CurrGroupSize = Incoming->CurrGroupSize;
451 LLVM_DEBUG(CurGroupDbg = Incoming->CurGroupDbg;);
452
453 // Processor resources
454 ProcResourceCounters = Incoming->ProcResourceCounters;
455 CriticalResourceIdx = Incoming->CriticalResourceIdx;
456
457 // FPd
458 LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx;
459 GrpCount = Incoming->GrpCount;
460}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
MachineInstr unsigned OpIdx
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool isBranchRetTrap(MachineInstr *MI)
static cl::opt< int > ProcResCostLim("procres-cost-lim", cl::Hidden, cl::desc("The OOO window for processor " "resources during scheduling."), cl::init(8))
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Scheduling unit. This is a node in the scheduling DAG.
bool isCall
Is a function call.
unsigned NodeNum
Entry # of node in the node vector.
bool isUnbuffered
Uses an unbuffered resource.
bool hasReservedResource
Uses a reserved resource.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
void assign(size_type NumElts, ValueParamT Elt)
int groupingCost(SUnit *SU) const
Return the cost of decoder grouping for SU.
void emitInstruction(MachineInstr *MI, bool TakenBranch=false)
Wrap a non-scheduled instruction in an SU and emit it.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
void copyState(SystemZHazardRecognizer *Incoming)
Copy counters from end of single predecessor.
void Reset() override
Reset - This callback is invoked when a new block of instructions is about to be schedule.
void dumpSU(SUnit *SU, raw_ostream &OS) const
HazardType getHazardType(SUnit *SU, int Stalls=0) override
getHazardType - Return the hazard type of emitting this node.
void dumpCurrGroup(std::string Msg="") const
int resourcesCost(SUnit *SU)
Return the cost of SU in regards to processor resources usage.
void EmitInstruction(SUnit *SU) override
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard stat...
SystemZHazardRecognizer(const SystemZInstrInfo *tii, const TargetSchedModel *SM)
const MCWriteProcResEntry * ProcResIter
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
InstructionCost Cost
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
Define a kind of processor resource that will be modeled by the scheduler.
Definition MCSchedule.h:36
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition MCSchedule.h:123
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition MCSchedule.h:68