LLVM 23.0.0git
SystemZMachineScheduler.cpp
Go to the documentation of this file.
1//-- SystemZMachineScheduler.cpp - SystemZ Scheduler Interface -*- C++ -*---==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
11
12using namespace llvm;
13
14#define DEBUG_TYPE "machine-scheduler"
15
16/// Pre-RA scheduling ///
17
18static bool isRegDef(const MachineOperand &MO) {
19 return MO.isReg() && MO.isDef();
20}
21
22static bool isPhysRegDef(const MachineOperand &MO) {
23 return isRegDef(MO) && MO.getReg().isPhysical();
24}
25
void SystemZPreRASchedStrategy::initializeLatencyReduction() {
  // Enable latency reduction for a region that has a considerable amount of
  // data sequences that should be interleaved. These are SUs that only have
  // one data predecessor / successor edge(s) to their adjacent instruction(s)
  // in the input order. Disable if region has many SUs relative to the
  // overall height.
  unsigned DAGHeight = 0;
  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx)
    DAGHeight = std::max(DAGHeight, DAG->SUnits[Idx].getHeight());
  // A "flat" DAG (at least ~3 SUs per height level) already offers plenty of
  // parallelism, so the latency heuristic is disabled for it.
  RegionPolicy.DisableLatencyHeuristic =
      DAG->SUnits.size() >= 3 * std::max(DAGHeight, 1u);
  if ((HasDataSequences = !RegionPolicy.DisableLatencyHeuristic)) {
    // Count the nodes that belong to def-use sequences of length >= 2.
    // CurrSequence is the length of the sequence currently being scanned;
    // countSequence() folds it into NumSeqNodes and resets it.
    unsigned CurrSequence = 0, NumSeqNodes = 0;
    auto countSequence = [&CurrSequence, &NumSeqNodes]() {
      // Only sequences of two or more nodes count.
      if (CurrSequence >= 2)
        NumSeqNodes += CurrSequence;
      CurrSequence = 0;
    };
    for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
      const SUnit *SU = &DAG->SUnits[Idx];
      bool InDataSequence = true;
      // One Data pred to MI just above, or no preds.
      unsigned NumPreds = 0;
      for (const SDep &Pred : SU->Preds)
        if (++NumPreds != 1 || Pred.getKind() != SDep::Data ||
            Pred.getSUnit()->NodeNum != Idx - 1)
          InDataSequence = false;
      // One Data succ or no succs (ignoring ExitSU).
      unsigned NumSuccs = 0;
      for (const SDep &Succ : SU->Succs)
        if (Succ.getSUnit() != &DAG->ExitSU &&
            (++NumSuccs != 1 || Succ.getKind() != SDep::Data))
          InDataSequence = false;
      // Another type of node or one that does not have a single data pred
      // ends any previous sequence.
      if (!InDataSequence || !NumPreds)
        countSequence();
      if (InDataSequence)
        CurrSequence++;
    }
    countSequence();
    // Keep latency scheduling for data sequences enabled only if a
    // significant fraction (>= 1/4, and at least 4) of the region's nodes
    // belong to such sequences.
    if (NumSeqNodes >= std::max(size_t(4), DAG->SUnits.size() / 4)) {
      LLVM_DEBUG(dbgs() << "Number of nodes in def-use sequences: "
                        << NumSeqNodes << ". ";);
    } else
      HasDataSequences = false;
  }
}
74
75bool SystemZPreRASchedStrategy::definesCmp0Src(const MachineInstr *MI,
76 bool CCDef) const {
77 if (Cmp0SrcReg != SystemZ::NoRegister && MI->getNumOperands() &&
78 (MI->getDesc().hasImplicitDefOfPhysReg(SystemZ::CC) || !CCDef)) {
79 const MachineOperand &MO0 = MI->getOperand(0);
80 if (isRegDef(MO0) && MO0.getReg() == Cmp0SrcReg)
81 return true;
82 }
83 return false;
84}
85
86static int biasPhysRegExtra(const SUnit *SU) {
87 if (int Res = biasPhysReg(SU, /*isTop=*/false))
88 return Res;
89
90 // Also recognize Load Address. Most of these are with an FI operand.
91 const MachineInstr *MI = SU->getInstr();
92 return MI->getNumOperands() && !MI->isCopy() &&
93 isPhysRegDef(MI->getOperand(0));
94}
95
97 SchedCandidate &TryCand,
98 SchedBoundary *Zone) const {
99 assert(Zone && !Zone->isTop() && "Bottom-Up scheduling only.");
100
101 // Initialize the candidate if needed.
102 if (!Cand.isValid()) {
103 TryCand.Reason = FirstValid;
104 return true;
105 }
106
107 // Bias physreg defs and copies to their uses and definitions respectively.
108 int TryCandPRegBias = biasPhysRegExtra(TryCand.SU);
109 int CandPRegBias = biasPhysRegExtra(Cand.SU);
110 if (tryGreater(TryCandPRegBias, CandPRegBias, TryCand, Cand, PhysReg))
111 return TryCand.Reason != NoCand;
112 if (TryCandPRegBias && CandPRegBias) {
113 // Both biased same way.
114 tryGreater(TryCand.SU->NodeNum, Cand.SU->NodeNum, TryCand, Cand, NodeOrder);
115 return TryCand.Reason != NoCand;
116 }
117
118 // Don't extend the scheduled latency in regions with many nodes in data
119 // sequences, or for (single block loop) regions that are acyclically
120 // (within a single loop iteration) latency limited. IsAcyclicLatencyLimited
121 // is set only after initialization in registerRoots(), which is why it is
122 // checked here instead of earlier.
123 if (!RegionPolicy.DisableLatencyHeuristic &&
124 (HasDataSequences || Rem.IsAcyclicLatencyLimited))
125 if (const SUnit *HigherSU =
126 TryCand.SU->getHeight() > Cand.SU->getHeight() ? TryCand.SU
127 : TryCand.SU->getHeight() < Cand.SU->getHeight() ? Cand.SU
128 : nullptr)
129 if (HigherSU->getHeight() > Zone->getScheduledLatency() &&
130 HigherSU->getDepth() < computeRemLatency(*Zone)) {
131 // One or both SUs increase the scheduled latency.
132 tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), TryCand, Cand,
134 return TryCand.Reason != NoCand;
135 }
136
137 // Weak edges help copy coalescing.
138 if (tryLess(TryCand.SU->WeakSuccsLeft, Cand.SU->WeakSuccsLeft, TryCand, Cand,
139 Weak))
140 return TryCand.Reason != NoCand;
141
142 // Help compare with zero elimination.
143 if (tryGreater(definesCmp0Src(TryCand.SU->getInstr()),
144 definesCmp0Src(Cand.SU->getInstr()), TryCand, Cand, Weak))
145 return TryCand.Reason != NoCand;
146
147 // Fall through to original instruction order.
148 if (TryCand.SU->NodeNum > Cand.SU->NodeNum) {
149 TryCand.Reason = NodeOrder;
150 return true;
151 }
152
153 return false;
154}
155
158 unsigned NumRegionInstrs) {
159 // Avoid setting up the register pressure tracker for small regions to save
160 // compile time. Currently only used for computeCyclicCriticalPath() which
161 // is used for single block loops.
162 MachineBasicBlock *MBB = Begin->getParent();
163 RegionPolicy.ShouldTrackPressure =
164 MBB->isSuccessor(MBB) && NumRegionInstrs >= 8;
165
166 // These heuristics have so far seemed to work better without adding a
167 // top-down boundary.
168 RegionPolicy.OnlyBottomUp = true;
170 this->NumRegionInstrs = NumRegionInstrs;
171}
172
175
176 Cmp0SrcReg = SystemZ::NoRegister;
177
178 initializeLatencyReduction();
179 LLVM_DEBUG(dbgs() << "Latency scheduling " << (HasDataSequences ? "" : "not ")
180 << "enabled for data sequences.\n";);
181}
182
184 GenericScheduler::schedNode(SU, IsTopNode);
185
186 const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(DAG->TII);
187 MachineInstr *MI = SU->getInstr();
188 if (TII->isCompareZero(*MI))
189 Cmp0SrcReg = TII->getCompareSourceReg(*MI);
190 else if (MI->getDesc().hasImplicitDefOfPhysReg(SystemZ::CC) ||
191 definesCmp0Src(MI, /*CCDef=*/false))
192 Cmp0SrcReg = SystemZ::NoRegister;
193}
194
195/// Post-RA scheduling ///
196
197#ifndef NDEBUG
198// Print the set of SUs
199void SystemZPostRASchedStrategy::SUSet::
200dump(SystemZHazardRecognizer &HazardRec) const {
201 dbgs() << "{";
202 for (auto &SU : *this) {
203 HazardRec.dumpSU(SU, dbgs());
204 if (SU != *rbegin())
205 dbgs() << ", ";
206 }
207 dbgs() << "}\n";
208}
209#endif
210
211// Try to find a single predecessor that would be interesting for the
212// scheduler in the top-most region of MBB.
214 const MachineLoop *Loop) {
215 MachineBasicBlock *PredMBB = nullptr;
216 if (MBB->pred_size() == 1)
217 PredMBB = *MBB->pred_begin();
218
219 // The loop header has two predecessors, return the latch, but not for a
220 // single block loop.
221 if (MBB->pred_size() == 2 && Loop != nullptr && Loop->getHeader() == MBB) {
222 for (MachineBasicBlock *Pred : MBB->predecessors())
223 if (Loop->contains(Pred))
224 PredMBB = (Pred == MBB ? nullptr : Pred);
225 }
226
227 assert ((PredMBB == nullptr || !Loop || Loop->contains(PredMBB))
228 && "Loop MBB should not consider predecessor outside of loop.");
229
230 return PredMBB;
231}
232
233void SystemZPostRASchedStrategy::
234advanceTo(MachineBasicBlock::iterator NextBegin) {
235 MachineBasicBlock::iterator LastEmittedMI = HazardRec->getLastEmittedMI();
237 ((LastEmittedMI != nullptr && LastEmittedMI->getParent() == MBB) ?
238 std::next(LastEmittedMI) : MBB->begin());
239
240 for (; I != NextBegin; ++I) {
241 if (I->isPosition() || I->isDebugInstr())
242 continue;
243 HazardRec->emitInstruction(&*I);
244 }
245}
246
248 Available.clear(); // -misched-cutoff.
249 LLVM_DEBUG(HazardRec->dumpState(););
250}
251
253 assert ((SchedStates.find(NextMBB) == SchedStates.end()) &&
254 "Entering MBB twice?");
255 LLVM_DEBUG(dbgs() << "** Entering " << printMBBReference(*NextMBB));
256
257 MBB = NextMBB;
258
259 /// Create a HazardRec for MBB, save it in SchedStates and set HazardRec to
260 /// point to it.
261 HazardRec = SchedStates[MBB] = new SystemZHazardRecognizer(TII, &SchedModel);
262 LLVM_DEBUG(const MachineLoop *Loop = MLI->getLoopFor(MBB);
263 if (Loop && Loop->getHeader() == MBB) dbgs() << " (Loop header)";
264 dbgs() << ":\n";);
265
266 // Try to take over the state from a single predecessor, if it has been
267 // scheduled. If this is not possible, we are done.
268 MachineBasicBlock *SinglePredMBB =
269 getSingleSchedPred(MBB, MLI->getLoopFor(MBB));
270 if (SinglePredMBB == nullptr)
271 return;
272 auto It = SchedStates.find(SinglePredMBB);
273 if (It == SchedStates.end())
274 return;
275
276 LLVM_DEBUG(dbgs() << "** Continued scheduling from "
277 << printMBBReference(*SinglePredMBB) << "\n";);
278
279 HazardRec->copyState(It->second);
280 LLVM_DEBUG(HazardRec->dumpState(););
281
282 // Emit incoming terminator(s). Be optimistic and assume that branch
283 // prediction will generally do "the right thing".
284 for (MachineInstr &MI : SinglePredMBB->terminators()) {
285 LLVM_DEBUG(dbgs() << "** Emitting incoming branch: "; MI.dump(););
286 bool TakenBranch = (MI.isBranch() &&
287 (TII->getBranchInfo(MI).isIndirect() ||
288 TII->getBranchInfo(MI).getMBBTarget() == MBB));
289 HazardRec->emitInstruction(&MI, TakenBranch);
290 if (TakenBranch)
291 break;
292 }
293}
294
296 LLVM_DEBUG(dbgs() << "** Leaving " << printMBBReference(*MBB) << "\n";);
297
298 // Advance to first terminator. The successor block will handle terminators
299 // dependent on CFG layout (T/NT branch etc).
300 advanceTo(MBB->getFirstTerminator());
301}
302
305 : MLI(C->MLI),
306 TII(static_cast<const SystemZInstrInfo *>
307 (C->MF->getSubtarget().getInstrInfo())),
308 MBB(nullptr), HazardRec(nullptr) {
309 const TargetSubtargetInfo *ST = &C->MF->getSubtarget();
310 SchedModel.init(ST);
311}
312
314 // Delete hazard recognizers kept around for each MBB.
315 for (auto I : SchedStates) {
316 SystemZHazardRecognizer *hazrec = I.second;
317 delete hazrec;
318 }
319}
320
323 unsigned NumRegionInstrs) {
324 // Don't emit the terminators.
325 if (Begin->isTerminator())
326 return;
327
328 // Emit any instructions before start of region.
329 advanceTo(Begin);
330}
331
332// Pick the next node to schedule.
334 // Only scheduling top-down.
335 IsTopNode = true;
336
337 if (Available.empty())
338 return nullptr;
339
340 // If only one choice, return it.
341 if (Available.size() == 1) {
342 LLVM_DEBUG(dbgs() << "** Only one: ";
343 HazardRec->dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";);
344 return *Available.begin();
345 }
346
347 // All nodes that are possible to schedule are stored in the Available set.
348 LLVM_DEBUG(dbgs() << "** Available: "; Available.dump(*HazardRec););
349
350 Candidate Best;
351 for (auto *SU : Available) {
352
353 // SU is the next candidate to be compared against current Best.
354 Candidate c(SU, *HazardRec);
355
356 // Remember which SU is the best candidate.
357 if (Best.SU == nullptr || c < Best) {
358 Best = c;
359 LLVM_DEBUG(dbgs() << "** Best so far: ";);
360 } else
361 LLVM_DEBUG(dbgs() << "** Tried : ";);
362 LLVM_DEBUG(HazardRec->dumpSU(c.SU, dbgs()); c.dumpCosts();
363 dbgs() << " Height:" << c.SU->getHeight(); dbgs() << "\n";);
364
365 // Once we know we have seen all SUs that affect grouping or use unbuffered
366 // resources, we can stop iterating if Best looks good.
367 if (!SU->isScheduleHigh && Best.noCost())
368 break;
369 }
370
371 assert (Best.SU != nullptr);
372 return Best.SU;
373}
374
375SystemZPostRASchedStrategy::Candidate::
376Candidate(SUnit *SU_, SystemZHazardRecognizer &HazardRec) : Candidate() {
377 SU = SU_;
378
379 // Check the grouping cost. For a node that must begin / end a
380 // group, it is positive if it would do so prematurely, or negative
381 // if it would fit naturally into the schedule.
382 GroupingCost = HazardRec.groupingCost(SU);
383
384 // Check the resources cost for this SU.
385 ResourcesCost = HazardRec.resourcesCost(SU);
386}
387
388bool SystemZPostRASchedStrategy::Candidate::
389operator<(const Candidate &other) {
390
391 // Check decoder grouping.
392 if (GroupingCost < other.GroupingCost)
393 return true;
394 if (GroupingCost > other.GroupingCost)
395 return false;
396
397 // Compare the use of resources.
398 if (ResourcesCost < other.ResourcesCost)
399 return true;
400 if (ResourcesCost > other.ResourcesCost)
401 return false;
402
403 // Higher SU is otherwise generally better.
404 if (SU->getHeight() > other.SU->getHeight())
405 return true;
406 if (SU->getHeight() < other.SU->getHeight())
407 return false;
408
409 // If all same, fall back to original order.
410 if (SU->NodeNum < other.SU->NodeNum)
411 return true;
412
413 return false;
414}
415
417 LLVM_DEBUG(dbgs() << "** Scheduling SU(" << SU->NodeNum << ") ";
418 if (Available.size() == 1) dbgs() << "(only one) ";
419 Candidate c(SU, *HazardRec); c.dumpCosts(); dbgs() << "\n";);
420
421 // Remove SU from Available set and update HazardRec.
422 Available.erase(SU);
423 HazardRec->EmitInstruction(SU);
424}
425
427 // Set isScheduleHigh flag on all SUs that we want to consider first in
428 // pickNode().
429 const MCSchedClassDesc *SC = HazardRec->getSchedClass(SU);
430 bool AffectsGrouping = (SC->isValid() && (SC->BeginGroup || SC->EndGroup));
431 SU->isScheduleHigh = (AffectsGrouping || SU->isUnbuffered);
432
433 // Put all released SUs in the Available set.
434 Available.insert(SU);
435}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
MachineBasicBlock & MBB
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
if(PassOpts->AAPipeline)
#define LLVM_DEBUG(...)
Definition Debug.h:114
static int biasPhysRegExtra(const SUnit *SU)
static MachineBasicBlock * getSingleSchedPred(MachineBasicBlock *MBB, const MachineLoop *Loop)
static bool isRegDef(const MachineOperand &MO)
Pre-RA scheduling ///.
static bool isPhysRegDef(const MachineOperand &MO)
MachineSchedPolicy RegionPolicy
ScheduleDAGMILive * DAG
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getHeader() const
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
MachineInstrBundleIterator< MachineInstr > iterator
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
SUnit * getSUnit() const
Kind getKind() const
Returns an enum value representing the kind of the dependence.
@ Data
Regular data dependence (aka true-dependence).
Definition ScheduleDAG.h:55
Scheduling unit. This is a node in the scheduling DAG.
unsigned NodeNum
Entry # of node in the node vector.
bool isUnbuffered
Uses an unbuffered resource.
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
bool isScheduleHigh
True if preferable to schedule high.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
unsigned WeakSuccsLeft
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Each Scheduling boundary is associated with ready queues.
unsigned getScheduledLatency() const
Get the number of latency cycles "covered" by the scheduled instructions.
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
SystemZHazardRecognizer maintains the state for one MBB during scheduling.
int groupingCost(SUnit *SU) const
Return the cost of decoder grouping for SU.
void dumpSU(SUnit *SU, raw_ostream &OS) const
int resourcesCost(SUnit *SU)
Return the cost of SU in regards to processor resources usage.
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
void leaveMBB() override
Tell the strategy that current MBB is done.
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Called for a region before scheduling.
void schedNode(SUnit *SU, bool IsTopNode) override
ScheduleDAGMI has scheduled an instruction - tell HazardRec about it.
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void enterMBB(MachineBasicBlock *NextMBB) override
Tell the strategy that MBB is about to be processed.
SystemZPostRASchedStrategy(const MachineSchedContext *C)
void releaseTopNode(SUnit *SU) override
SU has had all predecessor dependencies resolved.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
TargetSubtargetInfo - Generic base class for all target subtargets.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
LLVM_ABI unsigned computeRemLatency(SchedBoundary &CurrZone)
Compute remaining latency.
LLVM_ABI bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
LLVM_ABI int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition MCSchedule.h:123
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...