LLVM 23.0.0git
SystemZMachineScheduler.cpp
Go to the documentation of this file.
1//-- SystemZMachineScheduler.cpp - SystemZ Scheduler Interface -*- C++ -*---==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
11
12using namespace llvm;
13
14#define DEBUG_TYPE "machine-scheduler"
15
16/// Pre-RA scheduling ///
17
18static bool isRegDef(const MachineOperand &MO) {
19 return MO.isReg() && MO.isDef();
20}
21
/// Decide whether this region should be scheduled for latency: sets
/// RegionPolicy.DisableLatencyHeuristic from the DAG shape and then sets
/// HasDataSequences if enough SUs form chained def-use sequences.
void SystemZPreRASchedStrategy::initializeLatencyReduction() {
  // Enable latency reduction for a region that has a considerable amount of
  // data sequences that should be interleaved. These are SUs that only have
  // one data predecessor / successor edge(s) to their adjacent instruction(s)
  // in the input order. Disable if region has many SUs relative to the
  // overall height.
  unsigned DAGHeight = 0;
  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx)
    DAGHeight = std::max(DAGHeight, DAG->SUnits[Idx].getHeight());
  // A "wide" DAG (>= 3 SUs per height level) leaves little room for latency
  // scheduling to help, so the heuristic is disabled in that case.
  RegionPolicy.DisableLatencyHeuristic =
      DAG->SUnits.size() >= 3 * std::max(DAGHeight, 1u);
  if ((HasDataSequences = !RegionPolicy.DisableLatencyHeuristic)) {
    // CurrSequence is the length of the def-use sequence currently being
    // scanned; countSequence() commits it to NumSeqNodes (only sequences of
    // two or more nodes count) and resets it.
    unsigned CurrSequence = 0, NumSeqNodes = 0;
    auto countSequence = [&CurrSequence, &NumSeqNodes]() {
      if (CurrSequence >= 2)
        NumSeqNodes += CurrSequence;
      CurrSequence = 0;
    };
    for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
      const SUnit *SU = &DAG->SUnits[Idx];
      bool InDataSequence = true;
      // One Data pred to MI just above, or no preds. (For Idx == 0, Idx - 1
      // wraps around and can match no NodeNum, so any pred disqualifies.)
      unsigned NumPreds = 0;
      for (const SDep &Pred : SU->Preds)
        if (++NumPreds != 1 || Pred.getKind() != SDep::Data ||
            Pred.getSUnit()->NodeNum != Idx - 1)
          InDataSequence = false;
      // One Data succ or no succs (ignoring ExitSU).
      unsigned NumSuccs = 0;
      for (const SDep &Succ : SU->Succs)
        if (Succ.getSUnit() != &DAG->ExitSU &&
            (++NumSuccs != 1 || Succ.getKind() != SDep::Data))
          InDataSequence = false;
      // Another type of node or one that does not have a single data pred
      // ends any previous sequence.
      if (!InDataSequence || !NumPreds)
        countSequence();
      if (InDataSequence)
        CurrSequence++;
    }
    countSequence();
    // Keep latency scheduling enabled only if a reasonable share (at least a
    // quarter of the region, and at least 4 nodes) lies in such sequences.
    if (NumSeqNodes >= std::max(size_t(4), DAG->SUnits.size() / 4)) {
      LLVM_DEBUG(dbgs() << "Number of nodes in def-use sequences: "
                        << NumSeqNodes << ". ";);
    } else
      HasDataSequences = false;
  }
}
70
71bool SystemZPreRASchedStrategy::definesCmp0Src(const MachineInstr *MI,
72 bool CCDef) const {
73 if (Cmp0SrcReg != SystemZ::NoRegister && MI->getNumOperands() &&
74 (MI->getDesc().hasImplicitDefOfPhysReg(SystemZ::CC) || !CCDef)) {
75 const MachineOperand &MO0 = MI->getOperand(0);
76 if (isRegDef(MO0) && MO0.getReg() == Cmp0SrcReg)
77 return true;
78 }
79 return false;
80}
81
83 SchedCandidate &TryCand,
84 SchedBoundary *Zone) const {
85 assert(Zone && !Zone->isTop() && "Bottom-Up scheduling only.");
86
87 // Initialize the candidate if needed.
88 if (!Cand.isValid()) {
89 TryCand.Reason = FirstValid;
90 return true;
91 }
92
93 // Bias physreg defs and copies to their uses and definitions respectively.
94 if (tryBiasPhysRegs(TryCand, Cand, Zone, /*BiasPRegsExtra=*/true))
95 return TryCand.Reason != NoCand;
96
97 // Don't extend the scheduled latency in regions with many nodes in data
98 // sequences, or for (single block loop) regions that are acyclically
99 // (within a single loop iteration) latency limited. IsAcyclicLatencyLimited
100 // is set only after initialization in registerRoots(), which is why it is
101 // checked here instead of earlier.
102 if (!RegionPolicy.DisableLatencyHeuristic &&
103 (HasDataSequences || Rem.IsAcyclicLatencyLimited))
104 if (const SUnit *HigherSU =
105 TryCand.SU->getHeight() > Cand.SU->getHeight() ? TryCand.SU
106 : TryCand.SU->getHeight() < Cand.SU->getHeight() ? Cand.SU
107 : nullptr)
108 if (HigherSU->getHeight() > Zone->getScheduledLatency() &&
109 HigherSU->getDepth() < computeRemLatency(*Zone)) {
110 // One or both SUs increase the scheduled latency.
111 tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), TryCand, Cand,
113 return TryCand.Reason != NoCand;
114 }
115
116 // Weak edges help copy coalescing.
117 if (tryLess(TryCand.SU->WeakSuccsLeft, Cand.SU->WeakSuccsLeft, TryCand, Cand,
118 Weak))
119 return TryCand.Reason != NoCand;
120
121 // Help compare with zero elimination.
122 if (tryGreater(definesCmp0Src(TryCand.SU->getInstr()),
123 definesCmp0Src(Cand.SU->getInstr()), TryCand, Cand, Weak))
124 return TryCand.Reason != NoCand;
125
126 // Fall through to original instruction order.
127 if (TryCand.SU->NodeNum > Cand.SU->NodeNum) {
128 TryCand.Reason = NodeOrder;
129 return true;
130 }
131
132 return false;
133}
134
137 unsigned NumRegionInstrs) {
138 // Avoid setting up the register pressure tracker for small regions to save
139 // compile time. Currently only used for computeCyclicCriticalPath() which
140 // is used for single block loops.
141 MachineBasicBlock *MBB = Begin->getParent();
142 RegionPolicy.ShouldTrackPressure =
143 MBB->isSuccessor(MBB) && NumRegionInstrs >= 8;
144
145 // These heuristics has so far seemed to work better without adding a
146 // top-down boundary.
147 RegionPolicy.OnlyBottomUp = true;
149 this->NumRegionInstrs = NumRegionInstrs;
150}
151
154
155 Cmp0SrcReg = SystemZ::NoRegister;
156
157 initializeLatencyReduction();
158 LLVM_DEBUG(dbgs() << "Latency scheduling " << (HasDataSequences ? "" : "not ")
159 << "enabled for data sequences.\n";);
160}
161
163 GenericScheduler::schedNode(SU, IsTopNode);
164
165 const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(DAG->TII);
166 MachineInstr *MI = SU->getInstr();
167 if (TII->isCompareZero(*MI))
168 Cmp0SrcReg = TII->getCompareSourceReg(*MI);
169 else if (MI->getDesc().hasImplicitDefOfPhysReg(SystemZ::CC) ||
170 definesCmp0Src(MI, /*CCDef=*/false))
171 Cmp0SrcReg = SystemZ::NoRegister;
172}
173
174/// Post-RA scheduling ///
175
176#ifndef NDEBUG
177// Print the set of SUs
178void SystemZPostRASchedStrategy::SUSet::
179dump(SystemZHazardRecognizer &HazardRec) const {
180 dbgs() << "{";
181 for (auto &SU : *this) {
182 HazardRec.dumpSU(SU, dbgs());
183 if (SU != *rbegin())
184 dbgs() << ", ";
185 }
186 dbgs() << "}\n";
187}
188#endif
189
190// Try to find a single predecessor that would be interesting for the
191// scheduler in the top-most region of MBB.
193 const MachineLoop *Loop) {
194 MachineBasicBlock *PredMBB = nullptr;
195 if (MBB->pred_size() == 1)
196 PredMBB = *MBB->pred_begin();
197
198 // The loop header has two predecessors, return the latch, but not for a
199 // single block loop.
200 if (MBB->pred_size() == 2 && Loop != nullptr && Loop->getHeader() == MBB) {
201 for (MachineBasicBlock *Pred : MBB->predecessors())
202 if (Loop->contains(Pred))
203 PredMBB = (Pred == MBB ? nullptr : Pred);
204 }
205
206 assert ((PredMBB == nullptr || !Loop || Loop->contains(PredMBB))
207 && "Loop MBB should not consider predecessor outside of loop.");
208
209 return PredMBB;
210}
211
212void SystemZPostRASchedStrategy::
213advanceTo(MachineBasicBlock::iterator NextBegin) {
214 MachineBasicBlock::iterator LastEmittedMI = HazardRec->getLastEmittedMI();
216 ((LastEmittedMI != nullptr && LastEmittedMI->getParent() == MBB) ?
217 std::next(LastEmittedMI) : MBB->begin());
218
219 for (; I != NextBegin; ++I) {
220 if (I->isPosition() || I->isDebugInstr())
221 continue;
222 HazardRec->emitInstruction(&*I);
223 }
224}
225
227 Available.clear(); // -misched-cutoff.
228 LLVM_DEBUG(HazardRec->dumpState(););
229}
230
232 assert ((SchedStates.find(NextMBB) == SchedStates.end()) &&
233 "Entering MBB twice?");
234 LLVM_DEBUG(dbgs() << "** Entering " << printMBBReference(*NextMBB));
235
236 MBB = NextMBB;
237
238 /// Create a HazardRec for MBB, save it in SchedStates and set HazardRec to
239 /// point to it.
240 HazardRec = SchedStates[MBB] = new SystemZHazardRecognizer(TII, &SchedModel);
241 LLVM_DEBUG(const MachineLoop *Loop = MLI->getLoopFor(MBB);
242 if (Loop && Loop->getHeader() == MBB) dbgs() << " (Loop header)";
243 dbgs() << ":\n";);
244
245 // Try to take over the state from a single predecessor, if it has been
246 // scheduled. If this is not possible, we are done.
247 MachineBasicBlock *SinglePredMBB =
248 getSingleSchedPred(MBB, MLI->getLoopFor(MBB));
249 if (SinglePredMBB == nullptr)
250 return;
251 auto It = SchedStates.find(SinglePredMBB);
252 if (It == SchedStates.end())
253 return;
254
255 LLVM_DEBUG(dbgs() << "** Continued scheduling from "
256 << printMBBReference(*SinglePredMBB) << "\n";);
257
258 HazardRec->copyState(It->second);
259 LLVM_DEBUG(HazardRec->dumpState(););
260
261 // Emit incoming terminator(s). Be optimistic and assume that branch
262 // prediction will generally do "the right thing".
263 for (MachineInstr &MI : SinglePredMBB->terminators()) {
264 LLVM_DEBUG(dbgs() << "** Emitting incoming branch: "; MI.dump(););
265 bool TakenBranch = (MI.isBranch() &&
266 (TII->getBranchInfo(MI).isIndirect() ||
267 TII->getBranchInfo(MI).getMBBTarget() == MBB));
268 HazardRec->emitInstruction(&MI, TakenBranch);
269 if (TakenBranch)
270 break;
271 }
272}
273
275 LLVM_DEBUG(dbgs() << "** Leaving " << printMBBReference(*MBB) << "\n";);
276
277 // Advance to first terminator. The successor block will handle terminators
278 // dependent on CFG layout (T/NT branch etc).
279 advanceTo(MBB->getFirstTerminator());
280}
281
284 : MLI(C->MLI),
285 TII(static_cast<const SystemZInstrInfo *>
286 (C->MF->getSubtarget().getInstrInfo())),
287 MBB(nullptr), HazardRec(nullptr) {
288 const TargetSubtargetInfo *ST = &C->MF->getSubtarget();
289 SchedModel.init(ST);
290}
291
293 // Delete hazard recognizers kept around for each MBB.
294 for (auto I : SchedStates) {
295 SystemZHazardRecognizer *hazrec = I.second;
296 delete hazrec;
297 }
298}
299
302 unsigned NumRegionInstrs) {
303 // Don't emit the terminators.
304 if (Begin->isTerminator())
305 return;
306
307 // Emit any instructions before start of region.
308 advanceTo(Begin);
309}
310
311// Pick the next node to schedule.
313 // Only scheduling top-down.
314 IsTopNode = true;
315
316 if (Available.empty())
317 return nullptr;
318
319 // If only one choice, return it.
320 if (Available.size() == 1) {
321 LLVM_DEBUG(dbgs() << "** Only one: ";
322 HazardRec->dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";);
323 return *Available.begin();
324 }
325
326 // All nodes that are possible to schedule are stored in the Available set.
327 LLVM_DEBUG(dbgs() << "** Available: "; Available.dump(*HazardRec););
328
329 Candidate Best;
330 for (auto *SU : Available) {
331
332 // SU is the next candidate to be compared against current Best.
333 Candidate c(SU, *HazardRec);
334
335 // Remeber which SU is the best candidate.
336 if (Best.SU == nullptr || c < Best) {
337 Best = c;
338 LLVM_DEBUG(dbgs() << "** Best so far: ";);
339 } else
340 LLVM_DEBUG(dbgs() << "** Tried : ";);
341 LLVM_DEBUG(HazardRec->dumpSU(c.SU, dbgs()); c.dumpCosts();
342 dbgs() << " Height:" << c.SU->getHeight(); dbgs() << "\n";);
343
344 // Once we know we have seen all SUs that affect grouping or use unbuffered
345 // resources, we can stop iterating if Best looks good.
346 if (!SU->isScheduleHigh && Best.noCost())
347 break;
348 }
349
350 assert (Best.SU != nullptr);
351 return Best.SU;
352}
353
/// Construct a Candidate for SU by computing its costs against the current
/// decoder-grouping and resource state held in HazardRec.
SystemZPostRASchedStrategy::Candidate::
Candidate(SUnit *SU_, SystemZHazardRecognizer &HazardRec) : Candidate() {
  SU = SU_;

  // Check the grouping cost. For a node that must begin / end a
  // group, it is positive if it would do so prematurely, or negative
  // if it would fit naturally into the schedule.
  GroupingCost = HazardRec.groupingCost(SU);

  // Check the resources cost for this SU. (resourcesCost is non-const on
  // HazardRec, so it is evaluated after the grouping cost, preserving any
  // state it updates.)
  ResourcesCost = HazardRec.resourcesCost(SU);
}
366
367bool SystemZPostRASchedStrategy::Candidate::
368operator<(const Candidate &other) {
369
370 // Check decoder grouping.
371 if (GroupingCost < other.GroupingCost)
372 return true;
373 if (GroupingCost > other.GroupingCost)
374 return false;
375
376 // Compare the use of resources.
377 if (ResourcesCost < other.ResourcesCost)
378 return true;
379 if (ResourcesCost > other.ResourcesCost)
380 return false;
381
382 // Higher SU is otherwise generally better.
383 if (SU->getHeight() > other.SU->getHeight())
384 return true;
385 if (SU->getHeight() < other.SU->getHeight())
386 return false;
387
388 // If all same, fall back to original order.
389 if (SU->NodeNum < other.SU->NodeNum)
390 return true;
391
392 return false;
393}
394
396 LLVM_DEBUG(dbgs() << "** Scheduling SU(" << SU->NodeNum << ") ";
397 if (Available.size() == 1) dbgs() << "(only one) ";
398 Candidate c(SU, *HazardRec); c.dumpCosts(); dbgs() << "\n";);
399
400 // Remove SU from Available set and update HazardRec.
401 Available.erase(SU);
402 HazardRec->EmitInstruction(SU);
403}
404
406 // Set isScheduleHigh flag on all SUs that we want to consider first in
407 // pickNode().
408 const MCSchedClassDesc *SC = HazardRec->getSchedClass(SU);
409 bool AffectsGrouping = (SC->isValid() && (SC->BeginGroup || SC->EndGroup));
410 SU->isScheduleHigh = (AffectsGrouping || SU->isUnbuffered);
411
412 // Put all released SUs in the Available set.
413 Available.insert(SU);
414}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
MachineBasicBlock & MBB
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
if(PassOpts->AAPipeline)
#define LLVM_DEBUG(...)
Definition Debug.h:114
static MachineBasicBlock * getSingleSchedPred(MachineBasicBlock *MBB, const MachineLoop *Loop)
static bool isRegDef(const MachineOperand &MO)
Pre-RA scheduling ///.
MachineSchedPolicy RegionPolicy
ScheduleDAGMILive * DAG
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getHeader() const
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
MachineInstrBundleIterator< MachineInstr > iterator
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
SUnit * getSUnit() const
Kind getKind() const
Returns an enum value representing the kind of the dependence.
@ Data
Regular data dependence (aka true-dependence).
Definition ScheduleDAG.h:55
Scheduling unit. This is a node in the scheduling DAG.
unsigned NodeNum
Entry # of node in the node vector.
bool isUnbuffered
Uses an unbuffered resource.
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
bool isScheduleHigh
True if preferable to schedule high.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
unsigned WeakSuccsLeft
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Each Scheduling boundary is associated with ready queues.
unsigned getScheduledLatency() const
Get the number of latency cycles "covered" by the scheduled instructions.
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
SystemZHazardRecognizer maintains the state for one MBB during scheduling.
int groupingCost(SUnit *SU) const
Return the cost of decoder grouping for SU.
void dumpSU(SUnit *SU, raw_ostream &OS) const
int resourcesCost(SUnit *SU)
Return the cost of SU in regards to processor resources usage.
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
void leaveMBB() override
Tell the strategy that current MBB is done.
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Called for a region before scheduling.
void schedNode(SUnit *SU, bool IsTopNode) override
ScheduleDAGMI has scheduled an instruction - tell HazardRec about it.
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void enterMBB(MachineBasicBlock *NextMBB) override
Tell the strategy that MBB is about to be processed.
SystemZPostRASchedStrategy(const MachineSchedContext *C)
void releaseTopNode(SUnit *SU) override
SU has had all predecessor dependencies resolved.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
TargetSubtargetInfo - Generic base class for all target subtargets.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI bool tryBiasPhysRegs(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary *Zone, bool BiasPRegsExtra)
LLVM_ABI bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
LLVM_ABI unsigned computeRemLatency(SchedBoundary &CurrZone)
Compute remaining latency.
LLVM_ABI bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition MCSchedule.h:123
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...