LLVM 23.0.0git
SystemZMachineScheduler.cpp
Go to the documentation of this file.
1//-- SystemZMachineScheduler.cpp - SystemZ Scheduler Interface -*- C++ -*---==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
11
12using namespace llvm;
13
14#define DEBUG_TYPE "machine-scheduler"
15
16/// Pre-RA scheduling ///
17
18static bool isRegDef(const MachineOperand &MO) {
19 return MO.isReg() && MO.isDef();
20}
21
22bool SystemZPreRASchedStrategy::definesCmp0Src(const MachineInstr *MI,
23 bool CCDef) const {
24 if (Cmp0SrcReg != SystemZ::NoRegister && MI->getNumOperands() &&
25 (MI->getDesc().hasImplicitDefOfPhysReg(SystemZ::CC) || !CCDef)) {
26 const MachineOperand &MO0 = MI->getOperand(0);
27 if (isRegDef(MO0) && MO0.getReg() == Cmp0SrcReg)
28 return true;
29 }
30 return false;
31}
32
33bool SystemZPreRASchedStrategy::closesLiveRange(const SUnit *SU,
34 ScheduleDAGMILive *DAG) const {
35 if (SU->getInstr()->isCopy())
36 return false;
37
38 // Extract the PressureChanges that all fp/vector or GR64/GR32/GRH32 regs
39 // affect respectively. misched-prera-pdiffs.mir tests against any future
40 // change in the PressureSets modelling, so simply hard-code them here.
41 int VR16PChange = 0, GRX32PChange = 0;
42 const PressureDiff &PDiff = DAG->getPressureDiff(SU);
43 for (const PressureChange &PC : PDiff) {
44 if (!PC.isValid())
45 break;
46 if (PC.getPSet() == SystemZ::VR16Bit)
47 VR16PChange = PC.getUnitInc();
48 else if (PC.getPSet() == SystemZ::GRX32Bit)
49 GRX32PChange = PC.getUnitInc();
50 }
51
52 // Return true for a (vreg) def when register pressure is reduced. Prioritize
53 // FP/vector regs over GPRs.
54 return VR16PChange < 0 || (!VR16PChange && GRX32PChange < 0);
55}
56
58 SchedCandidate &TryCand,
59 SchedBoundary *Zone) const {
60 assert(Zone && !Zone->isTop() && "Bottom-Up scheduling only.");
61
62 // Initialize the candidate if needed.
63 if (!Cand.isValid()) {
64 TryCand.Reason = FirstValid;
65 return true;
66 }
67
68 // Bias physreg defs and copies to their uses and definitions respectively.
69 if (tryBiasPhysRegs(TryCand, Cand, Zone, /*BiasPRegsExtra=*/true))
70 return TryCand.Reason != NoCand;
71
72 if (RegionPolicy.ShouldTrackPressure) {
73 auto schedLow = [&](const SUnit *SU) {
74 return SU->getHeight() <= Zone->getScheduledLatency() &&
75 SU->getHeight() < LivenessHeightCutOff && closesLiveRange(SU, DAG);
76 };
77 // One SU closes a live range while preserving the scheduled latency.
78 if (tryGreater(schedLow(TryCand.SU), schedLow(Cand.SU), TryCand, Cand,
79 RegExcess))
80 return TryCand.Reason != NoCand;
81 }
82
83 if (!RegionPolicy.DisableLatencyHeuristic)
84 if (const SUnit *HigherSU =
85 TryCand.SU->getHeight() > Cand.SU->getHeight() ? TryCand.SU
86 : TryCand.SU->getHeight() < Cand.SU->getHeight() ? Cand.SU
87 : nullptr)
88 if (HigherSU->getHeight() > Zone->getScheduledLatency() &&
89 HigherSU->getDepth() < computeRemLatency(*Zone)) {
90 // The higher SU increases the scheduled latency but is not on the
91 // Critical Path by Depth, so put it above the other one.
92 tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), TryCand, Cand,
94 return TryCand.Reason != NoCand;
95 }
96
97 // Weak edges help copy coalescing.
98 if (tryLess(TryCand.SU->WeakSuccsLeft, Cand.SU->WeakSuccsLeft, TryCand, Cand,
99 Weak))
100 return TryCand.Reason != NoCand;
101
102 // Help compare with zero elimination.
103 if (tryGreater(definesCmp0Src(TryCand.SU->getInstr()),
104 definesCmp0Src(Cand.SU->getInstr()), TryCand, Cand, Weak))
105 return TryCand.Reason != NoCand;
106
107 // Fall through to original instruction order.
108 if (TryCand.SU->NodeNum > Cand.SU->NodeNum) {
109 TryCand.Reason = NodeOrder;
110 return true;
111 }
112
113 return false;
114}
115
118 unsigned NumRegionInstrs) {
119 // TopRegionSUs is the number of SUs that is considered to be part of the
120 // "top" of a region. Liveness reduction is not done in regions smaller than
121 // this. The idea is to prioritize latency more after branches and help
122 // liveness only when the decoder is ahead of execution anyway.
123 static const unsigned TopRegionSUs = 36;
124
125 // Avoid setting up the register pressure tracker unless needed to save
126 // compile time.
127 RegionPolicy.ShouldTrackPressure = NumRegionInstrs > TopRegionSUs;
128
129 // These heuristics has so far seemed to work better without adding a
130 // top-down boundary.
131 RegionPolicy.OnlyBottomUp = true;
132
134 this->NumRegionInstrs = NumRegionInstrs;
135}
136
139
140 Cmp0SrcReg = SystemZ::NoRegister;
141
142 unsigned DAGHeight = 0;
143 for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx)
144 DAGHeight = std::max(DAGHeight, DAG->SUnits[Idx].getHeight());
145
146 if (RegionPolicy.ShouldTrackPressure)
147 LivenessHeightCutOff = DAGHeight / (DAG->SUnits.size() < 50 ? 4 : 2);
148
149 // Disable latency reduction if region has many SUs relative to the
150 // overall height.
151 RegionPolicy.DisableLatencyHeuristic =
152 DAG->SUnits.size() >= 3 * std::max(DAGHeight, 1u);
153}
154
156 GenericScheduler::schedNode(SU, IsTopNode);
157
158 const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(DAG->TII);
159 MachineInstr *MI = SU->getInstr();
160 if (TII->isCompareZero(*MI))
161 Cmp0SrcReg = TII->getCompareSourceReg(*MI);
162 else if (MI->getDesc().hasImplicitDefOfPhysReg(SystemZ::CC) ||
163 definesCmp0Src(MI, /*CCDef=*/false))
164 Cmp0SrcReg = SystemZ::NoRegister;
165}
166
167/// Post-RA scheduling ///
168
169#ifndef NDEBUG
170// Print the set of SUs
171void SystemZPostRASchedStrategy::SUSet::
172dump(SystemZHazardRecognizer &HazardRec) const {
173 dbgs() << "{";
174 for (auto &SU : *this) {
175 HazardRec.dumpSU(SU, dbgs());
176 if (SU != *rbegin())
177 dbgs() << ", ";
178 }
179 dbgs() << "}\n";
180}
181#endif
182
183// Try to find a single predecessor that would be interesting for the
184// scheduler in the top-most region of MBB.
186 const MachineLoop *Loop) {
187 MachineBasicBlock *PredMBB = nullptr;
188 if (MBB->pred_size() == 1)
189 PredMBB = *MBB->pred_begin();
190
191 // The loop header has two predecessors, return the latch, but not for a
192 // single block loop.
193 if (MBB->pred_size() == 2 && Loop != nullptr && Loop->getHeader() == MBB) {
194 for (MachineBasicBlock *Pred : MBB->predecessors())
195 if (Loop->contains(Pred))
196 PredMBB = (Pred == MBB ? nullptr : Pred);
197 }
198
199 assert ((PredMBB == nullptr || !Loop || Loop->contains(PredMBB))
200 && "Loop MBB should not consider predecessor outside of loop.");
201
202 return PredMBB;
203}
204
205void SystemZPostRASchedStrategy::
206advanceTo(MachineBasicBlock::iterator NextBegin) {
207 MachineBasicBlock::iterator LastEmittedMI = HazardRec->getLastEmittedMI();
209 ((LastEmittedMI != nullptr && LastEmittedMI->getParent() == MBB) ?
210 std::next(LastEmittedMI) : MBB->begin());
211
212 for (; I != NextBegin; ++I) {
213 if (I->isPosition() || I->isDebugInstr())
214 continue;
215 HazardRec->emitInstruction(&*I);
216 }
217}
218
220 Available.clear(); // -misched-cutoff.
221 LLVM_DEBUG(HazardRec->dumpState(););
222}
223
225 assert ((SchedStates.find(NextMBB) == SchedStates.end()) &&
226 "Entering MBB twice?");
227 LLVM_DEBUG(dbgs() << "** Entering " << printMBBReference(*NextMBB));
228
229 MBB = NextMBB;
230
231 /// Create a HazardRec for MBB, save it in SchedStates and set HazardRec to
232 /// point to it.
233 HazardRec = SchedStates[MBB] = new SystemZHazardRecognizer(TII, &SchedModel);
234 LLVM_DEBUG(const MachineLoop *Loop = MLI->getLoopFor(MBB);
235 if (Loop && Loop->getHeader() == MBB) dbgs() << " (Loop header)";
236 dbgs() << ":\n";);
237
238 // Try to take over the state from a single predecessor, if it has been
239 // scheduled. If this is not possible, we are done.
240 MachineBasicBlock *SinglePredMBB =
241 getSingleSchedPred(MBB, MLI->getLoopFor(MBB));
242 if (SinglePredMBB == nullptr)
243 return;
244 auto It = SchedStates.find(SinglePredMBB);
245 if (It == SchedStates.end())
246 return;
247
248 LLVM_DEBUG(dbgs() << "** Continued scheduling from "
249 << printMBBReference(*SinglePredMBB) << "\n";);
250
251 HazardRec->copyState(It->second);
252 LLVM_DEBUG(HazardRec->dumpState(););
253
254 // Emit incoming terminator(s). Be optimistic and assume that branch
255 // prediction will generally do "the right thing".
256 for (MachineInstr &MI : SinglePredMBB->terminators()) {
257 LLVM_DEBUG(dbgs() << "** Emitting incoming branch: "; MI.dump(););
258 bool TakenBranch = (MI.isBranch() &&
259 (TII->getBranchInfo(MI).isIndirect() ||
260 TII->getBranchInfo(MI).getMBBTarget() == MBB));
261 HazardRec->emitInstruction(&MI, TakenBranch);
262 if (TakenBranch)
263 break;
264 }
265}
266
268 LLVM_DEBUG(dbgs() << "** Leaving " << printMBBReference(*MBB) << "\n";);
269
270 // Advance to first terminator. The successor block will handle terminators
271 // dependent on CFG layout (T/NT branch etc).
272 advanceTo(MBB->getFirstTerminator());
273}
274
277 : MLI(C->MLI),
278 TII(static_cast<const SystemZInstrInfo *>
279 (C->MF->getSubtarget().getInstrInfo())),
280 MBB(nullptr), HazardRec(nullptr) {
281 const TargetSubtargetInfo *ST = &C->MF->getSubtarget();
282 SchedModel.init(ST);
283}
284
286 // Delete hazard recognizers kept around for each MBB.
287 for (auto I : SchedStates) {
288 SystemZHazardRecognizer *hazrec = I.second;
289 delete hazrec;
290 }
291}
292
295 unsigned NumRegionInstrs) {
296 // Don't emit the terminators.
297 if (Begin->isTerminator())
298 return;
299
300 // Emit any instructions before start of region.
301 advanceTo(Begin);
302}
303
304// Pick the next node to schedule.
306 // Only scheduling top-down.
307 IsTopNode = true;
308
309 if (Available.empty())
310 return nullptr;
311
312 // If only one choice, return it.
313 if (Available.size() == 1) {
314 LLVM_DEBUG(dbgs() << "** Only one: ";
315 HazardRec->dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";);
316 return *Available.begin();
317 }
318
319 // All nodes that are possible to schedule are stored in the Available set.
320 LLVM_DEBUG(dbgs() << "** Available: "; Available.dump(*HazardRec););
321
322 Candidate Best;
323 for (auto *SU : Available) {
324
325 // SU is the next candidate to be compared against current Best.
326 Candidate c(SU, *HazardRec);
327
328 // Remeber which SU is the best candidate.
329 if (Best.SU == nullptr || c < Best) {
330 Best = c;
331 LLVM_DEBUG(dbgs() << "** Best so far: ";);
332 } else
333 LLVM_DEBUG(dbgs() << "** Tried : ";);
334 LLVM_DEBUG(HazardRec->dumpSU(c.SU, dbgs()); c.dumpCosts();
335 dbgs() << " Height:" << c.SU->getHeight(); dbgs() << "\n";);
336
337 // Once we know we have seen all SUs that affect grouping or use unbuffered
338 // resources, we can stop iterating if Best looks good.
339 if (!SU->isScheduleHigh && Best.noCost())
340 break;
341 }
342
343 assert (Best.SU != nullptr);
344 return Best.SU;
345}
346
347SystemZPostRASchedStrategy::Candidate::
348Candidate(SUnit *SU_, SystemZHazardRecognizer &HazardRec) : Candidate() {
349 SU = SU_;
350
351 // Check the grouping cost. For a node that must begin / end a
352 // group, it is positive if it would do so prematurely, or negative
353 // if it would fit naturally into the schedule.
354 GroupingCost = HazardRec.groupingCost(SU);
355
356 // Check the resources cost for this SU.
357 ResourcesCost = HazardRec.resourcesCost(SU);
358}
359
360bool SystemZPostRASchedStrategy::Candidate::
361operator<(const Candidate &other) {
362
363 // Check decoder grouping.
364 if (GroupingCost < other.GroupingCost)
365 return true;
366 if (GroupingCost > other.GroupingCost)
367 return false;
368
369 // Compare the use of resources.
370 if (ResourcesCost < other.ResourcesCost)
371 return true;
372 if (ResourcesCost > other.ResourcesCost)
373 return false;
374
375 // Higher SU is otherwise generally better.
376 if (SU->getHeight() > other.SU->getHeight())
377 return true;
378 if (SU->getHeight() < other.SU->getHeight())
379 return false;
380
381 // If all same, fall back to original order.
382 if (SU->NodeNum < other.SU->NodeNum)
383 return true;
384
385 return false;
386}
387
389 LLVM_DEBUG(dbgs() << "** Scheduling SU(" << SU->NodeNum << ") ";
390 if (Available.size() == 1) dbgs() << "(only one) ";
391 Candidate c(SU, *HazardRec); c.dumpCosts(); dbgs() << "\n";);
392
393 // Remove SU from Available set and update HazardRec.
394 Available.erase(SU);
395 HazardRec->EmitInstruction(SU);
396}
397
399 // Set isScheduleHigh flag on all SUs that we want to consider first in
400 // pickNode().
401 const MCSchedClassDesc *SC = HazardRec->getSchedClass(SU);
402 bool AffectsGrouping = (SC->isValid() && (SC->BeginGroup || SC->EndGroup));
403 SU->isScheduleHigh = (AffectsGrouping || SU->isUnbuffered);
404
405 // Put all released SUs in the Available set.
406 Available.insert(SU);
407}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
MachineBasicBlock & MBB
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
if(PassOpts->AAPipeline)
#define LLVM_DEBUG(...)
Definition Debug.h:119
static MachineBasicBlock * getSingleSchedPred(MachineBasicBlock *MBB, const MachineLoop *Loop)
static bool isRegDef(const MachineOperand &MO)
Pre-RA scheduling ///.
MachineSchedPolicy RegionPolicy
ScheduleDAGMILive * DAG
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getHeader() const
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
MachineInstrBundleIterator< MachineInstr > iterator
Representation of each machine instruction.
bool isCopy() const
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
Scheduling unit. This is a node in the scheduling DAG.
unsigned NodeNum
Entry # of node in the node vector.
bool isUnbuffered
Uses an unbuffered resource.
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
bool isScheduleHigh
True if preferable to schedule high.
unsigned WeakSuccsLeft
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Each Scheduling boundary is associated with ready queues.
unsigned getScheduledLatency() const
Get the number of latency cycles "covered" by the scheduled instructions.
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
SystemZHazardRecognizer maintains the state for one MBB during scheduling.
int groupingCost(SUnit *SU) const
Return the cost of decoder grouping for SU.
void dumpSU(SUnit *SU, raw_ostream &OS) const
int resourcesCost(SUnit *SU)
Return the cost of SU in regards to processor resources usage.
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
void leaveMBB() override
Tell the strategy that current MBB is done.
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Called for a region before scheduling.
void schedNode(SUnit *SU, bool IsTopNode) override
ScheduleDAGMI has scheduled an instruction - tell HazardRec about it.
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void enterMBB(MachineBasicBlock *NextMBB) override
Tell the strategy that MBB is about to be processed.
SystemZPostRASchedStrategy(const MachineSchedContext *C)
void releaseTopNode(SUnit *SU) override
SU has had all predecessor dependencies resolved.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
TargetSubtargetInfo - Generic base class for all target subtargets.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI bool tryBiasPhysRegs(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary *Zone, bool BiasPRegsExtra)
LLVM_ABI bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
LLVM_ABI unsigned computeRemLatency(SchedBoundary &CurrZone)
Compute remaining latency.
LLVM_ABI bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition MCSchedule.h:129
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...