GCNSchedStrategy.cpp (LLVM 18.0.0git)
1//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This contains a MachineSchedStrategy implementation for maximizing wave
11/// occupancy on GCN hardware.
12///
13/// This pass will apply multiple scheduling stages to the same function.
14/// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual
15/// entry point for the scheduling of those regions is
16/// GCNScheduleDAGMILive::runSchedStages.
17
18/// Generally, the reason for having multiple scheduling stages is to account
19/// for the kernel-wide effect of register usage on occupancy. Usually, only a
20/// few scheduling regions will have register pressure high enough to limit
21/// occupancy for the kernel, so constraints can be relaxed to improve ILP in
22/// other regions.
23///
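///
/// For reference, the occupancy-oriented strategy built below
/// (GCNMaxOccupancySchedStrategy) runs the stages in this order: the initial
/// max-occupancy schedule, the unclustered high register pressure reschedule,
/// the clustered low occupancy reschedule, and finally pre-RA
/// rematerialization. The ILP strategy (GCNMaxILPSchedStrategy) runs a single
/// ILP-oriented initial schedule instead.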
24//===----------------------------------------------------------------------===//
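// Illustrative usage sketch (not part of the original file): the target wires
// one of these strategies into a ScheduleDAGMILive instance, roughly as
// AMDGPUTargetMachine does; DAG mutations and other setup are elided, and the
// helper name below is made up for the example.
//
//   static ScheduleDAGInstrs *createGCNMaxOccupancySched(MachineSchedContext *C) {
//     return new GCNScheduleDAGMILive(
//         C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));
//   }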
25
26#include "GCNSchedStrategy.h"
27#include "AMDGPUIGroupLP.h"
28#include "SIMachineFunctionInfo.h"
29#include "llvm/CodeGen/RegisterClassInfo.h"
30
31#define DEBUG_TYPE "machine-scheduler"
32
33using namespace llvm;
34
35static cl::opt<bool>
36 DisableUnclusterHighRP("amdgpu-disable-unclustred-high-rp-reschedule",
37 cl::Hidden,
38 cl::desc("Disable unclustred high register pressure "
39 "reduction scheduling stage."),
40 cl::init(false));
42 "amdgpu-schedule-metric-bias", cl::Hidden,
44 "Sets the bias which adds weight to occupancy vs latency. Set it to "
45 "100 to chase the occupancy only."),
46 cl::init(10));
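// Note: this bias is added to the pre-stage latency metric when
// UnclusteredHighRPStage::shouldRevertScheduling compares schedules below, so
// larger values make it easier to keep a reschedule that gains occupancy at
// the cost of latency; 100 effectively makes occupancy the only criterion.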
47
48static cl::opt<bool>
49 RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden,
50 cl::desc("Relax occupancy targets for kernels which are memory "
51 "bound (amdgpu-membound-threshold), or "
52 "Wave Limited (amdgpu-limit-wave-threshold)."),
53 cl::init(false));
54
55const unsigned ScheduleMetrics::ScaleFactor = 100;
56
57GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
58 : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
59 HasHighPressure(false) {}
60
61void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
62 GenericScheduler::initialize(DAG);
63
64 MF = &DAG->MF;
65
66 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
67
68 SGPRExcessLimit =
69 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
70 VGPRExcessLimit =
71 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
72
73 SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
74 // Set the initial TargetOccupancy to the maximum occupancy that we can
75 // achieve for this function. This effectively sets a lower bound on the
76 // 'Critical' register limits in the scheduler.
77 // Allow for lower occupancy targets if kernel is wave limited or memory
78 // bound, and using the relaxed occupancy feature.
79 TargetOccupancy =
80 RelaxedOcc ? MFI.getMinAllowedOccupancy() : MFI.getOccupancy();
81 SGPRCriticalLimit =
82 std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);
83
84 if (!KnownExcessRP) {
86 std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);
87 } else {
88 // This is similar to ST.getMaxNumVGPRs(TargetOccupancy) result except
89 // returns a reasonably small number for targets with lots of VGPRs, such
90 // as GFX10 and GFX11.
91 LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
92 "VGPRCriticalLimit calculation method.\n");
93
94 unsigned Granule = AMDGPU::IsaInfo::getVGPRAllocGranule(&ST);
95 unsigned Addressable = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST);
96 unsigned VGPRBudget = alignDown(Addressable / TargetOccupancy, Granule);
97 VGPRBudget = std::max(VGPRBudget, Granule);
98 VGPRCriticalLimit = std::min(VGPRBudget, VGPRExcessLimit);
99 }
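// Illustrative arithmetic for the fallback above: with Addressable = 256
// VGPRs, TargetOccupancy = 6 and Granule = 4, the budget is
// alignDown(256 / 6, 4) = alignDown(42, 4) = 40, which is then clamped by
// VGPRExcessLimit (the numbers are examples, not target values).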
100
101 // Subtract error margin and bias from register limits and avoid overflow.
102 SGPRCriticalLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRCriticalLimit);
103 VGPRCriticalLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRCriticalLimit);
104 SGPRExcessLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRExcessLimit);
105 VGPRExcessLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRExcessLimit);
106
107 LLVM_DEBUG(dbgs() << "VGPRCriticalLimit = " << VGPRCriticalLimit
108 << ", VGPRExcessLimit = " << VGPRExcessLimit
109 << ", SGPRCriticalLimit = " << SGPRCriticalLimit
110 << ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
111}
112
113void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
114 bool AtTop,
115 const RegPressureTracker &RPTracker,
116 const SIRegisterInfo *SRI,
117 unsigned SGPRPressure,
118 unsigned VGPRPressure) {
119 Cand.SU = SU;
120 Cand.AtTop = AtTop;
121
122 if (!DAG->isTrackingPressure())
123 return;
124
125 // getDownwardPressure() and getUpwardPressure() make temporary changes to
126 // the tracker, so we need to pass those function a non-const copy.
127 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
128
129 Pressure.clear();
130 MaxPressure.clear();
131
132 if (AtTop)
133 TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
134 else {
135 // FIXME: I think for bottom-up scheduling, the register pressure is cached
136 // and can be retrieved by DAG->getPressureDiff(SU).
137 TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
138 }
139
140 unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
141 unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
142
143 // If two instructions increase the pressure of different register sets
144 // by the same amount, the generic scheduler will prefer to schedule the
145 // instruction that increases the set with the least amount of registers,
146 // which in our case would be SGPRs. This is rarely what we want, so
147 // when we report excess/critical register pressure, we do it either
148 // only for VGPRs or only for SGPRs.
149
150 // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
151 const unsigned MaxVGPRPressureInc = 16;
152 bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
153 bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
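// For example, with VGPRExcessLimit = 246 and a current VGPR pressure of 232,
// 232 + 16 >= 246, so VGPR excess is tracked before the limit is actually
// crossed, and SGPR excess is deliberately ignored for this candidate
// (illustrative numbers only).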
154
155
156 // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
157 // to increase the likelihood we don't go over the limits. We should improve
158 // the analysis to look through dependencies to find the path with the least
159 // register pressure.
160
161 // We only need to update the RPDelta for instructions that increase register
162 // pressure. Instructions that decrease or keep reg pressure the same will be
163 // marked as RegExcess in tryCandidate() when they are compared with
164 // instructions that increase the register pressure.
165 if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
166 HasHighPressure = true;
167 Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
168 Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
169 }
170
171 if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
172 HasHighPressure = true;
173 Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
174 Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
175 }
176
177 // Register pressure is considered 'CRITICAL' if it is approaching a value
178 // that would reduce the wave occupancy for the execution unit. When
179 // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
180 // has the same cost, so we don't need to prefer one over the other.
181
182 int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
183 int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
184
185 if (SGPRDelta >= 0 || VGPRDelta >= 0) {
186 HasHighPressure = true;
187 if (SGPRDelta > VGPRDelta) {
188 Cand.RPDelta.CriticalMax =
189 PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
190 Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
191 } else {
192 Cand.RPDelta.CriticalMax =
193 PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
194 Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
195 }
196 }
197}
198
199// This function is mostly cut and pasted from
200// GenericScheduler::pickNodeFromQueue()
201void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
202 const CandPolicy &ZonePolicy,
203 const RegPressureTracker &RPTracker,
204 SchedCandidate &Cand) {
205 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
206 ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
207 unsigned SGPRPressure = 0;
208 unsigned VGPRPressure = 0;
209 if (DAG->isTrackingPressure()) {
210 SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
211 VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
212 }
213 ReadyQueue &Q = Zone.Available;
214 for (SUnit *SU : Q) {
215
216 SchedCandidate TryCand(ZonePolicy);
217 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
218 SGPRPressure, VGPRPressure);
219 // Pass SchedBoundary only when comparing nodes from the same boundary.
220 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
221 tryCandidate(Cand, TryCand, ZoneArg);
222 if (TryCand.Reason != NoCand) {
223 // Initialize resource delta if needed in case future heuristics query it.
224 if (TryCand.ResDelta == SchedResourceDelta())
225 TryCand.initResourceDelta(Zone.DAG, SchedModel);
226 Cand.setBest(TryCand);
228 }
229 }
230}
231
232// This function is mostly cut and pasted from
233// GenericScheduler::pickNodeBidirectional()
234SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
235 // Schedule as far as possible in the direction of no choice. This is most
236 // efficient, but also provides the best heuristics for CriticalPSets.
237 if (SUnit *SU = Bot.pickOnlyChoice()) {
238 IsTopNode = false;
239 return SU;
240 }
241 if (SUnit *SU = Top.pickOnlyChoice()) {
242 IsTopNode = true;
243 return SU;
244 }
245 // Set the bottom-up policy based on the state of the current bottom zone and
246 // the instructions outside the zone, including the top zone.
247 CandPolicy BotPolicy;
248 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
249 // Set the top-down policy based on the state of the current top zone and
250 // the instructions outside the zone, including the bottom zone.
251 CandPolicy TopPolicy;
252 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
253
254 // See if BotCand is still valid (because we previously scheduled from Top).
255 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
256 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
257 BotCand.Policy != BotPolicy) {
258 BotCand.reset(CandPolicy());
259 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
260 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
261 } else {
263#ifndef NDEBUG
264 if (VerifyScheduling) {
265 SchedCandidate TCand;
266 TCand.reset(CandPolicy());
267 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
268 assert(TCand.SU == BotCand.SU &&
269 "Last pick result should correspond to re-picking right now");
270 }
271#endif
272 }
273
274 // Check if the top Q has a better candidate.
275 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
276 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
277 TopCand.Policy != TopPolicy) {
278 TopCand.reset(CandPolicy());
279 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
280 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
281 } else {
283#ifndef NDEBUG
284 if (VerifyScheduling) {
285 SchedCandidate TCand;
286 TCand.reset(CandPolicy());
287 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
288 assert(TCand.SU == TopCand.SU &&
289 "Last pick result should correspond to re-picking right now");
290 }
291#endif
292 }
293
294 // Pick best from BotCand and TopCand.
295 LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
296 dbgs() << "Bot Cand: "; traceCandidate(BotCand););
297 SchedCandidate Cand = BotCand;
298 TopCand.Reason = NoCand;
299 tryCandidate(Cand, TopCand, nullptr);
300 if (TopCand.Reason != NoCand) {
301 Cand.setBest(TopCand);
302 }
303 LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););
304
305 IsTopNode = Cand.AtTop;
306 return Cand.SU;
307}
308
309// This function is mostly cut and pasted from
310// GenericScheduler::pickNode()
311SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
312 if (DAG->top() == DAG->bottom()) {
313 assert(Top.Available.empty() && Top.Pending.empty() &&
314 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
315 return nullptr;
316 }
317 SUnit *SU;
318 do {
319 if (RegionPolicy.OnlyTopDown) {
320 SU = Top.pickOnlyChoice();
321 if (!SU) {
322 CandPolicy NoPolicy;
323 TopCand.reset(NoPolicy);
324 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
325 assert(TopCand.Reason != NoCand && "failed to find a candidate");
326 SU = TopCand.SU;
327 }
328 IsTopNode = true;
329 } else if (RegionPolicy.OnlyBottomUp) {
330 SU = Bot.pickOnlyChoice();
331 if (!SU) {
332 CandPolicy NoPolicy;
333 BotCand.reset(NoPolicy);
334 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
335 assert(BotCand.Reason != NoCand && "failed to find a candidate");
336 SU = BotCand.SU;
337 }
338 IsTopNode = false;
339 } else {
340 SU = pickNodeBidirectional(IsTopNode);
341 }
342 } while (SU->isScheduled);
343
344 if (SU->isTopReady())
345 Top.removeReady(SU);
346 if (SU->isBottomReady())
347 Bot.removeReady(SU);
348
349 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
350 << *SU->getInstr());
351 return SU;
352}
353
354GCNSchedStageID GCNSchedStrategy::getCurrentStage() {
355 assert(CurrentStage != SchedStages.end());
356 return *CurrentStage;
357}
358
359bool GCNSchedStrategy::advanceStage() {
360 assert(CurrentStage != SchedStages.end());
361 if (!CurrentStage)
362 CurrentStage = SchedStages.begin();
363 else
364 CurrentStage++;
365
366 return CurrentStage != SchedStages.end();
367}
368
369bool GCNSchedStrategy::hasNextStage() const {
370 assert(CurrentStage);
371 return std::next(CurrentStage) != SchedStages.end();
372}
373
374GCNSchedStageID GCNSchedStrategy::getNextStage() const {
375 assert(CurrentStage && std::next(CurrentStage) != SchedStages.end());
376 return *std::next(CurrentStage);
377}
378
379GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
380 const MachineSchedContext *C)
381 : GCNSchedStrategy(C) {
382 SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
383 SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
384 SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
385 SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
386}
387
388GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C)
389 : GCNSchedStrategy(C) {
390 SchedStages.push_back(GCNSchedStageID::ILPInitialSchedule);
391}
392
393bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
394 SchedCandidate &TryCand,
395 SchedBoundary *Zone) const {
396 // Initialize the candidate if needed.
397 if (!Cand.isValid()) {
398 TryCand.Reason = NodeOrder;
399 return true;
400 }
401
402 // Avoid spilling by exceeding the register limit.
403 if (DAG->isTrackingPressure() &&
404 tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
405 RegExcess, TRI, DAG->MF))
406 return TryCand.Reason != NoCand;
407
408 // Bias PhysReg Defs and copies to their uses and defined respectively.
409 if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
410 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
411 return TryCand.Reason != NoCand;
412
413 bool SameBoundary = Zone != nullptr;
414 if (SameBoundary) {
415 // Prioritize instructions that read unbuffered resources by stall cycles.
416 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
417 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
418 return TryCand.Reason != NoCand;
419
420 // Avoid critical resource consumption and balance the schedule.
421 TryCand.initResourceDelta(DAG, SchedModel);
422 if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
423 TryCand, Cand, ResourceReduce))
424 return TryCand.Reason != NoCand;
425 if (tryGreater(TryCand.ResDelta.DemandedResources,
426 Cand.ResDelta.DemandedResources, TryCand, Cand,
427 ResourceDemand))
428 return TryCand.Reason != NoCand;
429
430 // Unconditionally try to reduce latency.
431 if (tryLatency(TryCand, Cand, *Zone))
432 return TryCand.Reason != NoCand;
433
434 // Weak edges are for clustering and other constraints.
435 if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
436 getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
437 return TryCand.Reason != NoCand;
438 }
439
440 // Keep clustered nodes together to encourage downstream peephole
441 // optimizations which may reduce resource requirements.
442 //
443 // This is a best effort to set things up for a post-RA pass. Optimizations
444 // like generating loads of multiple registers should ideally be done within
445 // the scheduler pass by combining the loads during DAG postprocessing.
446 const SUnit *CandNextClusterSU =
447 Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
448 const SUnit *TryCandNextClusterSU =
449 TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
450 if (tryGreater(TryCand.SU == TryCandNextClusterSU,
451 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
452 return TryCand.Reason != NoCand;
453
454 // Avoid increasing the max critical pressure in the scheduled region.
455 if (DAG->isTrackingPressure() &&
456 tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
457 TryCand, Cand, RegCritical, TRI, DAG->MF))
458 return TryCand.Reason != NoCand;
459
460 // Avoid increasing the max pressure of the entire region.
461 if (DAG->isTrackingPressure() &&
462 tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
463 Cand, RegMax, TRI, DAG->MF))
464 return TryCand.Reason != NoCand;
465
466 if (SameBoundary) {
467 // Fall through to original instruction order.
468 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
469 (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
470 TryCand.Reason = NodeOrder;
471 return true;
472 }
473 }
474 return false;
475}
476
477GCNScheduleDAGMILive::GCNScheduleDAGMILive(
478 MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
479 : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
480 MFI(*MF.getInfo<SIMachineFunctionInfo>()),
481 StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy) {
482
483 LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
484 if (RelaxedOcc) {
485 MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
486 if (MinOccupancy != StartingOccupancy)
487 LLVM_DEBUG(dbgs() << "Allowing Occupancy drops to " << MinOccupancy
488 << ".\n");
489 }
490}
491
492std::unique_ptr<GCNSchedStage>
493GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
494 switch (SchedStageID) {
495 case GCNSchedStageID::OccInitialSchedule:
496 return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
497 case GCNSchedStageID::UnclusteredHighRPReschedule:
498 return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
499 case GCNSchedStageID::ClusteredLowOccupancyReschedule:
500 return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
501 case GCNSchedStageID::PreRARematerialize:
502 return std::make_unique<PreRARematStage>(SchedStageID, *this);
503 case GCNSchedStageID::ILPInitialSchedule:
504 return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
505 }
506
507 llvm_unreachable("Unknown SchedStageID.");
508}
509
510void GCNScheduleDAGMILive::schedule() {
511 // Collect all scheduling regions. The actual scheduling is performed in
512 // GCNScheduleDAGMILive::finalizeSchedule.
513 Regions.push_back(std::pair(RegionBegin, RegionEnd));
514}
515
517GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
519 RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
520 return RPTracker.moveMaxPressure();
521}
522
523void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
524 const MachineBasicBlock *MBB) {
526
527 // If the block has only one successor then the live-ins of that successor
528 // are the live-outs of the current block. We can reuse the calculated live
529 // set if the successor will be sent to scheduling past the current block.
530
531 // However, due to a bug in LiveInterval analysis it may happen that two
532 // predecessors of the same successor block have different lane bitmasks for
533 // a live-out register. Work around that by sticking to a one-to-one
534 // relationship, i.e. one predecessor with one successor block.
535 const MachineBasicBlock *OnlySucc = nullptr;
536 if (MBB->succ_size() == 1) {
537 auto *Candidate = *MBB->succ_begin();
538 if (!Candidate->empty() && Candidate->pred_size() == 1) {
540 if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))
541 OnlySucc = Candidate;
542 }
543 }
544
545 // Scheduler sends regions from the end of the block upwards.
546 size_t CurRegion = RegionIdx;
547 for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
548 if (Regions[CurRegion].first->getParent() != MBB)
549 break;
550 --CurRegion;
551
552 auto I = MBB->begin();
553 auto LiveInIt = MBBLiveIns.find(MBB);
554 auto &Rgn = Regions[CurRegion];
555 auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
556 if (LiveInIt != MBBLiveIns.end()) {
557 auto LiveIn = std::move(LiveInIt->second);
558 RPTracker.reset(*MBB->begin(), &LiveIn);
559 MBBLiveIns.erase(LiveInIt);
560 } else {
561 I = Rgn.first;
562 auto LRS = BBLiveInMap.lookup(NonDbgMI);
563#ifdef EXPENSIVE_CHECKS
564 assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
565#endif
566 RPTracker.reset(*I, &LRS);
567 }
568
569 for (;;) {
570 I = RPTracker.getNext();
571
572 if (Regions[CurRegion].first == I || NonDbgMI == I) {
573 LiveIns[CurRegion] = RPTracker.getLiveRegs();
574 RPTracker.clearMaxPressure();
575 }
576
577 if (Regions[CurRegion].second == I) {
578 Pressure[CurRegion] = RPTracker.moveMaxPressure();
579 if (CurRegion-- == RegionIdx)
580 break;
581 }
582 RPTracker.advanceToNext();
583 RPTracker.advanceBeforeNext();
584 }
585
586 if (OnlySucc) {
587 if (I != MBB->end()) {
588 RPTracker.advanceToNext();
590 }
591 RPTracker.advanceBeforeNext();
592 MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
593 }
594}
595
597GCNScheduleDAGMILive::getBBLiveInMap() const {
598 assert(!Regions.empty());
599 std::vector<MachineInstr *> BBStarters;
600 BBStarters.reserve(Regions.size());
601 auto I = Regions.rbegin(), E = Regions.rend();
602 auto *BB = I->first->getParent();
603 do {
604 auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
605 BBStarters.push_back(MI);
606 do {
607 ++I;
608 } while (I != E && I->first->getParent() == BB);
609 } while (I != E);
610 return getLiveRegMap(BBStarters, false /*After*/, *LIS);
611}
612
613void GCNScheduleDAGMILive::finalizeSchedule() {
614 // Start actual scheduling here. This function is called by the base
615 // MachineScheduler after all regions have been recorded by
616 // GCNScheduleDAGMILive::schedule().
617 LiveIns.resize(Regions.size());
618 Pressure.resize(Regions.size());
619 RescheduleRegions.resize(Regions.size());
620 RegionsWithHighRP.resize(Regions.size());
621 RegionsWithExcessRP.resize(Regions.size());
622 RegionsWithMinOcc.resize(Regions.size());
623 RegionsWithIGLPInstrs.resize(Regions.size());
624 RescheduleRegions.set();
625 RegionsWithHighRP.reset();
626 RegionsWithExcessRP.reset();
627 RegionsWithMinOcc.reset();
628 RegionsWithIGLPInstrs.reset();
629
630 runSchedStages();
631}
632
633void GCNScheduleDAGMILive::runSchedStages() {
634 LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
635
636 if (!Regions.empty())
637 BBLiveInMap = getBBLiveInMap();
638
639 GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
640 while (S.advanceStage()) {
641 auto Stage = createSchedStage(S.getCurrentStage());
642 if (!Stage->initGCNSchedStage())
643 continue;
644
645 for (auto Region : Regions) {
646 RegionBegin = Region.first;
647 RegionEnd = Region.second;
648 // Setup for scheduling the region and check whether it should be skipped.
649 if (!Stage->initGCNRegion()) {
650 Stage->advanceRegion();
651 exitRegion();
652 continue;
653 }
654
655 ScheduleDAGMILive::schedule();
656 Stage->finalizeGCNRegion();
657 }
658
659 Stage->finalizeGCNSchedStage();
660 }
661}
662
663#ifndef NDEBUG
664raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
665 switch (StageID) {
666 case GCNSchedStageID::OccInitialSchedule:
667 OS << "Max Occupancy Initial Schedule";
668 break;
669 case GCNSchedStageID::UnclusteredHighRPReschedule:
670 OS << "Unclustered High Register Pressure Reschedule";
671 break;
672 case GCNSchedStageID::ClusteredLowOccupancyReschedule:
673 OS << "Clustered Low Occupancy Reschedule";
674 break;
675 case GCNSchedStageID::PreRARematerialize:
676 OS << "Pre-RA Rematerialize";
677 break;
678 case GCNSchedStageID::ILPInitialSchedule:
679 OS << "Max ILP Initial Schedule";
680 break;
681 }
682
683 return OS;
684}
685#endif
686
687GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
688 : DAG(DAG), S(static_cast<GCNSchedStrategy &>(*DAG.SchedImpl)), MF(DAG.MF),
689 MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}
690
691bool GCNSchedStage::initGCNSchedStage() {
692 if (!DAG.LIS)
693 return false;
694
695 LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
696 return true;
697}
698
699bool UnclusteredHighRPStage::initGCNSchedStage() {
700 if (DisableUnclusterHighRP)
701 return false;
702
703 if (!GCNSchedStage::initGCNSchedStage())
704 return false;
705
706 if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
707 return false;
708
711
712 InitialOccupancy = DAG.MinOccupancy;
713 // Aggressively try to reduce register pressure in the unclustered high RP
714 // stage. Temporarily increase occupancy target in the region.
717 if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy)
718 MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
719
721 dbgs()
722 << "Retrying function scheduling without clustering. "
723 "Aggressivly try to reduce register pressure to achieve occupancy "
724 << DAG.MinOccupancy << ".\n");
725
726 return true;
727}
728
729bool ClusteredLowOccStage::initGCNSchedStage() {
730 if (!GCNSchedStage::initGCNSchedStage())
731 return false;
732
733 // Don't bother trying to improve ILP in lower RP regions if occupancy has not
734 // been dropped. All regions will have already been scheduled with the ideal
735 // occupancy targets.
736 if (DAG.StartingOccupancy <= DAG.MinOccupancy)
737 return false;
738
740 dbgs() << "Retrying function scheduling with lowest recorded occupancy "
741 << DAG.MinOccupancy << ".\n");
742 return true;
743}
744
745bool PreRARematStage::initGCNSchedStage() {
746 if (!GCNSchedStage::initGCNSchedStage())
747 return false;
748
749 if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
750 return false;
751
753 // Check maximum occupancy
754 if (ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()) ==
755 DAG.MinOccupancy)
756 return false;
757
758 // FIXME: This pass will invalidate cached MBBLiveIns for regions
759 // in between the defs and the region we sank the def to. Cached pressure
760 // for regions where a def is sunk from will also be invalidated. Will
761 // need to be fixed if there is another pass after this pass.
763
764 collectRematerializableInstructions();
765 if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
766 return false;
767
769 dbgs() << "Retrying function scheduling with improved occupancy of "
770 << DAG.MinOccupancy << " from rematerializing\n");
771 return true;
772}
773
776 LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
777}
778
779void UnclusteredHighRPStage::finalizeGCNSchedStage() {
780 SavedMutations.swap(DAG.Mutations);
781 S.SGPRLimitBias = S.VGPRLimitBias = 0;
782 if (DAG.MinOccupancy > InitialOccupancy) {
783 for (unsigned IDX = 0; IDX < DAG.Pressure.size(); ++IDX)
784 DAG.RegionsWithMinOcc[IDX] =
785 DAG.Pressure[IDX].getOccupancy(DAG.ST) == DAG.MinOccupancy;
786
788 << " stage successfully increased occupancy to "
789 << DAG.MinOccupancy << '\n');
790 }
791
792 GCNSchedStage::finalizeGCNSchedStage();
793}
794
795bool GCNSchedStage::initGCNRegion() {
796 // Check whether this new region is also a new block.
797 if (DAG.RegionBegin->getParent() != CurrentMBB)
799
800 unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
801 DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);
802
803 // Skip empty scheduling regions (0 or 1 schedulable instructions).
804 if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
805 return false;
806
807 LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
809 << " " << CurrentMBB->getName()
810 << "\n From: " << *DAG.begin() << " To: ";
812 else dbgs() << "End";
813 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
814
815 // Save original instruction order before scheduling for possible revert.
816 Unsched.clear();
817 Unsched.reserve(DAG.NumRegionInstrs);
820 for (auto &I : DAG) {
821 Unsched.push_back(&I);
822 if (I.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER ||
823 I.getOpcode() == AMDGPU::IGLP_OPT)
824 DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
825 }
826 } else {
827 for (auto &I : DAG)
828 Unsched.push_back(&I);
829 }
830
831 PressureBefore = DAG.Pressure[RegionIdx];
832
834 dbgs() << "Pressure before scheduling:\nRegion live-ins:"
835 << print(DAG.LiveIns[RegionIdx], DAG.MRI)
836 << "Region live-in pressure: "
838 << "Region register pressure: " << print(PressureBefore));
839
840 S.HasHighPressure = false;
842
843 if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
845 SavedMutations.clear();
848 }
849
850 return true;
851}
852
853bool UnclusteredHighRPStage::initGCNRegion() {
854 // Only reschedule regions with the minimum occupancy or regions that may have
855 // spilling (excess register pressure).
856 if ((!DAG.RegionsWithMinOcc[RegionIdx] ||
857 DAG.MinOccupancy <= InitialOccupancy) &&
858 !DAG.RegionsWithExcessRP[RegionIdx])
859 return false;
860
861 return GCNSchedStage::initGCNRegion();
862}
863
864bool ClusteredLowOccStage::initGCNRegion() {
865 // We may need to reschedule this region if it wasn't rescheduled in the last
866 // stage, or if we found it was testing critical register pressure limits in
867 // the unclustered reschedule stage. The latter is because we may not have been
868 // able to raise the min occupancy in the previous stage so the region may be
869 // overly constrained even if it was already rescheduled.
870 if (!DAG.RegionsWithHighRP[RegionIdx])
871 return false;
872
873 return GCNSchedStage::initGCNRegion();
874}
875
876bool PreRARematStage::initGCNRegion() {
877 if (!DAG.RescheduleRegions[RegionIdx])
878 return false;
879
880 return GCNSchedStage::initGCNRegion();
881}
882
883void GCNSchedStage::setupNewBlock() {
884 if (CurrentMBB)
886
887 CurrentMBB = DAG.RegionBegin->getParent();
888 DAG.startBlock(CurrentMBB);
889 // Get real RP for the region if it hasn't been calculated before. After the
890 // initial schedule stage real RP will be collected after scheduling.
891 if (StageID == GCNSchedStageID::OccInitialSchedule ||
892 StageID == GCNSchedStageID::ILPInitialSchedule)
893 DAG.computeBlockPressure(RegionIdx, CurrentMBB);
894}
895
896void GCNSchedStage::finalizeGCNRegion() {
897 DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
898 DAG.RescheduleRegions[RegionIdx] = false;
899 if (S.HasHighPressure)
900 DAG.RegionsWithHighRP[RegionIdx] = true;
901
902 // Revert scheduling if we have dropped occupancy or there is some other
903 // reason that the original schedule is better.
904 checkScheduling();
905
906 if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
907 StageID != GCNSchedStageID::UnclusteredHighRPReschedule)
908 SavedMutations.swap(DAG.Mutations);
909
910 DAG.exitRegion();
911 RegionIdx++;
912}
913
914void GCNSchedStage::checkScheduling() {
915 // Check the results of scheduling.
916 PressureAfter = DAG.getRealRegPressure(RegionIdx);
917 LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
918 LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
919
920 if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
921 PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
922 DAG.Pressure[RegionIdx] = PressureAfter;
923 DAG.RegionsWithMinOcc[RegionIdx] =
924 PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
925
926 // Early out if we have achieved the occupancy target.
927 LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
928 return;
929 }
930
931 unsigned TargetOccupancy =
933 unsigned WavesAfter =
934 std::min(TargetOccupancy, PressureAfter.getOccupancy(ST));
935 unsigned WavesBefore =
936 std::min(TargetOccupancy, PressureBefore.getOccupancy(ST));
937 LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
938 << ", after " << WavesAfter << ".\n");
939
940 // We may not be able to keep the current target occupancy because of the just
941 // scheduled region. We might still be able to revert scheduling if the
942 // occupancy before was higher, or if the current schedule has register
943 // pressure higher than the excess limits which could lead to more spilling.
944 unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
945
946 // Allow memory bound functions to drop to 4 waves if not limited by an
947 // attribute.
948 if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
949 WavesAfter >= MFI.getMinAllowedOccupancy()) {
950 LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
951 << MFI.getMinAllowedOccupancy() << " waves\n");
952 NewOccupancy = WavesAfter;
953 }
954
955 if (NewOccupancy < DAG.MinOccupancy) {
956 DAG.MinOccupancy = NewOccupancy;
957 MFI.limitOccupancy(DAG.MinOccupancy);
958 DAG.RegionsWithMinOcc.reset();
959 LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
960 << DAG.MinOccupancy << ".\n");
961 }
962
963 unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
964 unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
965 if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
966 PressureAfter.getAGPRNum() > MaxVGPRs ||
967 PressureAfter.getSGPRNum() > MaxSGPRs) {
968 DAG.RescheduleRegions[RegionIdx] = true;
969 DAG.RegionsWithHighRP[RegionIdx] = true;
970 DAG.RegionsWithExcessRP[RegionIdx] = true;
971 }
972
973 // Revert if this region's schedule would cause a drop in occupancy or
974 // spilling.
975 if (shouldRevertScheduling(WavesAfter)) {
976 revertScheduling();
977 } else {
978 DAG.Pressure[RegionIdx] = PressureAfter;
979 DAG.RegionsWithMinOcc[RegionIdx] =
980 PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
981 }
982}
983
984unsigned
985GCNSchedStage::computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
986 DenseMap<unsigned, unsigned> &ReadyCycles,
987 const TargetSchedModel &SM) {
988 unsigned ReadyCycle = CurrCycle;
989 for (auto &D : SU.Preds) {
990 if (D.isAssignedRegDep()) {
991 MachineInstr *DefMI = D.getSUnit()->getInstr();
992 unsigned Latency = SM.computeInstrLatency(DefMI);
993 unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];
994 ReadyCycle = std::max(ReadyCycle, DefReady + Latency);
995 }
996 }
997 ReadyCycles[SU.NodeNum] = ReadyCycle;
998 return ReadyCycle;
999}
1000
1001#ifndef NDEBUG
1002struct EarlierIssuingCycle {
1003 bool operator()(std::pair<MachineInstr *, unsigned> A,
1004 std::pair<MachineInstr *, unsigned> B) const {
1005 return A.second < B.second;
1006 }
1007};
1008
1009static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,
1010 EarlierIssuingCycle> &ReadyCycles) {
1011 if (ReadyCycles.empty())
1012 return;
1013 unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
1014 dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
1015 << " ##################\n# Cycle #\t\t\tInstruction "
1016 " "
1017 " \n";
1018 unsigned IPrev = 1;
1019 for (auto &I : ReadyCycles) {
1020 if (I.second > IPrev + 1)
1021 dbgs() << "****************************** BUBBLE OF " << I.second - IPrev
1022 << " CYCLES DETECTED ******************************\n\n";
1023 dbgs() << "[ " << I.second << " ] : " << *I.first << "\n";
1024 IPrev = I.second;
1025 }
1026}
1027#endif
1028
1030GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
1031#ifndef NDEBUG
1032 std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
1033 ReadyCyclesSorted;
1034#endif
1036 unsigned SumBubbles = 0;
1037 DenseMap<unsigned, unsigned> ReadyCycles;
1038 unsigned CurrCycle = 0;
1039 for (auto &SU : InputSchedule) {
1040 unsigned ReadyCycle =
1041 computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);
1042 SumBubbles += ReadyCycle - CurrCycle;
1043#ifndef NDEBUG
1044 ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
1045#endif
1046 CurrCycle = ++ReadyCycle;
1047 }
1048#ifndef NDEBUG
1049 LLVM_DEBUG(
1050 printScheduleModel(ReadyCyclesSorted);
1051 dbgs() << "\n\t"
1052 << "Metric: "
1053 << (SumBubbles
1054 ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1055 : 1)
1056 << "\n\n");
1057#endif
1058
1059 return ScheduleMetrics(CurrCycle, SumBubbles);
1060}
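// Reading the metric: as in the debug printout above, the score is
// (SumBubbles * ScaleFactor) / CurrCycle. For example, a 200-cycle schedule
// containing 30 bubble cycles scores (30 * 100) / 200 = 15; lower scores mean
// a denser schedule (illustrative numbers only).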
1061
1064#ifndef NDEBUG
1065 std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
1066 ReadyCyclesSorted;
1067#endif
1069 unsigned SumBubbles = 0;
1070 DenseMap<unsigned, unsigned> ReadyCycles;
1071 unsigned CurrCycle = 0;
1072 for (auto &MI : DAG) {
1073 SUnit *SU = DAG.getSUnit(&MI);
1074 if (!SU)
1075 continue;
1076 unsigned ReadyCycle =
1077 computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM);
1078 SumBubbles += ReadyCycle - CurrCycle;
1079#ifndef NDEBUG
1080 ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle));
1081#endif
1082 CurrCycle = ++ReadyCycle;
1083 }
1084#ifndef NDEBUG
1085 LLVM_DEBUG(
1086 printScheduleModel(ReadyCyclesSorted);
1087 dbgs() << "\n\t"
1088 << "Metric: "
1089 << (SumBubbles
1090 ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1091 : 1)
1092 << "\n\n");
1093#endif
1094
1095 return ScheduleMetrics(CurrCycle, SumBubbles);
1096}
1097
1098bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
1099 if (WavesAfter < DAG.MinOccupancy)
1100 return true;
1101
1102 return false;
1103}
1104
1105bool OccInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
1106 if (PressureAfter == PressureBefore)
1107 return false;
1108
1109 if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
1110 return true;
1111
1112 if (mayCauseSpilling(WavesAfter))
1113 return true;
1114
1115 return false;
1116}
1117
1118bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
1119 // If RP is not reduced in the unclustered reschedule stage, revert to the
1120 // old schedule.
1121 if ((WavesAfter <= PressureBefore.getOccupancy(ST) &&
1122 mayCauseSpilling(WavesAfter)) ||
1124 LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
1125 return true;
1126 }
1127
1128 // Do not attempt to relax schedule even more if we are already spilling.
1129 if (isRegionWithExcessRP())
1130 return false;
1131
1132 LLVM_DEBUG(
1133 dbgs()
1134 << "\n\t *** In shouldRevertScheduling ***\n"
1135 << " *********** BEFORE UnclusteredHighRPStage ***********\n");
1136 ScheduleMetrics MBefore =
1137 getScheduleMetrics(DAG.SUnits);
1138 LLVM_DEBUG(
1139 dbgs()
1140 << "\n *********** AFTER UnclusteredHighRPStage ***********\n");
1141 ScheduleMetrics MAfter = getScheduleMetrics(DAG);
1142 unsigned OldMetric = MBefore.getMetric();
1143 unsigned NewMetric = MAfter.getMetric();
1144 unsigned WavesBefore =
1145 std::min(S.getTargetOccupancy(), PressureBefore.getOccupancy(ST));
1146 unsigned Profit =
1147 ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
1148 ((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) /
1149 NewMetric) /
1150 ScheduleMetrics::ScaleFactor;
1151 LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "
1152 << MAfter << "Profit: " << Profit << "\n");
1153 return Profit < ScheduleMetrics::ScaleFactor;
1154}
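// Illustrative numbers for the profit check above, assuming the formula as
// written: with unchanged occupancy (WavesAfter == WavesBefore), an old metric
// plus bias of 25 and a new metric of 20, Profit is roughly
// (100 * ((25 * 100) / 20)) / 100 = 125 >= 100, so the unclustered schedule is
// kept; if the new metric were 30 instead, Profit would be about 83 and the
// stage would revert.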
1155
1156bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
1157 if (PressureAfter == PressureBefore)
1158 return false;
1159
1160 if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
1161 return true;
1162
1163 if (mayCauseSpilling(WavesAfter))
1164 return true;
1165
1166 return false;
1167}
1168
1169bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {
1170 if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
1171 return true;
1172
1173 if (mayCauseSpilling(WavesAfter))
1174 return true;
1175
1176 return false;
1177}
1178
1179bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
1180 if (mayCauseSpilling(WavesAfter))
1181 return true;
1182
1183 return false;
1184}
1185
1186bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
1187 if (WavesAfter <= MFI.getMinWavesPerEU() &&
1190 LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
1191 return true;
1192 }
1193
1194 return false;
1195}
1196
1197void GCNSchedStage::revertScheduling() {
1198 DAG.RegionsWithMinOcc[RegionIdx] =
1199 PressureBefore.getOccupancy(ST) == DAG.MinOccupancy;
1200 LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
1201 DAG.RescheduleRegions[RegionIdx] =
1202 S.hasNextStage() &&
1203 S.getNextStage() != GCNSchedStageID::UnclusteredHighRPReschedule;
1204
1205 int SkippedDebugInstr = 0;
1206 for (MachineInstr *MI : Unsched) {
1207 if (MI->isDebugInstr()) {
1208 ++SkippedDebugInstr;
1209 continue;
1210 }
1211
1212 if (MI->getIterator() != DAG.RegionEnd) {
1213 DAG.BB->remove(MI);
1214 DAG.BB->insert(DAG.RegionEnd, MI);
1215 if (!MI->isDebugInstr())
1216 DAG.LIS->handleMove(*MI, true);
1217 }
1218
1219 // Reset read-undef flags and update them later.
1220 for (auto &Op : MI->all_defs())
1221 Op.setIsUndef(false);
1222 RegisterOperands RegOpers;
1223 RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
1224 if (!MI->isDebugInstr()) {
1226 // Adjust liveness and add missing dead+read-undef flags.
1228 RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
1229 } else {
1230 // Adjust for missing dead-def flags.
1231 RegOpers.detectDeadDefs(*MI, *DAG.LIS);
1232 }
1233 }
1234 DAG.RegionEnd = MI->getIterator();
1235 ++DAG.RegionEnd;
1236 LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
1237 }
1238
1239 // After reverting schedule, debug instrs will now be at the end of the block
1240 // and RegionEnd will point to the first debug instr. Increment RegionEnd
1241 // past debug instrs to the actual end of the scheduling region.
1242 while (SkippedDebugInstr-- > 0)
1243 ++DAG.RegionEnd;
1244
1245 // If Unsched.front() instruction is a debug instruction, this will actually
1246 // shrink the region since we moved all debug instructions to the end of the
1247 // block. Find the first instruction that is not a debug instruction.
1248 DAG.RegionBegin = Unsched.front()->getIterator();
1249 if (DAG.RegionBegin->isDebugInstr()) {
1250 for (MachineInstr *MI : Unsched) {
1251 if (MI->isDebugInstr())
1252 continue;
1253 DAG.RegionBegin = MI->getIterator();
1254 break;
1255 }
1256 }
1257
1258 // Then move the debug instructions back into their correct place and set
1259 // RegionBegin and RegionEnd if needed.
1260 DAG.placeDebugValues();
1261
1262 DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
1263}
1264
1265void PreRARematStage::collectRematerializableInstructions() {
1266 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
1267 for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
1268 Register Reg = Register::index2VirtReg(I);
1269 if (!DAG.LIS->hasInterval(Reg))
1270 continue;
1271
1272 // TODO: Handle AGPR and SGPR rematerialization
1273 if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
1274 !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
1275 continue;
1276
1277 MachineOperand *Op = DAG.MRI.getOneDef(Reg);
1278 MachineInstr *Def = Op->getParent();
1279 if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
1280 continue;
1281
1282 MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
1283 if (Def->getParent() == UseI->getParent())
1284 continue;
1285
1286 // We are only collecting defs that are defined in another block and are
1287 // live-through or used inside regions at MinOccupancy. This means that the
1288 // register must be in the live-in set for the region.
1289 bool AddedToRematList = false;
1290 for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
1291 auto It = DAG.LiveIns[I].find(Reg);
1292 if (It != DAG.LiveIns[I].end() && !It->second.none()) {
1293 if (DAG.RegionsWithMinOcc[I]) {
1294 RematerializableInsts[I][Def] = UseI;
1295 AddedToRematList = true;
1296 }
1297
1298 // Collect regions with rematerializable reg as live-in to avoid
1299 // searching later when updating RP.
1300 RematDefToLiveInRegions[Def].push_back(I);
1301 }
1302 }
1303 if (!AddedToRematList)
1304 RematDefToLiveInRegions.erase(Def);
1305 }
1306}
1307
1308bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
1309 const TargetInstrInfo *TII) {
1310 // Temporary copies of cached variables we will be modifying and replacing if
1311 // sinking succeeds.
1312 SmallVector<
1313 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32>
1314 NewRegions;
1315 DenseMap<unsigned, GCNRPTracker::LiveRegSet> NewLiveIns;
1316 DenseMap<unsigned, GCNRegPressure> NewPressure;
1317 BitVector NewRescheduleRegions;
1318 LiveIntervals *LIS = DAG.LIS;
1319
1320 NewRegions.resize(DAG.Regions.size());
1321 NewRescheduleRegions.resize(DAG.Regions.size());
1322
1323 // Collect only regions that have a rematerializable def as a live-in.
1324 SmallSet<unsigned, 16> ImpactedRegions;
1325 for (const auto &It : RematDefToLiveInRegions)
1326 ImpactedRegions.insert(It.second.begin(), It.second.end());
1327
1328 // Make copies of register pressure and live-ins cache that will be updated
1329 // as we rematerialize.
1330 for (auto Idx : ImpactedRegions) {
1331 NewPressure[Idx] = DAG.Pressure[Idx];
1332 NewLiveIns[Idx] = DAG.LiveIns[Idx];
1333 }
1334 NewRegions = DAG.Regions;
1335 NewRescheduleRegions.reset();
1336
1338 bool Improved = false;
1339 for (auto I : ImpactedRegions) {
1340 if (!DAG.RegionsWithMinOcc[I])
1341 continue;
1342
1343 Improved = false;
1344 int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts());
1345 int SGPRUsage = NewPressure[I].getSGPRNum();
1346
1347 // TODO: Handle occupancy drop due to AGPR and SGPR.
1348 // Check if cause of occupancy drop is due to VGPR usage and not SGPR.
1349 if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == DAG.MinOccupancy)
1350 break;
1351
1352 // The occupancy of this region could have been improved by a previous
1353 // iteration's sinking of defs.
1354 if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) {
1355 NewRescheduleRegions[I] = true;
1356 Improved = true;
1357 continue;
1358 }
1359
1360 // First check if we have enough trivially rematerializable instructions to
1361 // improve occupancy. Optimistically assume all instructions we are able to
1362 // sink decreased RP.
1363 int TotalSinkableRegs = 0;
1364 for (const auto &It : RematerializableInsts[I]) {
1365 MachineInstr *Def = It.first;
1366 Register DefReg = Def->getOperand(0).getReg();
1367 TotalSinkableRegs +=
1368 SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
1369 }
1370 int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
1371 unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
1372 // If in the most optimistic scenario, we cannot improve occupancy, then do
1373 // not attempt to sink any instructions.
1374 if (OptimisticOccupancy <= DAG.MinOccupancy)
1375 break;
1376
1377 unsigned ImproveOccupancy = 0;
1379 for (auto &It : RematerializableInsts[I]) {
1380 MachineInstr *Def = It.first;
1381 MachineBasicBlock::iterator InsertPos =
1382 MachineBasicBlock::iterator(It.second);
1383 Register Reg = Def->getOperand(0).getReg();
1384 // Rematerialize MI to its use block. Since we are only rematerializing
1385 // instructions that do not have any virtual reg uses, we do not need to
1386 // call LiveRangeEdit::allUsesAvailableAt() and
1387 // LiveRangeEdit::canRematerializeAt().
1388 TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
1389 Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
1390 MachineInstr *NewMI = &*std::prev(InsertPos);
1391 LIS->InsertMachineInstrInMaps(*NewMI);
1392 LIS->removeInterval(Reg);
1393 LIS->createAndComputeVirtRegInterval(Reg);
1394 InsertedMIToOldDef[NewMI] = Def;
1395
1396 // Update region boundaries in the scheduling region we sank from, since we
1397 // may sink an instruction that was at the beginning or end of its region.
1398 DAG.updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr,
1399 /*Removing =*/true);
1400
1401 // Update region boundaries in region we sinked to.
1402 DAG.updateRegionBoundaries(NewRegions, InsertPos, NewMI);
1403
1404 LaneBitmask PrevMask = NewLiveIns[I][Reg];
1405 // FIXME: Also update cached pressure for where the def was sunk from.
1406 // Update RP for all regions that have this reg as a live-in and remove
1407 // the reg from all regions as a live-in.
1408 for (auto Idx : RematDefToLiveInRegions[Def]) {
1409 NewLiveIns[Idx].erase(Reg);
1410 if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) {
1411 // Def is live-through and not used in this block.
1412 NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
1413 } else {
1414 // Def is used and rematerialized into this block.
1415 GCNDownwardRPTracker RPT(*LIS);
1416 auto *NonDbgMI = &*skipDebugInstructionsForward(
1417 NewRegions[Idx].first, NewRegions[Idx].second);
1418 RPT.reset(*NonDbgMI, &NewLiveIns[Idx]);
1419 RPT.advance(NewRegions[Idx].second);
1420 NewPressure[Idx] = RPT.moveMaxPressure();
1421 }
1422 }
1423
1424 SinkedDefs.push_back(Def);
1425 ImproveOccupancy = NewPressure[I].getOccupancy(ST);
1426 if (ImproveOccupancy > DAG.MinOccupancy)
1427 break;
1428 }
1429
1430 // Remove defs we just sank from all regions' lists of sinkable defs.
1431 for (auto &Def : SinkedDefs)
1432 for (auto TrackedIdx : RematDefToLiveInRegions[Def])
1433 RematerializableInsts[TrackedIdx].erase(Def);
1434
1435 if (ImproveOccupancy <= DAG.MinOccupancy)
1436 break;
1437
1438 NewRescheduleRegions[I] = true;
1439 Improved = true;
1440 }
1441
1442 if (!Improved) {
1443 // Occupancy was not improved for all regions that were at MinOccupancy.
1444 // Undo sinking and remove newly rematerialized instructions.
1445 for (auto &Entry : InsertedMIToOldDef) {
1446 MachineInstr *MI = Entry.first;
1447 MachineInstr *OldMI = Entry.second;
1448 Register Reg = MI->getOperand(0).getReg();
1449 LIS->RemoveMachineInstrFromMaps(*MI);
1450 MI->eraseFromParent();
1451 OldMI->clearRegisterDeads(Reg);
1452 LIS->removeInterval(Reg);
1453 LIS->createAndComputeVirtRegInterval(Reg);
1454 }
1455 return false;
1456 }
1457
1458 // Occupancy was improved for all regions.
1459 for (auto &Entry : InsertedMIToOldDef) {
1460 MachineInstr *MI = Entry.first;
1461 MachineInstr *OldMI = Entry.second;
1462
1463 // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
1464 DAG.BBLiveInMap.erase(OldMI);
1465
1466 // Remove OldMI and update LIS
1467 Register Reg = MI->getOperand(0).getReg();
1468 LIS->RemoveMachineInstrFromMaps(*OldMI);
1469 OldMI->eraseFromParent();
1470 LIS->removeInterval(Reg);
1471 LIS->createAndComputeVirtRegInterval(Reg);
1472 }
1473
1474 // Update live-ins, register pressure, and regions caches.
1475 for (auto Idx : ImpactedRegions) {
1476 DAG.LiveIns[Idx] = NewLiveIns[Idx];
1477 DAG.Pressure[Idx] = NewPressure[Idx];
1478 DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
1479 }
1480 DAG.Regions = NewRegions;
1481 DAG.RescheduleRegions = NewRescheduleRegions;
1482
1484 MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
1485
1486 return true;
1487}
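// Note: the sinking above is all-or-nothing. If any region at minimum
// occupancy fails to improve, every rematerialized instruction is erased and
// the original defs (and cached live-ins/pressure) are left untouched; only
// when all such regions improve are the cached Regions, LiveIns and Pressure
// replaced and the occupancy target raised.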
1488
1489// Copied from MachineLICM
1490bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
1492 return false;
1493
1494 for (const MachineOperand &MO : MI.all_uses())
1495 if (MO.getReg().isVirtual())
1496 return false;
1497
1498 return true;
1499}
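// In effect, only defs that the target already marks as trivially
// rematerializable and whose register uses are all physical (no virtual
// register uses) qualify, which is what lets the sinking above skip the
// LiveRangeEdit::allUsesAvailableAt()/canRematerializeAt() checks.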
1500
1501// When removing, we will have to check both beginning and ending of the region.
1502// When inserting, we will only have to check if we are inserting NewMI in front
1503// of a scheduling region and do not need to check the ending since we will only
1504// ever be inserting before an already existing MI.
1505void GCNScheduleDAGMILive::updateRegionBoundaries(
1506 SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
1507 MachineBasicBlock::iterator>> &RegionBoundaries,
1508 MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) {
1509 unsigned I = 0, E = RegionBoundaries.size();
1510 // Search for first region of the block where MI is located
1511 while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent())
1512 ++I;
1513
1514 for (; I != E; ++I) {
1515 if (MI->getParent() != RegionBoundaries[I].first->getParent())
1516 return;
1517
1518 if (Removing && MI == RegionBoundaries[I].first &&
1519 MI == RegionBoundaries[I].second) {
1520 // MI is in a region with size 1, after removing, the region will be
1521 // size 0, set RegionBegin and RegionEnd to pass end of block iterator.
1522 RegionBoundaries[I] =
1523 std::pair(MI->getParent()->end(), MI->getParent()->end());
1524 return;
1525 }
1526 if (MI == RegionBoundaries[I].first) {
1527 if (Removing)
1528 RegionBoundaries[I] =
1529 std::pair(std::next(MI), RegionBoundaries[I].second);
1530 else
1531 // Inserted NewMI in front of region, set new RegionBegin to NewMI
1532 RegionBoundaries[I] = std::pair(MachineBasicBlock::iterator(NewMI),
1533 RegionBoundaries[I].second);
1534 return;
1535 }
1536 if (Removing && MI == RegionBoundaries[I].second) {
1537 RegionBoundaries[I] = std::pair(RegionBoundaries[I].first, std::prev(MI));
1538 return;
1539 }
1540 }
1541}
1542
1543static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
1544 return std::any_of(
1545 DAG->begin(), DAG->end(), [](MachineBasicBlock::iterator MI) {
1546 unsigned Opc = MI->getOpcode();
1547 return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT;
1548 });
1549}
1550
1551GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive(
1552 MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
1553 bool RemoveKillFlags)
1554 : ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {}
1555
1556void GCNPostScheduleDAGMILive::schedule() {
1557 HasIGLPInstrs = hasIGLPInstrs(this);
1558 if (HasIGLPInstrs) {
1559 SavedMutations.clear();
1560 SavedMutations.swap(Mutations);
1562 }
1563
1564 ScheduleDAGMI::schedule();
1565}
1566
1567void GCNPostScheduleDAGMILive::finalizeSchedule() {
1568 if (HasIGLPInstrs)
1569 SavedMutations.swap(Mutations);
1570
1571 ScheduleDAGMI::finalizeSchedule();
1572}
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static cl::opt< bool > RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden, cl::desc("Relax occupancy targets for kernels which are memory " "bound (amdgpu-membound-threshold), or " "Wave Limited (amdgpu-limit-wave-threshold)."), cl::init(false))
static void printScheduleModel(std::set< std::pair< MachineInstr *, unsigned >, EarlierIssuingCycle > &ReadyCycles)
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG)
static cl::opt< bool > DisableUnclusterHighRP("amdgpu-disable-unclustred-high-rp-reschedule", cl::Hidden, cl::desc("Disable unclustred high register pressure " "reduction scheduling stage."), cl::init(false))
static cl::opt< unsigned > ScheduleMetricBias("amdgpu-schedule-metric-bias", cl::Hidden, cl::desc("Sets the bias which adds weight to occupancy vs latency. Set it to " "100 to chase the occupancy only."), cl::init(10))
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
if(VerifyEach)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
BitVector & reset()
Definition: BitVector.h:392
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:341
BitVector & set()
Definition: BitVector.h:351
bool none() const
none - Returns true if none of the bits are set.
Definition: BitVector.h:188
bool shouldRevertScheduling(unsigned WavesAfter) override
This class represents an Operation in the Expression.
bool erase(const KeyT &Val)
Definition: DenseMap.h:329
GCNMaxILPSchedStrategy(const MachineSchedContext *C)
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C)
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
GCNPostScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
virtual bool initGCNRegion()
GCNSchedStrategy & S
GCNRegPressure PressureBefore
bool isRegionWithExcessRP() const
bool mayCauseSpilling(unsigned WavesAfter)
ScheduleMetrics getScheduleMetrics(const std::vector< SUnit > &InputSchedule)
GCNScheduleDAGMILive & DAG
const GCNSchedStageID StageID
std::vector< MachineInstr * > Unsched
GCNRegPressure PressureAfter
MachineFunction & MF
SIMachineFunctionInfo & MFI
unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle, DenseMap< unsigned, unsigned > &ReadyCycles, const TargetSchedModel &SM)
virtual void finalizeGCNSchedStage()
virtual bool initGCNSchedStage()
virtual bool shouldRevertScheduling(unsigned WavesAfter)
std::vector< std::unique_ptr< ScheduleDAGMutation > > SavedMutations
GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
MachineBasicBlock * CurrentMBB
const GCNSubtarget & ST
This is a minimal scheduler strategy.
const unsigned HighRPSGPRBias
GCNSchedStrategy(const MachineSchedContext *C)
SmallVector< GCNSchedStageID, 4 > SchedStages
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand)
SUnit * pickNodeBidirectional(bool &IsTopNode)
std::vector< unsigned > MaxPressure
GCNSchedStageID getCurrentStage()
MachineFunction * MF
SmallVectorImpl< GCNSchedStageID >::iterator CurrentStage
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure)
std::vector< unsigned > Pressure
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
const unsigned HighRPVGPRBias
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
GCNSchedStageID getNextStage() const
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
GCNScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
bool hasGFX90AInsts() const
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:232
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
void traceCandidate(const SchedCandidate &Cand)
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
const TargetSchedModel * SchedModel
const MachineSchedContext * Context
const TargetRegisterInfo * TRI
GenericScheduler shrinks the unscheduled zone using heuristics to balance the schedule.
SchedCandidate BotCand
Candidate last picked from Bot boundary.
SchedCandidate TopCand
Candidate last picked from Top boundary.
MachineSchedPolicy RegionPolicy
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
ScheduleDAGMILive * DAG
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
bool shouldRevertScheduling(unsigned WavesAfter) override
bool hasInterval(Register Reg) const
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
SlotIndexes * getSlotIndexes() const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
void removeInterval(Register Reg)
Interval removal.
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
unsigned succ_size() const
MachineInstrBundleIterator< MachineInstr > iterator
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Representation of each machine instruction.
Definition: MachineInstr.h:68
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:326
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void clearRegisterDeads(Register Reg)
Clear all dead flags on operands defining register Reg.
MachineOperand class - Representation of each machine instruction operand.
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineOperand * getOneDef(Register Reg) const
Returns the defining operand if there is exactly one operand defining the specified register, otherwise return null.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
bool hasOneDef(Register RegNo) const
Return true if there is exactly one operand defining the specified register.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
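A sketch of the single-def, single-use screening these MRI helpers support, e.g. when looking for rematerialization candidates (hypothetical helper; MRI, TII and a 0-based virtual register index Idx are assumptions):

bool isSimpleRematCandidate(const MachineRegisterInfo &MRI,
                            const TargetInstrInfo &TII, unsigned Idx) {
  Register Reg = Register::index2VirtReg(Idx);
  // Require exactly one def and exactly one non-debug use.
  if (!MRI.hasOneDef(Reg) || !MRI.hasOneNonDBGUse(Reg))
    return false;
  const MachineOperand *DefOp = MRI.getOneDef(Reg);
  // The lone defining instruction must be trivially rematerializable.
  return DefOp && TII.isTriviallyReMaterializable(*DefOp->getParent());
}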
bool shouldRevertScheduling(unsigned WavesAfter) override
bool shouldRevertScheduling(unsigned WavesAfter) override
bool initGCNRegion() override
bool initGCNSchedStage() override
Capture a change in pressure for a single pressure set.
void setUnitInc(int Inc)
Helpers for implementing custom MachineSchedStrategy classes.
bool empty() const
Track the current register pressure at some position in the instruction stream, and remember the high water mark within the region traversed.
void advance()
Advance across the current instruction.
void getDownwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction top-down.
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
void getUpwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction bottom-up.
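A sketch of querying a single pressure set with these trackers (hypothetical helper; PSet is assumed to be the index of the pressure set of interest):

unsigned pressureBelow(RegPressureTracker &RPTracker, const MachineInstr *MI,
                       unsigned PSet) {
  std::vector<unsigned> Pressure, MaxPressure;
  // Per-set pressure after traversing MI top-down; getUpwardPressure gives
  // the bottom-up counterpart.
  RPTracker.getDownwardPressure(MI, Pressure, MaxPressure);
  return Pressure[PSet];
}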
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current function.
List of registers defined and used by a machine instruction.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOperand flags.
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the Regi...
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs vector.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
const TargetSchedModel & getSchedModel() const
Definition: SIInstrInfo.h:1260
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
void increaseOccupancy(const MachineFunction &MF, unsigned Limit)
void limitOccupancy(const MachineFunction &MF)
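A fragment showing how these two occupancy knobs might be driven from a scheduling stage (MF is assumed to be the current MachineFunction and NewOcc a freshly computed occupancy):

SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
MFI.increaseOccupancy(MF, NewOcc); // raise the recorded occupancy toward NewOcc
MFI.limitOccupancy(MF);            // then clamp it to the function's own limits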
static unsigned getNumCoveredRegs(LaneBitmask LM)
static bool isVGPRClass(const TargetRegisterClass *RC)
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:284
bool isBottomReady() const
Definition: ScheduleDAG.h:449
bool isTopReady() const
Definition: ScheduleDAG.h:446
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
Each Scheduling boundary is associated with ready queues.
unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
ScheduleDAGMI * DAG
SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
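A sketch of the usual interaction with a boundary's ready queue (hypothetical helper; HeuristicPick is assumed to have been chosen elsewhere by the candidate heuristics):

SUnit *takeFromZone(SchedBoundary &Zone, SUnit *HeuristicPick) {
  // A sole ready instruction short-circuits all other heuristics.
  SUnit *SU = Zone.pickOnlyChoice();
  if (!SU)
    SU = HeuristicPick;
  Zone.removeReady(SU); // the chosen node must leave the ready set
  return SU;
}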
A ScheduleDAG for scheduling lists of MachineInstr.
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
MachineBasicBlock * BB
The block in which to insert instructions.
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
virtual void finalizeSchedule()
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
unsigned NumRegionInstrs
Instructions in this region (distance(RegionBegin, RegionEnd)).
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while updating LiveIntervals and tracking register pressure.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
const RegPressureTracker & getBotRPTracker() const
bool isTrackingPressure() const
Return true if register pressure tracking is enabled.
const RegPressureTracker & getTopRPTracker() const
RegPressureTracker RPTracker
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions according to the given MachineSchedStrategy without much extra book-keeping.
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
MachineBasicBlock::iterator top() const
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
MachineBasicBlock::iterator bottom() const
void finishBlock() override
Cleans up after scheduling in the given block.
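A fragment sketching how a driver hands one recorded region to the DAG through the entry points above (MBB, Begin, End and NumInstrs describing the region are assumptions):

DAG.startBlock(MBB);                         // per-block setup
DAG.enterRegion(MBB, Begin, End, NumInstrs); // describe the region to schedule
DAG.schedule();                              // build SUnits and reorder them
DAG.exitRegion();                            // done with this region
DAG.finishBlock();                           // per-block cleanup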
LiveIntervals * LIS
const SUnit * getNextClusterPred() const
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
const SUnit * getNextClusterSucc() const
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:560
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:557
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:561
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:558
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:559
static const unsigned ScaleFactor
unsigned getMetric() const
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:68
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:241
SlotIndexes pass.
Definition: SlotIndexes.h:301
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
Definition: SlotIndexes.h:453
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: SmallVector.h:577
void resize(size_type N)
Definition: SmallVector.h:642
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
TargetInstrInfo - Interface to description of machine instruction set.
bool isTriviallyReMaterializable(const MachineInstr &MI) const
Return true if the instruction is trivially rematerializable, meaning it has no side effects and requires no operands that aren't always available.
Provide an instruction scheduling machine model to CodeGen passes.
virtual const TargetInstrInfo * getInstrInfo() const
bool shouldRevertScheduling(unsigned WavesAfter) override
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
unsigned getWeakLeft(const SUnit *SU, bool isTop)
GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI, Range &&LiveRegs)
cl::opt< bool > VerifyScheduling
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
std::unique_ptr< ScheduleDAGMutation > createIGroupLPDAGMutation()
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:292
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1854
DenseMap< MachineInstr *, GCNRPTracker::LiveRegSet > getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS)
Creates a map MachineInstr -> LiveRegSet; R is a range of iterators over instructions, and After selects whether liveness is taken upon entry to or exit from each instruction.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value that is congruent to Skew modulo Align.
Definition: MathExtras.h:425
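For example, rounding a per-wave register budget down to a 4-register allocation granule:

unsigned Budget = alignDown(/*Value=*/107, /*Align=*/4); // yields 104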
GCNRPTracker::LiveRegSet getLiveRegsBefore(const MachineInstr &MI, const LiveIntervals &LIS)
bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
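A sketch of how these helpers compose inside a tryCandidate-style tie-break (hypothetical function; it relies on the "return true once order is decided" convention stated above and on the GenericSchedulerBase::CandReason enumerators PhysReg and Weak):

bool tieBreak(GenericSchedulerBase::SchedCandidate &Cand,
              GenericSchedulerBase::SchedCandidate &TryCand, bool IsTop) {
  // Prefer shrinking physical register live ranges.
  if (tryGreater(biasPhysReg(TryCand.SU, IsTop), biasPhysReg(Cand.SU, IsTop),
                 TryCand, Cand, GenericSchedulerBase::PhysReg))
    return true;
  // Prefer the candidate with fewer weak (cluster) edges on this side.
  if (tryLess(getWeakLeft(TryCand.SU, IsTop), getWeakLeft(Cand.SU, IsTop),
              TryCand, Cand, GenericSchedulerBase::Weak))
    return true;
  return false; // undecided; callers typically fall back to node order
}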
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
bool operator()(std::pair< MachineInstr *, unsigned > A, std::pair< MachineInstr *, unsigned > B) const
unsigned getOccupancy(const GCNSubtarget &ST) const
unsigned getVGPRNum(bool UnifiedVGPRFile) const
unsigned getAGPRNum() const
unsigned getSGPRNum() const
bool less(const GCNSubtarget &ST, const GCNRegPressure &O, unsigned MaxOccupancy=std::numeric_limits< unsigned >::max()) const
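A sketch of computing a live set's pressure and the occupancy it permits (hypothetical helper; FirstMI is assumed to be the first instruction of a region):

unsigned occupancyAtRegionEntry(const GCNSubtarget &ST,
                                const MachineRegisterInfo &MRI,
                                const LiveIntervals &LIS,
                                const MachineInstr &FirstMI) {
  GCNRPTracker::LiveRegSet LiveIn = getLiveRegsBefore(FirstMI, LIS);
  GCNRegPressure RP = getRegPressure(MRI, LiveIn);
  LLVM_DEBUG(dbgs() << print(RP, &ST)); // SGPR/VGPR breakdown for debugging
  return RP.getOccupancy(ST);           // waves per EU this pressure allows
}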
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of pickNode().
void reset(const CandPolicy &NewPolicy)
void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
Status of an instruction's critical resource consumption.
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:81
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instantiate a scheduler.
RegisterClassInfo * RegClassInfo
PressureChange CriticalMax