Go to the documentation of this file.
17 #define DEBUG_TYPE "machine-scheduler"
24 HasExcessPressure(
false), MF(nullptr) { }
37 const int ErrorMargin = 3;
43 if (TargetOccupancy) {
44 SGPRCriticalLimit =
ST.getMaxNumSGPRs(TargetOccupancy,
true);
45 VGPRCriticalLimit =
ST.getMaxNumVGPRs(TargetOccupancy);
48 AMDGPU::RegisterPressureSets::SReg_32);
50 AMDGPU::RegisterPressureSets::VGPR_32);
53 SGPRCriticalLimit -= ErrorMargin;
54 VGPRCriticalLimit -= ErrorMargin;
57 void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand,
SUnit *SU,
60 unsigned SGPRPressure,
61 unsigned VGPRPressure) {
81 unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
82 unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
92 const unsigned MaxVGPRPressureInc = 16;
93 bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
94 bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
106 if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
107 HasExcessPressure =
true;
108 Cand.RPDelta.Excess =
PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
109 Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
112 if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
113 HasExcessPressure =
true;
114 Cand.RPDelta.Excess =
PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
115 Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
123 int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
124 int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
126 if (SGPRDelta >= 0 || VGPRDelta >= 0) {
127 HasExcessPressure =
true;
128 if (SGPRDelta > VGPRDelta) {
129 Cand.RPDelta.CriticalMax =
131 Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
133 Cand.RPDelta.CriticalMax =
135 Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
142 void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(
SchedBoundary &Zone,
143 const CandPolicy &ZonePolicy,
145 SchedCandidate &Cand) {
148 unsigned SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
149 unsigned VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
151 for (
SUnit *SU : Q) {
153 SchedCandidate TryCand(ZonePolicy);
154 initCandidate(TryCand, SU, Zone.
isTop(), RPTracker, SRI,
155 SGPRPressure, VGPRPressure);
157 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone :
nullptr;
159 if (TryCand.Reason !=
NoCand) {
161 if (TryCand.ResDelta == SchedResourceDelta())
163 Cand.setBest(TryCand);
171 SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(
bool &IsTopNode) {
184 CandPolicy BotPolicy;
188 CandPolicy TopPolicy;
202 SchedCandidate TCand;
203 TCand.reset(CandPolicy());
206 "Last pick result should correspond to re-picking right now");
222 SchedCandidate TCand;
223 TCand.reset(CandPolicy());
226 "Last pick result should correspond to re-picking right now");
242 IsTopNode = Cand.AtTop;
277 SU = pickNodeBidirectional(IsTopNode);
289 HasClusteredNodes =
true;
301 std::unique_ptr<MachineSchedStrategy>
S) :
305 StartingOccupancy(MFI.getOccupancy()),
306 MinOccupancy(StartingOccupancy), Stage(Collect), RegionIdx(0) {
308 LLVM_DEBUG(
dbgs() <<
"Starting occupancy is " << StartingOccupancy <<
".\n");
312 if (Stage == Collect) {
318 std::vector<MachineInstr*> Unsched;
320 for (
auto &
I : *
this) {
321 Unsched.push_back(&
I);
326 PressureBefore = Pressure[RegionIdx];
328 LLVM_DEBUG(
dbgs() <<
"Pressure before scheduling:\nRegion live-ins:";
330 dbgs() <<
"Region live-in pressure: ";
332 dbgs() <<
"Region register pressure: ";
339 S.HasClusteredNodes = Stage > InitialSchedule;
340 S.HasExcessPressure =
false;
343 RescheduleRegions[RegionIdx] =
false;
344 if (Stage == InitialSchedule &&
S.HasClusteredNodes)
345 RegionsWithClusters[RegionIdx] =
true;
346 if (
S.HasExcessPressure)
347 RegionsWithHighRP[RegionIdx] =
true;
353 auto PressureAfter = getRealRegPressure();
356 PressureAfter.print(
dbgs()));
358 if (PressureAfter.getSGPRNum() <=
S.SGPRCriticalLimit &&
359 PressureAfter.getVGPRNum(ST.
hasGFX90AInsts()) <=
S.VGPRCriticalLimit) {
360 Pressure[RegionIdx] = PressureAfter;
365 unsigned WavesAfter =
std::min(Occ, PressureAfter.getOccupancy(ST));
367 LLVM_DEBUG(
dbgs() <<
"Occupancy before scheduling: " << WavesBefore
368 <<
", after " << WavesAfter <<
".\n");
372 unsigned NewOccupancy =
std::max(WavesAfter, WavesBefore);
375 if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy &&
377 LLVM_DEBUG(
dbgs() <<
"Function is memory bound, allow occupancy drop up to "
379 NewOccupancy = WavesAfter;
381 if (NewOccupancy < MinOccupancy) {
382 MinOccupancy = NewOccupancy;
385 << MinOccupancy <<
".\n");
390 if (PressureAfter.getVGPRNum(
false) > MaxVGPRs ||
391 PressureAfter.getAGPRNum() > MaxVGPRs ||
392 PressureAfter.getSGPRNum() > MaxSGPRs) {
393 RescheduleRegions[RegionIdx] =
true;
394 RegionsWithHighRP[RegionIdx] =
true;
397 if (WavesAfter >= MinOccupancy) {
398 if (Stage == UnclusteredReschedule &&
399 !PressureAfter.less(ST, PressureBefore)) {
402 PressureAfter.less(ST, PressureBefore) ||
403 !RescheduleRegions[RegionIdx]) {
404 Pressure[RegionIdx] = PressureAfter;
405 if (!RegionsWithClusters[RegionIdx] &&
406 (Stage + 1) == UnclusteredReschedule)
407 RescheduleRegions[RegionIdx] =
false;
410 LLVM_DEBUG(
dbgs() <<
"New pressure will result in more spilling.\n");
415 RescheduleRegions[RegionIdx] = RegionsWithClusters[RegionIdx] ||
416 (Stage + 1) != UnclusteredReschedule;
419 if (
MI->isDebugInstr())
425 if (!
MI->isDebugInstr())
429 for (
auto &
Op :
MI->operands())
430 if (
Op.isReg() &&
Op.isDef())
431 Op.setIsUndef(
false);
434 if (!
MI->isDebugInstr()) {
474 size_t CurRegion = RegionIdx;
475 for (
size_t E = Regions.size(); CurRegion !=
E; ++CurRegion)
476 if (Regions[CurRegion].first->getParent() !=
MBB)
481 auto LiveInIt = MBBLiveIns.find(
MBB);
482 if (LiveInIt != MBBLiveIns.end()) {
483 auto LiveIn =
std::move(LiveInIt->second);
485 MBBLiveIns.erase(LiveInIt);
487 auto &Rgn = Regions[CurRegion];
490 auto LRS = BBLiveInMap.lookup(NonDbgMI);
491 #ifdef EXPENSIVE_CHECKS
500 if (Regions[CurRegion].first ==
I) {
501 LiveIns[CurRegion] =
RPTracker.getLiveRegs();
505 if (Regions[CurRegion].second ==
I) {
506 Pressure[CurRegion] =
RPTracker.moveMaxPressure();
507 if (CurRegion-- == RegionIdx)
521 MBBLiveIns[OnlySucc] =
RPTracker.moveLiveRegs();
526 GCNScheduleDAGMILive::getBBLiveInMap()
const {
528 std::vector<MachineInstr *> BBStarters;
529 BBStarters.reserve(Regions.size());
530 auto I = Regions.rbegin(),
E = Regions.rend();
531 auto *
BB =
I->first->getParent();
534 BBStarters.push_back(
MI);
537 }
while (
I !=
E &&
I->first->getParent() ==
BB);
544 LLVM_DEBUG(
dbgs() <<
"All regions recorded, starting actual scheduling.\n");
546 LiveIns.resize(Regions.size());
547 Pressure.resize(Regions.size());
548 RescheduleRegions.
resize(Regions.size());
549 RegionsWithClusters.
resize(Regions.size());
550 RegionsWithHighRP.
resize(Regions.size());
551 RescheduleRegions.
set();
552 RegionsWithClusters.
reset();
553 RegionsWithHighRP.
reset();
555 if (!Regions.empty())
556 BBLiveInMap = getBBLiveInMap();
558 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
565 if (Stage > InitialSchedule) {
574 if (Stage == UnclusteredReschedule) {
575 if (RescheduleRegions.
none())
578 "Retrying function scheduling without clustering.\n");
581 if (Stage == ClusteredLowOccupancyReschedule) {
582 if (StartingOccupancy <= MinOccupancy)
587 <<
"Retrying function scheduling with lowest recorded occupancy "
588 << MinOccupancy <<
".\n");
590 S.setTargetOccupancy(MinOccupancy);
594 if (Stage == UnclusteredReschedule)
597 for (
auto Region : Regions) {
598 if ((Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx]) ||
599 (Stage == ClusteredLowOccupancyReschedule &&
600 !RegionsWithClusters[RegionIdx] && !RegionsWithHighRP[RegionIdx])) {
613 if (Stage == InitialSchedule)
614 computeBlockPressure(
MBB);
631 else dbgs() <<
"End";
641 if (Stage == UnclusteredReschedule)
643 }
while (Stage != LastStage);
MachineRegisterInfo & MRI
Virtual/real register map.
unsigned succ_size() const
const TargetRegisterInfo * TRI
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
GCNRPTracker::LiveRegSet getLiveRegsBefore(const MachineInstr &MI, const LiveIntervals &LIS)
SlotIndexes * getSlotIndexes() const
unsigned getMinAllowedOccupancy() const
RegisterClassInfo * RegClassInfo
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI, Range &&LiveRegs)
DenseMap< MachineInstr *, GCNRPTracker::LiveRegSet > getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS)
creates a map MachineInstr -> LiveRegSet R - range of iterators on instructions After - upon entry or...
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
List of registers defined and used by a machine instruction.
bool none() const
none - Returns true if none of the bits are set.
void reset(const CandPolicy &NewPolicy)
void getUpwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction bottom-up.
unsigned NumRegionInstrs
Instructions in this region (distance(RegionBegin, RegionEnd)).
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
const TargetSchedModel * SchedModel
Track the current register pressure at some position in the instruction stream, and remember the high water mark within the region traversed.
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outside the zone.
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
SchedCandidate TopCand
Candidate last picked from Top boundary.
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
bool isBottomReady() const
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
This is a minimal scheduler strategy.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
(vector float) vec_cmpeq(*A, *B) C
unsigned getOccupancy(const GCNSubtarget &ST) const
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
void getDownwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction top-down.
unsigned NodeNum
Entry # of node in the node vector.
Policy for scheduling the next instruction in the candidate's zone.
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
SlotIndex - An opaque wrapper around machine indexes.
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
RegPressureTracker RPTracker
MachineSchedPolicy RegionPolicy
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOperand flags.
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current function.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the traversed region.
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
Representation of each machine instruction.
const MachineSchedContext * Context
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=false)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instantiate a scheduler.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
void advance()
Advance across the current instruction.
bool isScheduled
True once scheduled.
unsigned getMinWavesPerEU() const
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
succ_iterator succ_begin()
std::unique_ptr< MachineSchedStrategy > SchedImpl
bool hasGFX90AInsts() const
SchedCandidate BotCand
Candidate last picked from Bot boundary.
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions according to the given MachineSchedStrategy without much extra book-keeping.
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
unsigned getOccupancy() const
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
bool isCluster() const
Tests if this is an Order dependence that is marked as "cluster", meaning it is artificial and wants to be adjacent.
const TargetRegisterInfo * TRI
Target processor register info.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
const RegPressureTracker & getBotRPTracker() const
MachineFunction & MF
Machine function.
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
void limitOccupancy(const MachineFunction &MF)
bool ShouldTrackLaneMasks
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
void traceCandidate(const SchedCandidate &Cand)
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C)
cl::opt< bool > VerifyScheduling
SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
MachineBasicBlock::iterator bottom() const
void finishBlock() override
Cleans up after scheduling in the given block.
Capture a change in pressure for a single pressure set.
Each Scheduling boundary is associated with ready queues.
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the Regi...
MachineBasicBlock * BB
The block in which to insert instructions.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
static void printLiveRegs(raw_ostream &OS, const LiveRegSet &LiveRegs, const MachineRegisterInfo &MRI)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
const RegPressureTracker & getTopRPTracker() const
Align max(MaybeAlign Lhs, Align Rhs)
MachineBasicBlock::iterator top() const
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Scheduling unit. This is a node in the scheduling DAG.
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while updating LiveIntervals and tracking regpressure.
void print(raw_ostream &OS, const GCNSubtarget *ST=nullptr) const
Helpers for implementing custom MachineSchedStrategy classes.
GenericScheduler shrinks the unscheduled zone using heuristics to balance the schedule.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
GCNScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)