LLVM 22.0.0git
GCNSchedStrategy.h
Go to the documentation of this file.
1//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
14#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
15
16#include "GCNRegPressure.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/MapVector.h"
21
22namespace llvm {
23
25class SIRegisterInfo;
26class GCNSubtarget;
27class GCNSchedStage;
28
37
38#ifndef NDEBUG
39raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
40#endif
41
42/// This is a minimal scheduler strategy. The main difference between this
43/// and the GenericScheduler is that GCNSchedStrategy uses different
44/// heuristics to determine excess/critical pressure sets.
46protected:
47 SUnit *pickNodeBidirectional(bool &IsTopNode, bool &PickedPending);
48
49 void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
50 const RegPressureTracker &RPTracker,
51 SchedCandidate &Cand, bool &IsPending,
52 bool IsBottomUp);
53
54 void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
55 const RegPressureTracker &RPTracker,
56 const SIRegisterInfo *SRI, unsigned SGPRPressure,
57 unsigned VGPRPressure, bool IsBottomUp);
58
59 /// Evaluates instructions in the pending queue using a subset of scheduling
60 /// heuristics.
61 ///
62 /// Instructions that cannot be issued due to hardware constraints are placed
63 /// in the pending queue rather than the available queue, making them normally
64 /// invisible to scheduling heuristics. However, in certain scenarios (such as
65 /// avoiding register spilling), it may be beneficial to consider scheduling
66 /// these not-yet-ready instructions.
68 SchedBoundary *Zone) const;
69
70 void printCandidateDecision(const SchedCandidate &Current,
71 const SchedCandidate &Preferred);
72
73 std::vector<unsigned> Pressure;
74
75 std::vector<unsigned> MaxPressure;
76
78
80
82
84
85 // Scheduling stages for this strategy.
87
88 // Pointer to the current SchedStageID.
90
91 // GCN RP Tracker for top-down scheduling
93
94 // GCN RP Tracker for bottom-up scheduling
96
97public:
98 // schedule() has seen register pressure over the critical limits and had to
99 // track register pressure for actual scheduling heuristics.
101
102 // Schedule known to have excess register pressure. Be more conservative in
103 // increasing ILP and preserving VGPRs.
104 bool KnownExcessRP = false;
105
106 // An error margin is necessary because of poor performance of the generic RP
107 // tracker and can be adjusted up for tuning heuristics to try and more
108 // aggressively reduce register pressure.
109 unsigned ErrorMargin = 3;
110
111 // Bias for SGPR limits under a high register pressure.
112 const unsigned HighRPSGPRBias = 7;
113
114 // Bias for VGPR limits under a high register pressure.
115 const unsigned HighRPVGPRBias = 7;
116
118
120
121 unsigned SGPRLimitBias = 0;
122
123 unsigned VGPRLimitBias = 0;
124
126
127 SUnit *pickNode(bool &IsTopNode) override;
128
129 void schedNode(SUnit *SU, bool IsTopNode) override;
130
131 void initialize(ScheduleDAGMI *DAG) override;
132
133 unsigned getTargetOccupancy() { return TargetOccupancy; }
134
135 void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }
136
138
139 // Advances stage. Returns true if there are remaining stages.
140 bool advanceStage();
141
142 bool hasNextStage() const;
143
145
147
149};
150
151/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
152/// maximum number of waves per simd).
154public:
156 bool IsLegacyScheduler = false);
157};
158
159/// The goal of this scheduling strategy is to maximize ILP for a single wave
160/// (i.e. latency hiding).
162protected:
163 bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
164 SchedBoundary *Zone) const override;
165
166public:
168};
169
170/// The goal of this scheduling strategy is to maximize memory clause for a
171/// single wave.
173protected:
174 bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
175 SchedBoundary *Zone) const override;
176
177public:
179};
180
182 unsigned ScheduleLength;
183 unsigned BubbleCycles;
184
185public:
186 ScheduleMetrics() = default;
187 ScheduleMetrics(unsigned L, unsigned BC)
188 : ScheduleLength(L), BubbleCycles(BC) {}
189 unsigned getLength() const { return ScheduleLength; }
190 unsigned getBubbles() const { return BubbleCycles; }
191 unsigned getMetric() const {
192 unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
193 // Metric is zero if the amount of bubbles is less than 1% which is too
194 // small. So, return 1.
195 return Metric ? Metric : 1;
196 }
197 static const unsigned ScaleFactor;
198};
199
201 dbgs() << "\n Schedule Metric (scaled by "
203 << " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
204 << Sm.getLength() << " ]\n";
205 return OS;
206}
207
208class GCNScheduleDAGMILive;
211 // The live in/out pressure as indexed by the first or last MI in the region
212 // before scheduling.
214 // The mapping of RegionIDx to key instruction
215 DenseMap<unsigned, MachineInstr *> IdxToInstruction;
216 // Whether we are calculating LiveOuts or LiveIns
217 bool IsLiveOut;
218
219public:
220 RegionPressureMap() = default;
222 : DAG(GCNDAG), IsLiveOut(LiveOut) {}
223 // Build the Instr->LiveReg and RegionIdx->Instr maps
224 void buildLiveRegMap();
225
226 // Retrieve the LiveReg for a given RegionIdx
228 assert(IdxToInstruction.contains(RegionIdx));
229 MachineInstr *Key = IdxToInstruction[RegionIdx];
230 return RegionLiveRegMap[Key];
231 }
232};
233
234/// A region's boundaries i.e. a pair of instruction bundle iterators. The lower
235/// boundary is inclusive, the upper boundary is exclusive.
237 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>;
238
240 friend class GCNSchedStage;
244 friend class PreRARematStage;
246 friend class RegionPressureMap;
247
248 const GCNSubtarget &ST;
249
251
252 // Occupancy target at the beginning of function scheduling cycle.
253 unsigned StartingOccupancy;
254
256 // Minimal real occupancy recorded for the function.
256 unsigned MinOccupancy;
257
258 // Vector of regions recorded for later rescheduling.
260
261 // Record regions with high register pressure.
262 BitVector RegionsWithHighRP;
263
264 // Record regions with excess register pressure over the physical register
265 // limit. Register pressure in these regions usually will result in spilling.
266 BitVector RegionsWithExcessRP;
267
268 // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
269 BitVector RegionsWithIGLPInstrs;
270
271 // Region live-in cache.
273
274 // Region pressure cache.
276
277 // Temporary basic block live-in cache.
279
280 // The map of the initial first region instruction to region live in registers
282
283 // Calculate the map of the initial first region instruction to region live in
284 // registers
286
287 // Calculate the map of the initial last region instruction to region live out
288 // registers
290 getRegionLiveOutMap() const;
291
292 // The live out registers per region. These are internally stored as a map of
293 // the initial last region instruction to region live out registers, but can
294 // be retrieved with the regionIdx by calls to getLiveRegsForRegionIdx.
295 RegionPressureMap RegionLiveOuts;
296
297 // Return current region pressure.
298 GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;
299
300 // Compute and cache live-ins and pressure for all regions in block.
301 void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);
302
303 /// If necessary, updates a region's boundaries following insertion ( \p NewMI
304 /// != nullptr) or removal ( \p NewMI == nullptr) of a \p MI in the region.
305 /// For an MI removal, this must be called before the MI is actually erased
306 /// from its parent MBB.
307 void updateRegionBoundaries(RegionBoundaries &RegionBounds,
309 MachineInstr *NewMI);
310
311 void runSchedStages();
312
313 std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);
314
315public:
317 std::unique_ptr<MachineSchedStrategy> S);
318
319 void schedule() override;
320
321 void finalizeSchedule() override;
322};
323
324// GCNSchedStrategy applies multiple scheduling stages to a function.
326protected:
328
330
332
334
336
338
339 // The current block being scheduled.
341
342 // Current region index.
343 unsigned RegionIdx = 0;
344
345 // Record the original order of instructions before scheduling.
346 std::vector<MachineInstr *> Unsched;
347
348 // RP before scheduling the current region.
350
351 // RP after scheduling the current region.
353
354 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
355
357
358public:
359 // Initialize state for a scheduling stage. Returns false if the current stage
360 // should be skipped.
361 virtual bool initGCNSchedStage();
362
363 // Finalize state after finishing a scheduling pass on the function.
364 virtual void finalizeGCNSchedStage();
365
366 // Setup for scheduling a region. Returns false if the current region should
367 // be skipped.
368 virtual bool initGCNRegion();
369
370 // Track whether a new region is also a new MBB.
371 void setupNewBlock();
372
373 // Finalize state after scheduling a region.
374 void finalizeGCNRegion();
375
376 // Check result of scheduling.
377 void checkScheduling();
378
379 // Computes the given schedule's virtual execution time in clocks.
380 ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
382 unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
383 DenseMap<unsigned, unsigned> &ReadyCycles,
384 const TargetSchedModel &SM);
385
386 // Returns true if scheduling should be reverted.
387 virtual bool shouldRevertScheduling(unsigned WavesAfter);
388
389 // Returns true if current region has known excess pressure.
390 bool isRegionWithExcessRP() const {
391 return DAG.RegionsWithExcessRP[RegionIdx];
392 }
393
394 // The region number this stage is currently working on
395 unsigned getRegionIdx() { return RegionIdx; }
396
397 // Returns true if the new schedule may result in more spilling.
398 bool mayCauseSpilling(unsigned WavesAfter);
399
400 // Attempt to revert scheduling for this region.
401 void revertScheduling();
402
404
405 virtual ~GCNSchedStage() = default;
406};
407
415
417private:
418 // Save the initial occupancy before starting this stage.
419 unsigned InitialOccupancy;
420 // Save the temporary target occupancy before starting this stage.
421 unsigned TempTargetOccupancy;
422 // Track whether any region was scheduled by this stage.
423 bool IsAnyRegionScheduled;
424
425public:
426 bool initGCNSchedStage() override;
427
428 void finalizeGCNSchedStage() override;
429
430 bool initGCNRegion() override;
431
432 bool shouldRevertScheduling(unsigned WavesAfter) override;
433
436};
437
438// Retry function scheduling if we found resulting occupancy and it is
439// lower than used for other scheduling passes. This will give more freedom
440// to schedule low register pressure blocks.
442public:
443 bool initGCNSchedStage() override;
444
445 bool initGCNRegion() override;
446
447 bool shouldRevertScheduling(unsigned WavesAfter) override;
448
451};
452
453/// Attempts to reduce function spilling or, if there is no spilling, to
454/// increase function occupancy by one with respect to ArchVGPR usage by sinking
455/// rematerializable instructions to their use. When the stage
456/// estimates reducing spilling or increasing occupancy is possible, as few
457/// instructions as possible are rematerialized to reduce potential negative
458/// effects on function latency.
460private:
461 /// Useful information about a rematerializable instruction.
462 struct RematInstruction {
463 /// Single use of the rematerializable instruction's defined register,
464 /// located in a different block.
466 /// Rematerialized version of \p DefMI, set in
467 /// PreRARematStage::rematerialize. Used for reverting rematerializations.
468 MachineInstr *RematMI;
469 /// Set of regions in which the rematerializable instruction's defined
470 /// register is a live-in.
471 SmallDenseSet<unsigned, 4> LiveInRegions;
472
473 RematInstruction(MachineInstr *UseMI) : UseMI(UseMI) {}
474 };
475
476 /// Maps all MIs to their parent region. MI terminators are considered to be
477 /// outside the region they delimitate, and as such are not stored in the map.
479 /// Parent MBB to each region, in region order.
481 /// Collects instructions to rematerialize.
483 /// Collects regions whose live-ins or register pressure will change due to
484 /// rematerializations.
486 /// In case we need to rollback rematerializations, save lane masks for all
487 /// rematerialized registers in all regions in which they are live-ins.
489 /// After successful stage initialization, indicates which regions should be
490 /// rescheduled.
491 BitVector RescheduleRegions;
492 /// The target occupancy the stage is trying to achieve. Empty when the
493 /// objective is spilling reduction.
494 std::optional<unsigned> TargetOcc;
495 /// Achieved occupancy *only* through rematerializations (pre-rescheduling).
496 /// Smaller than or equal to the target occupancy.
497 unsigned AchievedOcc;
498
499 /// Returns whether remat can reduce spilling or increase function occupancy
500 /// by 1 through rematerialization. If it can do one, collects instructions in
501 /// PreRARematStage::Rematerializations and sets the target occupancy in
502 /// PreRARematStage::TargetOccupancy.
503 bool canIncreaseOccupancyOrReduceSpill();
504
505 /// Whether the MI is rematerializable
506 bool isReMaterializable(const MachineInstr &MI);
507
508 /// Rematerializes all instructions in PreRARematStage::Rematerializations
509 /// and stores the achieved occupancy after remat in
510 /// PreRARematStage::AchievedOcc.
511 void rematerialize();
512
513 /// If remat alone did not increase occupancy to the target one, rollbacks all
514 /// rematerializations and resets live-ins/RP in all regions impacted by the
515 /// stage to their pre-stage values.
516 void finalizeGCNSchedStage() override;
517
518public:
519 bool initGCNSchedStage() override;
520
521 bool initGCNRegion() override;
522
523 bool shouldRevertScheduling(unsigned WavesAfter) override;
524
527};
528
536
545
547private:
548 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
549
550 bool HasIGLPInstrs = false;
551
552public:
553 void schedule() override;
554
555 void finalizeSchedule() override;
556
558 std::unique_ptr<MachineSchedStrategy> S,
559 bool RemoveKillFlags);
560};
561
562} // End namespace llvm
563
564#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
MachineInstrBuilder & UseMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
This file defines the DenseMap class.
This file defines the GCNRegPressure class, which tracks registry pressure by bookkeeping number of S...
IRTranslator LLVM IR MI
This file implements a map that provides insertion order iteration.
bool shouldRevertScheduling(unsigned WavesAfter) override
ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
GCNMaxILPSchedStrategy(const MachineSchedContext *C)
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
GCNMaxMemoryClauseSchedStrategy tries best to clause memory instructions as much as possible.
GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C)
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C, bool IsLegacyScheduler=false)
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Orders nodes according to selected style.
GCNPostScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
DenseMap< unsigned, LaneBitmask > LiveRegSet
GCNSchedStrategy & S
GCNRegPressure PressureBefore
bool isRegionWithExcessRP() const
bool mayCauseSpilling(unsigned WavesAfter)
ScheduleMetrics getScheduleMetrics(const std::vector< SUnit > &InputSchedule)
GCNScheduleDAGMILive & DAG
const GCNSchedStageID StageID
std::vector< MachineInstr * > Unsched
GCNRegPressure PressureAfter
MachineFunction & MF
SIMachineFunctionInfo & MFI
unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle, DenseMap< unsigned, unsigned > &ReadyCycles, const TargetSchedModel &SM)
virtual ~GCNSchedStage()=default
virtual void finalizeGCNSchedStage()
virtual bool initGCNSchedStage()
virtual bool shouldRevertScheduling(unsigned WavesAfter)
std::vector< std::unique_ptr< ScheduleDAGMutation > > SavedMutations
GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
MachineBasicBlock * CurrentMBB
const GCNSubtarget & ST
This is a minimal scheduler strategy.
const unsigned HighRPSGPRBias
GCNDownwardRPTracker DownwardTracker
GCNSchedStrategy(const MachineSchedContext *C)
SmallVector< GCNSchedStageID, 4 > SchedStages
std::vector< unsigned > MaxPressure
SUnit * pickNodeBidirectional(bool &IsTopNode, bool &PickedPending)
GCNSchedStageID getCurrentStage()
bool tryPendingCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Evaluates instructions in the pending queue using a subset of scheduling heuristics.
SmallVectorImpl< GCNSchedStageID >::iterator CurrentStage
void schedNode(SUnit *SU, bool IsTopNode) override
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
GCNDownwardRPTracker * getDownwardTracker()
std::vector< unsigned > Pressure
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
GCNUpwardRPTracker UpwardTracker
void printCandidateDecision(const SchedCandidate &Current, const SchedCandidate &Preferred)
const unsigned HighRPVGPRBias
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand, bool &IsPending, bool IsBottomUp)
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure, bool IsBottomUp)
void setTargetOccupancy(unsigned Occ)
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
GCNUpwardRPTracker * getUpwardTracker()
GCNSchedStageID getNextStage() const
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Orders nodes according to selected style.
GCNScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
ScheduleDAGMILive * DAG
GenericScheduler(const MachineSchedContext *C)
bool shouldRevertScheduling(unsigned WavesAfter) override
ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
MachineInstrBundleIterator< MachineInstr > iterator
Representation of each machine instruction.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
bool shouldRevertScheduling(unsigned WavesAfter) override
MemoryClauseInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
bool shouldRevertScheduling(unsigned WavesAfter) override
OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
bool shouldRevertScheduling(unsigned WavesAfter) override
bool initGCNSchedStage() override
Track the current register pressure at some position in the instruction stream, and remember the high...
GCNRPTracker::LiveRegSet & getLiveRegsForRegionIdx(unsigned RegionIdx)
RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Scheduling unit. This is a node in the scheduling DAG.
Each Scheduling boundary is associated with ready queues.
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
ScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
unsigned getBubbles() const
ScheduleMetrics(unsigned L, unsigned BC)
unsigned getLength() const
static const unsigned ScaleFactor
unsigned getMetric() const
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition DenseSet.h:291
typename SuperClass::iterator iterator
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provide an instruction scheduling machine model to CodeGen passes.
UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
bool shouldRevertScheduling(unsigned WavesAfter) override
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1655
std::pair< MachineBasicBlock::iterator, MachineBasicBlock::iterator > RegionBoundaries
A region's boundaries i.e.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...