LLVM 22.0.0git
GCNSchedStrategy.cpp File Reference

This contains a MachineSchedStrategy implementation for maximizing wave occupancy on GCN hardware. More...

Go to the source code of this file.

Classes

struct  EarlierIssuingCycle

Macros

#define DEBUG_TYPE   "machine-scheduler"
 Generally, the reason for having multiple scheduling stages is to account for the kernel-wide effect of register usage on occupancy.
#define REMAT_PREFIX   "[PreRARemat] "
 Makes it easy to filter for this stage's debug output.
#define REMAT_DEBUG(X)

Functions

static bool canUsePressureDiffs (const SUnit &SU)
 Checks whether SU can use the cached DAG pressure diffs to compute the current register pressure.
static void getRegisterPressures (bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU, std::vector< unsigned > &Pressure, std::vector< unsigned > &MaxPressure, GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker, ScheduleDAGMI *DAG, const SIRegisterInfo *SRI)
static MachineInstr * getLastMIForRegion (MachineBasicBlock::iterator RegionBegin, MachineBasicBlock::iterator RegionEnd)
static void printScheduleModel (std::set< std::pair< MachineInstr *, unsigned >, EarlierIssuingCycle > &ReadyCycles)
static bool hasIGLPInstrs (ScheduleDAGInstrs *DAG)

Variables

static cl::opt< bool > DisableUnclusterHighRP ("amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden, cl::desc("Disable unclustered high register pressure " "reduction scheduling stage."), cl::init(false))
static cl::opt< bool > DisableClusteredLowOccupancy ("amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden, cl::desc("Disable clustered low occupancy " "rescheduling for ILP scheduling stage."), cl::init(false))
static cl::opt< unsigned > ScheduleMetricBias ("amdgpu-schedule-metric-bias", cl::Hidden, cl::desc("Sets the bias which adds weight to occupancy vs latency. Set it to " "100 to chase the occupancy only."), cl::init(10))
static cl::opt< bool > RelaxedOcc ("amdgpu-schedule-relaxed-occupancy", cl::Hidden, cl::desc("Relax occupancy targets for kernels which are memory " "bound (amdgpu-membound-threshold), or " "Wave Limited (amdgpu-limit-wave-threshold)."), cl::init(false))
static cl::opt< bool > GCNTrackers ("amdgpu-use-amdgpu-trackers", cl::Hidden, cl::desc("Use the AMDGPU specific RPTrackers during scheduling"), cl::init(false))

Detailed Description

This contains a MachineSchedStrategy implementation for maximizing wave occupancy on GCN hardware.

This pass will apply multiple scheduling stages to the same function. Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual entry point for the scheduling of those regions is GCNScheduleDAGMILive::runSchedStages.

Definition in file GCNSchedStrategy.cpp.

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   "machine-scheduler"

Generally, the reason for having multiple scheduling stages is to account for the kernel-wide effect of register usage on occupancy.

Usually, only a few scheduling regions will have register pressure high enough to limit occupancy for the kernel, so constraints can be relaxed to improve ILP in other regions.

Definition at line 36 of file GCNSchedStrategy.cpp.

◆ REMAT_DEBUG

#define REMAT_DEBUG(X)
Value:
LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)
#define REMAT_PREFIX
Allows to easily filter for this stage's debug output.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207

Definition at line 1090 of file GCNSchedStrategy.cpp.

Referenced by llvm::PreRARematStage::initGCNSchedStage().

◆ REMAT_PREFIX

#define REMAT_PREFIX   "[PreRARemat] "

Makes it easy to filter for this stage's debug output.

Definition at line 1089 of file GCNSchedStrategy.cpp.

Function Documentation

◆ canUsePressureDiffs()

bool canUsePressureDiffs ( const SUnit & SU)
static

Checks whether SU can use the cached DAG pressure diffs to compute the current register pressure.

This works for the common case, but it has a few exceptions that have been observed through trial and error:

  • Explicit physical register operands
  • Subregister definitions

In both of those cases, PressureDiff doesn't represent the actual pressure, and querying LiveIntervals through the RegPressureTracker is needed to get an accurate value.

We should eventually only use PressureDiff for maximum performance, but this already allows 80% of SUs to take the fast path without changing scheduling at all. Further changes would either change scheduling, or require a lot more logic to recover an accurate pressure estimate from the PressureDiffs.

Definition at line 149 of file GCNSchedStrategy.cpp.

References llvm::SUnit::getInstr(), llvm::SUnit::isInstr(), and llvm::MachineInstr::operands().

Referenced by llvm::GCNSchedStrategy::initCandidate().

◆ getLastMIForRegion()

MachineInstr * getLastMIForRegion ( MachineBasicBlock::iterator RegionBegin,
MachineBasicBlock::iterator RegionEnd )
static

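◆ getRegisterPressures()

void getRegisterPressures ( bool AtTop,
const RegPressureTracker & RPTracker,
SUnit * SU,
std::vector< unsigned > & Pressure,
std::vector< unsigned > & MaxPressure,
GCNDownwardRPTracker & DownwardTracker,
GCNUpwardRPTracker & UpwardTracker,
ScheduleDAGMI * DAG,
const SIRegisterInfo * SRI )
static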

◆ hasIGLPInstrs()

bool hasIGLPInstrs ( ScheduleDAGInstrs * DAG)
static

◆ printScheduleModel()

void printScheduleModel ( std::set< std::pair< MachineInstr *, unsigned >, EarlierIssuingCycle > & ReadyCycles)
static

Variable Documentation

◆ DisableClusteredLowOccupancy

cl::opt< bool > DisableClusteredLowOccupancy ( "amdgpu-disable-clustered-low-occupancy-reschedule" ,
cl::Hidden ,
cl::desc("Disable clustered low occupancy " "rescheduling for ILP scheduling stage.") ,
cl::init(false)  )
static

◆ DisableUnclusterHighRP

cl::opt< bool > DisableUnclusterHighRP ( "amdgpu-disable-unclustered-high-rp-reschedule" ,
cl::Hidden ,
cl::desc("Disable unclustered high register pressure " "reduction scheduling stage.") ,
cl::init(false)  )
static

◆ GCNTrackers

cl::opt< bool > GCNTrackers ( "amdgpu-use-amdgpu-trackers" ,
cl::Hidden ,
cl::desc("Use the AMDGPU specific RPTrackers during scheduling") ,
cl::init(false)  )
static

◆ RelaxedOcc

cl::opt< bool > RelaxedOcc ( "amdgpu-schedule-relaxed-occupancy" ,
cl::Hidden ,
cl::desc("Relax occupancy targets for kernels which are memory " "bound (amdgpu-membound-threshold), or " "Wave Limited (amdgpu-limit-wave-threshold).") ,
cl::init(false)  )
static

◆ ScheduleMetricBias

cl::opt< unsigned > ScheduleMetricBias ( "amdgpu-schedule-metric-bias" ,
cl::Hidden ,
cl::desc( "Sets the bias which adds weight to occupancy vs latency. Set it to " "100 to chase the occupancy only.") ,
cl::init(10)  )
static