This contains a MachineSchedStrategy implementation for maximizing wave occupancy on GCN hardware.

#include "GCNSchedStrategy.h"
#include "AMDGPUIGroupLP.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"

Classes
struct	EarlierIssuingCycle

Macros
#define	DEBUG_TYPE "machine-scheduler"
	Generally, the reason for having multiple scheduling stages is to account for the kernel-wide effect of register usage on occupancy.

Functions
static bool	canUsePressureDiffs (const SUnit &SU)
	Checks whether `SU` can use the cached DAG pressure diffs to compute the current register pressure.

static void	getRegisterPressures (bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU, std::vector< unsigned > &Pressure, std::vector< unsigned > &MaxPressure)

static void	printScheduleModel (std::set< std::pair< MachineInstr *, unsigned >, EarlierIssuingCycle > &ReadyCycles)

static bool	hasIGLPInstrs (ScheduleDAGInstrs *DAG)

Variables
static cl::opt< bool >	DisableUnclusterHighRP ("amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden, cl::desc("Disable unclustered high register pressure " "reduction scheduling stage."), cl::init(false))

static cl::opt< bool >	DisableClusteredLowOccupancy ("amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden, cl::desc("Disable clustered low occupancy " "rescheduling for ILP scheduling stage."), cl::init(false))

static cl::opt< unsigned >	ScheduleMetricBias ("amdgpu-schedule-metric-bias", cl::Hidden, cl::desc("Sets the bias which adds weight to occupancy vs latency. Set it to " "100 to chase the occupancy only."), cl::init(10))

static cl::opt< bool >	RelaxedOcc ("amdgpu-schedule-relaxed-occupancy", cl::Hidden, cl::desc("Relax occupancy targets for kernels which are memory " "bound (amdgpu-membound-threshold), or " "Wave Limited (amdgpu-limit-wave-threshold)."), cl::init(false))

Detailed Description

This contains a MachineSchedStrategy implementation for maximizing wave occupancy on GCN hardware.

This pass will apply multiple scheduling stages to the same function. Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual entry point for the scheduling of those regions is GCNScheduleDAGMILive::runSchedStages.

Definition in file GCNSchedStrategy.cpp.

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE "machine-scheduler"

Generally, the reason for having multiple scheduling stages is to account for the kernel-wide effect of register usage on occupancy.

Usually, only a few scheduling regions will have register pressure high enough to limit occupancy for the kernel, so constraints can be relaxed to improve ILP in other regions.

Definition at line 31 of file GCNSchedStrategy.cpp.

Function Documentation

◆ canUsePressureDiffs()

static bool canUsePressureDiffs ( const SUnit & SU )

static

Checks whether SU can use the cached DAG pressure diffs to compute the current register pressure.

This works for the common case, but it has a few exceptions that have been observed through trial and error:

- Explicit physical register operands
- Subregister definitions

In both of those cases, PressureDiff doesn't represent the actual pressure, and querying LiveIntervals through the RegPressureTracker is needed to get an accurate value.

We should eventually only use PressureDiff for maximum performance, but this already allows 80% of SUs to take the fast path without changing scheduling at all. Further changes would either change scheduling, or require a lot more logic to recover an accurate pressure estimate from the PressureDiffs.

Definition at line 135 of file GCNSchedStrategy.cpp.

References llvm::SUnit::getInstr(), llvm::SUnit::isInstr(), and llvm::MachineInstr::operands().

Referenced by llvm::GCNSchedStrategy::initCandidate().

◆ getRegisterPressures()

static void getRegisterPressures	(	bool	AtTop,
		const RegPressureTracker &	RPTracker,
		SUnit *	SU,
		std::vector< unsigned > &	Pressure,
		std::vector< unsigned > &	MaxPressure
	)

static

Definition at line 151 of file GCNSchedStrategy.cpp.

References llvm::RegPressureTracker::getDownwardPressure(), llvm::SUnit::getInstr(), and llvm::RegPressureTracker::getUpwardPressure().

Referenced by llvm::GCNSchedStrategy::initCandidate().

◆ hasIGLPInstrs()

static bool hasIGLPInstrs ( ScheduleDAGInstrs * DAG )

static

Definition at line 1649 of file GCNSchedStrategy.cpp.

References llvm::ScheduleDAGInstrs::begin(), llvm::ScheduleDAGInstrs::end(), and MI.

Referenced by llvm::GCNPostScheduleDAGMILive::schedule().

◆ printScheduleModel()

static void printScheduleModel ( std::set< std::pair< MachineInstr *, unsigned >, EarlierIssuingCycle > & ReadyCycles )

static

Definition at line 1116 of file GCNSchedStrategy.cpp.

References llvm::dbgs(), and I.

Referenced by llvm::GCNSchedStage::getScheduleMetrics().

Variable Documentation

◆ DisableClusteredLowOccupancy

cl::opt< bool > DisableClusteredLowOccupancy("amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden, cl::desc("Disable clustered low occupancy " "rescheduling for ILP scheduling stage."), cl::init(false))

static

Referenced by llvm::ClusteredLowOccStage::initGCNSchedStage().

◆ DisableUnclusterHighRP

cl::opt< bool > DisableUnclusterHighRP("amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden, cl::desc("Disable unclustered high register pressure " "reduction scheduling stage."), cl::init(false))

static

Referenced by llvm::UnclusteredHighRPStage::initGCNSchedStage().

◆ RelaxedOcc

cl::opt< bool > RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden, cl::desc("Relax occupancy targets for kernels which are memory " "bound (amdgpu-membound-threshold), or " "Wave Limited (amdgpu-limit-wave-threshold)."), cl::init(false))

static

Referenced by llvm::GCNScheduleDAGMILive::GCNScheduleDAGMILive(), and llvm::GCNSchedStrategy::initialize().

◆ ScheduleMetricBias

cl::opt< unsigned > ScheduleMetricBias("amdgpu-schedule-metric-bias", cl::Hidden, cl::desc( "Sets the bias which adds weight to occupancy vs latency. Set it to " "100 to chase the occupancy only."), cl::init(10))

static

Referenced by llvm::UnclusteredHighRPStage::shouldRevertScheduling().

Classes

Macros

Functions

Variables

Detailed Description

Macro Definition Documentation

◆ DEBUG_TYPE

Function Documentation

◆ canUsePressureDiffs()

◆ getRegisterPressures()

◆ hasIGLPInstrs()

◆ printScheduleModel()

Variable Documentation

◆ DisableClusteredLowOccupancy

◆ DisableUnclusterHighRP

◆ RelaxedOcc

◆ ScheduleMetricBias