#include "Target/AMDGPU/GCNSubtarget.h"

Inheritance diagram for llvm::GCNSubtarget:

Public Types
enum class	TrapHandlerAbi { NONE = 0x00 , AMDHSA = 0x01 }
enum class	TrapID { LLVMAMDHSATrap = 0x02 , LLVMAMDHSADebugTrap = 0x03 }
Public Types inherited from llvm::AMDGPUSubtarget
enum	Generation { INVALID = 0 , R600 = 1 , R700 = 2 , EVERGREEN = 3 , NORTHERN_ISLANDS = 4 , SOUTHERN_ISLANDS = 5 , SEA_ISLANDS = 6 , VOLCANIC_ISLANDS = 7 , GFX9 = 8 , GFX10 = 9 , GFX11 = 10 , GFX12 = 11 , GFX13 = 12 }

Public Member Functions
	GCNSubtarget (const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM)
	~GCNSubtarget () override
GCNSubtarget &	initializeSubtargetDependencies (const Triple &TT, StringRef GPU, StringRef FS)
void	checkSubtargetFeatures (const Function &F) const
	Diagnose inconsistent subtarget features before attempting to codegen function `F`.
const SIInstrInfo *	getInstrInfo () const override
const SIFrameLowering *	getFrameLowering () const override
const SITargetLowering *	getTargetLowering () const override
const SIRegisterInfo *	getRegisterInfo () const override
const SelectionDAGTargetInfo *	getSelectionDAGInfo () const override
const CallLowering *	getCallLowering () const override
const InlineAsmLowering *	getInlineAsmLowering () const override
InstructionSelector *	getInstructionSelector () const override
const LegalizerInfo *	getLegalizerInfo () const override
const AMDGPURegisterBankInfo *	getRegBankInfo () const override
const AMDGPU::IsaInfo::AMDGPUTargetID &	getTargetID () const
const InstrItineraryData *	getInstrItineraryData () const override
void	ParseSubtargetFeatures (StringRef CPU, StringRef TuneCPU, StringRef FS)
Generation	getGeneration () const
bool	isGFX11Plus () const
unsigned	getMaxWaveScratchSize () const
unsigned	getKnownHighZeroBitsForFrameIndex () const
	Return the number of high bits known to be zero for a frame index.
int	getLDSBankCount () const
unsigned	getInstCacheLineSize () const
	Instruction cache line size in bytes (64 for pre-GFX11, 128 for GFX11+).
unsigned	getMaxPrivateElementSize (bool ForBufferRSrc=false) const
unsigned	getConstantBusLimit (unsigned Opcode) const
bool	zeroesHigh16BitsOfDest (unsigned Opcode) const
	Returns true if this instruction, which produces a 16-bit result in a 32-bit register, implicitly zeroes the high 16 bits of that register rather than preserving their original value.
bool	supportsWGP () const
bool	hasHWFP64 () const
bool	hasAddr64 () const
bool	hasFlat () const
bool	hasOnlyRevVALUShifts () const
bool	hasFractBug () const
bool	hasMed3_16 () const
bool	hasMin3Max3_16 () const
bool	hasSwap () const
bool	hasScalarPackInsts () const
bool	hasScalarMulHiInsts () const
bool	hasScalarSubwordLoads () const
TrapHandlerAbi	getTrapHandlerAbi () const
bool	supportsGetDoorbellID () const
bool	hasUsableDSOffset () const
	True if the offset field of DS instructions works as expected.
bool	unsafeDSOffsetFoldingEnabled () const
bool	hasUsableDivScaleConditionOutput () const
	Condition output from div_scale is usable.
bool	hasReadVCCZBug () const
	Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
bool	partialVCCWritesUpdateVCCZ () const
	Writes to VCC_LO/VCC_HI update the VCCZ flag.
bool	hasSMRDReadVALUDefHazard () const
	A read of an SGPR by an SMRD instruction requires 4 wait states when the SGPR was written by a VALU instruction.
bool	hasVMEMReadSGPRVALUDefHazard () const
	A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU instruction.
bool	hasRFEHazards () const
unsigned	getSetRegWaitStates () const
	Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
unsigned	getMaxLocalMemSizeWithWaveCount (unsigned WaveCount, const Function &) const
	Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
bool	supportsMinMaxDenormModes () const
bool	hasDenormModeInst () const
bool	useDS128 () const
bool	hasDS96AndDS128 () const
bool	haveRoundOpsF64 () const
	Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
bool	privateMemoryResourceIsRangeChecked () const
bool	usePRTStrictNull () const
bool	hasUnalignedBufferAccessEnabled () const
bool	hasUnalignedDSAccessEnabled () const
bool	hasUnalignedScratchAccessEnabled () const
bool	isXNACKEnabled () const
bool	isTgSplitEnabled () const
bool	isCuModeEnabled () const
bool	isPreciseMemoryEnabled () const
bool	hasFlatScrRegister () const
bool	hasFlatScratchSTMode () const
bool	hasFlatScratchSVSMode () const
bool	hasFlatScratchEnabled () const
bool	hasGlobalAddTidInsts () const
bool	hasAtomicCSub () const
bool	hasMTBUFInsts () const
bool	hasFormattedMUBUFInsts () const
bool	hasExportInsts () const
bool	hasVINTERPEncoding () const
bool	hasLdsAtomicAddF64 () const
bool	hasMultiDwordFlatScratchAddressing () const
bool	hasFlatLgkmVMemCountInOrder () const
bool	hasD16LoadStore () const
bool	d16PreservesUnusedBits () const
bool	hasD16Images () const
bool	ldsRequiresM0Init () const
	Return true if most LDS instructions have an m0 use that requires m0 to be initialized.
bool	hasGWSAutoReplay () const
bool	hasGWSSemaReleaseAll () const
bool	hasScalarAddSub64 () const
bool	hasScalarSMulU64 () const
bool	isMesaGfxShader (const Function &F) const
bool	hasMad64_32 () const
bool	hasAtomicFaddInsts () const
bool	vmemWriteNeedsExpWaitcnt () const
bool	hasInstPrefetch () const
bool	hasPrefetch () const
bool	hasSCmpK () const
Align	getStackAlignment () const
bool	enableMachineScheduler () const override
bool	useAA () const override
bool	enableSubRegLiveness () const override
void	setScalarizeGlobalBehavior (bool b)
bool	getScalarizeGlobalBehavior () const
bool	enableEarlyIfConversion () const override
void	overrideSchedPolicy (MachineSchedPolicy &Policy, const SchedRegion &Region) const override
void	overridePostRASchedPolicy (MachineSchedPolicy &Policy, const SchedRegion &Region) const override
void	mirFileLoaded (MachineFunction &MF) const override
unsigned	getMaxNumUserSGPRs () const
bool	useVGPRIndexMode () const
bool	hasScalarCompareEq64 () const
bool	hasLDSFPAtomicAddF32 () const
bool	hasLDSFPAtomicAddF64 () const
bool	hasPermLaneX16 () const
bool	hasPermLane64 () const
bool	hasDPPRowShare () const
bool	hasPkMovB32 () const
bool	hasFmaakFmamkF32Insts () const
bool	hasFmaakFmamkF64Insts () const
bool	hasNonNSAEncoding () const
unsigned	getNSAMaxSize (bool HasSampler=false) const
bool	hasMadF16 () const
bool	hasMovB64 () const
bool	hasScaleOffset () const
bool	hasSignedGVSOffset () const
bool	loadStoreOptEnabled () const
bool	hasUserSGPRInit16BugInWave32 () const
bool	has12DWordStoreHazard () const
bool	hasDwordx3LoadStores () const
bool	hasReadM0MovRelInterpHazard () const
bool	hasReadM0SendMsgHazard () const
bool	hasReadM0LdsDmaHazard () const
bool	hasReadM0LdsDirectHazard () const
bool	hasLDSMisalignedBugInWGPMode () const
bool	hasShift64HighRegBug () const
bool	hasTransForwardingHazard () const
bool	hasDstSelForwardingHazard () const
bool	hasDOTOpSelHazard () const
bool	hasVDecCoExecHazard () const
bool	hasHardClauses () const
bool	hasFPAtomicToDenormModeHazard () const
bool	hasVOP3DPP () const
bool	hasLdsDirect () const
bool	hasLdsWaitVMSRC () const
bool	hasVALUPartialForwardingHazard () const
bool	hasCvtScaleForwardingHazard () const
bool	hasLoopHeadInstSplitSensitivity () const
bool	requiresCodeObjectV6 () const
bool	useVGPRBlockOpsForCSR () const
bool	hasVALUMaskWriteHazard () const
bool	hasVALUReadSGPRHazard () const
bool	setRegModeNeedsVNOPs () const
bool	needsAlignedVGPRs () const
	Return true if operations acting on VGPR tuples require even alignment.
bool	hasSPackHL () const
	Return true if the target has the S_PACK_HL_B32_B16 instruction.
bool	hasCompressedExport () const
	Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (enable) bits.
bool	hasNullExportTarget () const
	Return true if the target's EXP instruction supports the NULL export target.
bool	hasFlatScratchSVSSwizzleBug () const
bool	hasDelayAlu () const
	Return true if the target has the S_DELAY_ALU instruction.
bool	hasLDSLoadB96_B128 () const
	Returns true if the target supports global_load_lds_dwordx3/global_load_lds_dwordx4 or buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit.
bool	hasExtendedWaitCounts () const
bool	hasNoF16PseudoScalarTransInlineConstants () const
bool	hasPKF32InstsReplicatingLower32BitsOfScalarInput () const
bool	hasAddPC64Inst () const
bool	hasExpertSchedulingMode () const
unsigned	maxHardClauseLength () const
unsigned	getOccupancyWithNumSGPRs (unsigned SGPRs) const
	Return the maximum number of waves per SIMD for kernels using `SGPRs` SGPRs.
unsigned	getOccupancyWithNumVGPRs (unsigned VGPRs, unsigned DynamicVGPRBlockSize) const
	Return the maximum number of waves per SIMD for kernels using `VGPRs` VGPRs.
std::pair< unsigned, unsigned >	computeOccupancy (const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
	Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is `F`, each workgroup uses `LDSSize` bytes of LDS, and each wave uses `NumSGPRs` SGPRs and `NumVGPRs` VGPRs.
bool	flatScratchIsPointer () const
bool	hasMergedShaders () const
bool	hasLegacyGeometry () const
bool	hasSplitBarriers () const
bool	hasRrWGMode () const
bool	hasSignedScratchOffsets () const
bool	hasINVWBL2WaitCntRequirement () const
bool	hasVOPD3 () const
bool	hasVectorMulU64 () const
bool	hasMadU64U32NoCarry () const
bool	hasIntMinMax64 () const
bool	hasPkMinMax3Insts () const
bool	hasSGetShaderCyclesInst () const
bool	hasGetPCZeroExtension () const
bool	needsKernArgPreloadProlog () const
bool	hasCondSubInsts () const
bool	hasSubClampInsts () const
unsigned	getSGPRAllocGranule () const
unsigned	getSGPREncodingGranule () const
unsigned	getTotalNumSGPRs () const
unsigned	getAddressableNumSGPRs () const
unsigned	getMinNumSGPRs (unsigned WavesPerEU) const
unsigned	getMaxNumSGPRs (unsigned WavesPerEU, bool Addressable) const
unsigned	getBaseReservedNumSGPRs (const bool HasFlatScratch) const
unsigned	getReservedNumSGPRs (const MachineFunction &MF) const
unsigned	getReservedNumSGPRs (const Function &F) const
unsigned	getMaxNumPreloadedSGPRs () const
unsigned	getBaseMaxNumSGPRs (const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
unsigned	getMaxNumSGPRs (const MachineFunction &MF) const
unsigned	getMaxNumSGPRs (const Function &F) const
unsigned	getVGPRAllocGranule (unsigned DynamicVGPRBlockSize) const
unsigned	getVGPREncodingGranule () const
unsigned	getTotalNumVGPRs () const
unsigned	getAddressableNumArchVGPRs () const
unsigned	getAddressableNumVGPRs (unsigned DynamicVGPRBlockSize) const
unsigned	getMinNumVGPRs (unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
unsigned	getMaxNumVGPRs (unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
unsigned	getBaseMaxNumVGPRs (const Function &F, std::pair< unsigned, unsigned > NumVGPRBounds) const
unsigned	getMaxNumVGPRs (const Function &F) const
unsigned	getMaxNumAGPRs (const Function &F) const
std::pair< unsigned, unsigned >	getMaxNumVectorRegs (const Function &F) const
	Return a pair of maximum numbers of VGPRs and AGPRs that meet the number of waves per execution unit required for the function `F`.
unsigned	getMaxNumVGPRs (const MachineFunction &MF) const
bool	supportsWave32 () const
bool	supportsWave64 () const
bool	isWave32 () const
bool	isWave64 () const
bool	isWaveSizeKnown () const
	Returns true if the wave size of this subtarget is reliably known.
const TargetRegisterClass *	getBoolRC () const
unsigned	getMaxWorkGroupsPerCU (unsigned FlatWorkGroupSize) const override
unsigned	getMinFlatWorkGroupSize () const override
unsigned	getMaxFlatWorkGroupSize () const override
unsigned	getWavesPerEUForWorkGroup (unsigned FlatWorkGroupSize) const override
unsigned	getMinWavesPerEU () const override
void	adjustSchedDependency (SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep, const TargetSchedModel *SchedModel) const override
bool	shouldClusterStores () const
unsigned	getNSAThreshold (const MachineFunction &MF) const
bool	requiresNopBeforeDeallocVGPRs () const
bool	requiresWaitIdleBeforeGetReg () const
bool	isDynamicVGPREnabled () const
unsigned	getDynamicVGPRBlockSize () const
bool	requiresDisjointEarlyClobberAndUndef () const override
bool	hasDsAtomicAsyncBarrierArriveB64PipeBug () const
bool	hasScratchBaseForwardingHazard () const
bool	hasFlatScratchHiInB64InstHazard () const
bool	requiresWaitXCntForSingleAccessInstructions () const
unsigned	getSNopBits () const
bool	supportsBPermute () const
bool	supportsWaveWideBPermute () const
bool	useRealTrue16Insts () const
	Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-generated.
bool	requiresWaitOnWorkgroupReleaseFence () const
unsigned	getMaxWavesPerEU () const
Public Member Functions inherited from llvm::AMDGPUSubtarget
	AMDGPUSubtarget (const Triple &TT)
std::pair< unsigned, unsigned >	getDefaultFlatWorkGroupSize (CallingConv::ID CC) const
std::pair< unsigned, unsigned >	getFlatWorkGroupSizes (const Function &F) const
std::optional< unsigned >	getReqdWorkGroupSize (const Function &F, unsigned Dim) const
bool	hasWavefrontsEvenlySplittingXDim (const Function &F, bool REquiresUniformYZ=false) const
std::pair< unsigned, unsigned >	getWavesPerEU (const Function &F) const
std::pair< unsigned, unsigned >	getWavesPerEU (std::pair< unsigned, unsigned > FlatWorkGroupSizes, unsigned LDSBytes, const Function &F) const
	Overload which uses the specified values for the flat workgroup sizes and LDS space rather than querying the function itself.
std::pair< unsigned, unsigned >	getEffectiveWavesPerEU (std::pair< unsigned, unsigned > RequestedWavesPerEU, std::pair< unsigned, unsigned > FlatWorkGroupSizes, unsigned LDSBytes) const
	Returns the target minimum/maximum number of waves per EU.
unsigned	getMaxLocalMemSizeWithWaveCount (unsigned WaveCount, const Function &) const
	Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
std::pair< unsigned, unsigned >	getOccupancyWithWorkGroupSizes (uint32_t LDSBytes, const Function &F) const
	Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is `F` and each workgroup running the function requires `LDSBytes` bytes of LDS space.
std::pair< unsigned, unsigned >	getOccupancyWithWorkGroupSizes (uint32_t LDSBytes, std::pair< unsigned, unsigned > FlatWorkGroupSizes) const
	Overload which uses the specified values for the flat work group sizes, rather than querying the function itself.
std::pair< unsigned, unsigned >	getOccupancyWithWorkGroupSizes (const MachineFunction &MF) const
	Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is `MF`.
bool	isAmdHsaOS () const
bool	isAmdPalOS () const
bool	isMesa3DOS () const
bool	isMesaKernel (const Function &F) const
bool	isAmdHsaOrMesa (const Function &F) const
bool	isGCN () const
bool	useRealTrue16Insts () const
	Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-generated.
bool	hasMulI24 () const
bool	hasMulU24 () const
bool	hasSMulHi () const
bool	hasFminFmaxLegacy () const
unsigned	getWavefrontSize () const
unsigned	getWavefrontSizeLog2 () const
unsigned	getLocalMemorySize () const
	Return the maximum number of bytes of LDS available for all workgroups running on the same WGP or CU.
unsigned	getAddressableLocalMemorySize () const
	Return the maximum number of bytes of LDS that can be allocated to a single workgroup.
unsigned	getEUsPerCU () const
	Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto which workgroups are mapped.
Align	getAlignmentForImplicitArgPtr () const
unsigned	getExplicitKernelArgOffset () const
	Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
unsigned	getMaxWavesPerEU () const
unsigned	getMaxWorkitemID (const Function &Kernel, unsigned Dimension) const
	Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
SmallVector< unsigned >	getMaxNumWorkGroups (const Function &F) const
	Return the number of work groups for the function.
bool	isSingleLaneExecution (const Function &Kernel) const
	Return true if only a single workitem can be active in a wave.
bool	makeLIDRangeMetadata (Instruction *I) const
	Creates value range metadata on a workitemid.* intrinsic call or load.
unsigned	getImplicitArgNumBytes (const Function &F) const
uint64_t	getExplicitKernArgSize (const Function &F, Align &MaxAlign) const
unsigned	getKernArgSegmentSize (const Function &F, Align &MaxAlign) const
AMDGPUDwarfFlavour	getAMDGPUDwarfFlavour () const
virtual	~AMDGPUSubtarget ()=default

Static Public Member Functions
static bool	hasHalfRate64Ops (const TargetSubtargetInfo &STI)
Static Public Member Functions inherited from llvm::AMDGPUSubtarget
static const AMDGPUSubtarget &	get (const MachineFunction &MF)
static const AMDGPUSubtarget &	get (const TargetMachine &TM, const Function &F)

Protected Attributes
AMDGPU::IsaInfo::AMDGPUTargetID	TargetID
unsigned	Gen = INVALID
InstrItineraryData	InstrItins
int	LDSBankCount = 0
unsigned	MaxPrivateElementSize = 0
unsigned	InstCacheLineSize = 0
bool	DynamicVGPR = false
bool	DynamicVGPRBlockSize32 = false
bool	ScalarizeGlobal = false
unsigned	MaxHardClauseLength = 0
	The maximum number of instructions that may be placed within an S_CLAUSE, which is one greater than the maximum argument to S_CLAUSE.
Protected Attributes inherited from llvm::AMDGPUSubtarget
bool	HasMulI24 = true
bool	HasMulU24 = true
bool	HasSMulHi = false
bool	HasFminFmaxLegacy = true
unsigned	EUsPerCU = 4
unsigned	MaxWavesPerEU = 10
unsigned	LocalMemorySize = 0
unsigned	AddressableLocalMemorySize = 0
char	WavefrontSizeLog2 = 0
unsigned	FlatOffsetBitWidth = 0

Detailed Description

Definition at line 33 of file GCNSubtarget.h.

Member Enumeration Documentation

◆ TrapHandlerAbi

enum class llvm::GCNSubtarget::TrapHandlerAbi

strong

Enumerator
NONE
AMDHSA

Definition at line 40 of file GCNSubtarget.h.

◆ TrapID

enum class llvm::GCNSubtarget::TrapID

strong

Enumerator
LLVMAMDHSATrap
LLVMAMDHSADebugTrap

Definition at line 45 of file GCNSubtarget.h.

Constructor & Destructor Documentation

◆ GCNSubtarget()

GCNSubtarget::GCNSubtarget	(	const Triple &	TT,
		StringRef	GPU,
		StringRef	FS,
		const GCNTargetMachine &	TM )

Definition at line 179 of file GCNSubtarget.cpp.

References llvm::AMDGPUSubtarget::AMDGPUSubtarget(), llvm::AMDGPUSubtarget::EUsPerCU, llvm::AMDGPU::IsaInfo::getEUsPerCU(), llvm::AMDGPU::IsaInfo::getMaxWavesPerEU(), getStackAlignment(), getTargetLowering(), initializeSubtargetDependencies(), InstrItins, llvm::AMDGPUSubtarget::MaxWavesPerEU, and TargetID.

Referenced by initializeSubtargetDependencies(), and ~GCNSubtarget().

◆ ~GCNSubtarget()

GCNSubtarget::~GCNSubtarget ( )

override default

References F, and GCNSubtarget().

Member Function Documentation

◆ adjustSchedDependency()

void GCNSubtarget::adjustSchedDependency	(	SUnit *	Def,
		int	DefOpIdx,
		SUnit *	Use,
		int	UseOpIdx,
		SDep &	Dep,
		const TargetSchedModel *	SchedModel ) const

override

Definition at line 642 of file GCNSubtarget.cpp.

References llvm::SDep::Data, E(), getInstrItineraryData(), llvm::ilist_node_impl< OptionsT >::getIterator(), llvm::SDep::getKind(), llvm::SDep::getLatency(), llvm::MachineInstr::getParent(), llvm::SDep::getReg(), getRegisterInfo(), I, llvm::MachineBasicBlock::instr_end(), llvm::MachineInstr::isBundle(), llvm::SDep::setLatency(), and TRI.

◆ checkSubtargetFeatures()

void GCNSubtarget::checkSubtargetFeatures ( const Function & F ) const

Diagnose inconsistent subtarget features before attempting to codegen function F.

Definition at line 170 of file GCNSubtarget.cpp.

References F.

Referenced by llvm::AMDGPUDAGToDAGISel::runOnMachineFunction(), and llvm::AMDGPUInstructionSelector::setupMF().

◆ computeOccupancy()

std::pair< unsigned, unsigned > GCNSubtarget::computeOccupancy	(	const Function &	F,
		unsigned	LDSSize = 0,
		unsigned	NumSGPRs = 0,
		unsigned	NumVGPRs = 0 ) const

Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is F, each workgroup uses LDSSize bytes of LDS, and each wave uses NumSGPRs SGPRs and NumVGPRs VGPRs.

The flat workgroup sizes associated to the function are a range, so this returns a range as well.

Note that occupancy can be affected by the scratch allocation as well, but we do not have enough information to compute it.

Definition at line 452 of file GCNSubtarget.cpp.

References F, llvm::AMDGPU::getDynamicVGPRBlockSize(), getDynamicVGPRBlockSize(), getOccupancyWithNumSGPRs(), getOccupancyWithNumVGPRs(), llvm::AMDGPUSubtarget::getOccupancyWithWorkGroupSizes(), and isDynamicVGPREnabled().

◆ d16PreservesUnusedBits()

bool llvm::GCNSubtarget::d16PreservesUnusedBits ( ) const

inline

Definition at line 376 of file GCNSubtarget.h.

References hasD16LoadStore(), and TargetID.

◆ enableEarlyIfConversion()

bool llvm::GCNSubtarget::enableEarlyIfConversion ( ) const

inline override

Definition at line 447 of file GCNSubtarget.h.

◆ enableMachineScheduler()

bool llvm::GCNSubtarget::enableMachineScheduler ( ) const

inline override

Definition at line 434 of file GCNSubtarget.h.

◆ enableSubRegLiveness()

bool llvm::GCNSubtarget::enableSubRegLiveness ( ) const

inline override

Definition at line 438 of file GCNSubtarget.h.

◆ flatScratchIsPointer()

bool llvm::GCNSubtarget::flatScratchIsPointer ( ) const

inline

Returns: true if the flat_scratch register should be initialized with the pointer to the wave's scratch memory rather than a size and offset.

Definition at line 674 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ getAddressableNumArchVGPRs()

unsigned llvm::GCNSubtarget::getAddressableNumArchVGPRs ( ) const

inline

Returns: Addressable number of architectural VGPRs supported by the subtarget.

Definition at line 822 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getAddressableNumArchVGPRs().

Referenced by getMaxNumVectorRegs().

◆ getAddressableNumSGPRs()

unsigned llvm::GCNSubtarget::getAddressableNumSGPRs ( ) const

inline

Returns: Addressable number of SGPRs supported by the subtarget.

Definition at line 748 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getAddressableNumSGPRs().

◆ getAddressableNumVGPRs()

unsigned llvm::GCNSubtarget::getAddressableNumVGPRs ( unsigned DynamicVGPRBlockSize ) const

inline

Returns: Addressable number of VGPRs supported by the subtarget.

Definition at line 827 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getAddressableNumVGPRs().

◆ getBaseMaxNumSGPRs()

unsigned GCNSubtarget::getBaseMaxNumSGPRs	(	const Function &	F,
		std::pair< unsigned, unsigned >	WavesPerEU,
		unsigned	PreloadedSGPRs,
		unsigned	ReservedNumSGPRs ) const

Returns: max num SGPRs. This is the common utility function called by MachineFunction and Function variants of getMaxNumSGPRs.

Definition at line 469 of file GCNSubtarget.cpp.

References F, llvm::AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, getMaxNumSGPRs(), and getMinNumSGPRs().

Referenced by getMaxNumSGPRs(), and getMaxNumSGPRs().

◆ getBaseMaxNumVGPRs()

unsigned GCNSubtarget::getBaseMaxNumVGPRs	(	const Function &	F,
		std::pair< unsigned, unsigned >	NumVGPRBounds ) const

Returns: max num VGPRs. This is the common utility function called by MachineFunction and Function variants of getMaxNumVGPRs.

Definition at line 553 of file GCNSubtarget.cpp.

References F.

Referenced by getMaxNumVGPRs().

◆ getBaseReservedNumSGPRs()

unsigned GCNSubtarget::getBaseReservedNumSGPRs ( const bool HasFlatScratch ) const

Returns: Reserved number of SGPRs. This is the common utility function called by MachineFunction and Function variants of getReservedNumSGPRs.

Definition at line 422 of file GCNSubtarget.cpp.

References getGeneration(), llvm::AMDGPUSubtarget::GFX10, isXNACKEnabled(), llvm::AMDGPUSubtarget::SEA_ISLANDS, and llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS.

Referenced by getReservedNumSGPRs(), and getReservedNumSGPRs().

◆ getBoolRC()

const TargetRegisterClass * llvm::GCNSubtarget::getBoolRC ( ) const

inline

Definition at line 895 of file GCNSubtarget.h.

References llvm::SIRegisterInfo::getBoolRC(), and getRegisterInfo().

◆ getCallLowering()

const CallLowering * llvm::GCNSubtarget::getCallLowering ( ) const

inline override

Definition at line 117 of file GCNSubtarget.h.

◆ getConstantBusLimit()

unsigned GCNSubtarget::getConstantBusLimit ( unsigned Opcode ) const

Definition at line 208 of file GCNSubtarget.cpp.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX10.

◆ getDynamicVGPRBlockSize()

unsigned llvm::GCNSubtarget::getDynamicVGPRBlockSize ( ) const

inline

Definition at line 949 of file GCNSubtarget.h.

References DynamicVGPRBlockSize32.

Referenced by computeOccupancy(), and getMaxNumVGPRs().

◆ getFrameLowering()

const SIFrameLowering * llvm::GCNSubtarget::getFrameLowering ( ) const

inline override

Definition at line 105 of file GCNSubtarget.h.

◆ getGeneration()

Generation llvm::GCNSubtarget::getGeneration ( ) const

inline

Definition at line 147 of file GCNSubtarget.h.

References Gen.

Referenced by createOccupancy(), flatScratchIsPointer(), getBaseReservedNumSGPRs(), getConstantBusLimit(), getMaxWaveScratchSize(), getNSAThreshold(), getOccupancyWithNumSGPRs(), getSetRegWaitStates(), getSNopBits(), has12DWordStoreHazard(), hasAddr64(), hasD16Images(), hasD16LoadStore(), hasDenormModeInst(), hasDPPRowShare(), hasDsAtomicAsyncBarrierArriveB64PipeBug(), hasExpertSchedulingMode(), hasExtendedWaitCounts(), hasFlat(), hasFlatLgkmVMemCountInOrder(), hasFlatScratchHiInB64InstHazard(), hasFlatScratchSVSSwizzleBug(), hasFmaakFmamkF32Insts(), hasFPAtomicToDenormModeHazard(), hasFractBug(), hasGWSAutoReplay(), hasInstPrefetch(), hasLdsDirect(), hasLdsWaitVMSRC(), hasLegacyGeometry(), hasMad64_32(), hasMed3_16(), hasMergedShaders(), hasMin3Max3_16(), hasMultiDwordFlatScratchAddressing(), hasNoF16PseudoScalarTransInlineConstants(), hasNonNSAEncoding(), hasOnlyRevVALUShifts(), hasPermLane64(), hasPermLaneX16(), hasPKF32InstsReplicatingLower32BitsOfScalarInput(), hasReadM0LdsDirectHazard(), hasReadM0LdsDmaHazard(), hasReadM0MovRelInterpHazard(), hasReadM0SendMsgHazard(), hasReadVCCZBug(), hasRFEHazards(), hasRrWGMode(), hasScalarAddSub64(), hasScalarCompareEq64(), hasScalarSMulU64(), hasScalarSubwordLoads(), hasSCmpK(), hasScratchBaseForwardingHazard(), hasSignedScratchOffsets(), hasSMRDReadVALUDefHazard(), hasSplitBarriers(), hasUsableDivScaleConditionOutput(), hasUsableDSOffset(), hasVALUMaskWriteHazard(), hasVALUPartialForwardingHazard(), hasVMEMReadSGPRVALUDefHazard(), hasVOP3DPP(), initializeSubtargetDependencies(), isGFX11Plus(), ldsRequiresM0Init(), partialVCCWritesUpdateVCCZ(), privateMemoryResourceIsRangeChecked(), requiresWaitOnWorkgroupReleaseFence(), llvm::AMDGPUAsmPrinter::runOnMachineFunction(), setRegModeNeedsVNOPs(), shouldClusterStores(), supportsBPermute(), supportsGetDoorbellID(), supportsMinMaxDenormModes(), supportsWave32(), supportsWaveWideBPermute(), supportsWGP(), vmemWriteNeedsExpWaitcnt(), and zeroesHigh16BitsOfDest().

◆ getInlineAsmLowering()

const InlineAsmLowering * llvm::GCNSubtarget::getInlineAsmLowering ( ) const

inline override

Definition at line 121 of file GCNSubtarget.h.

◆ getInstCacheLineSize()

unsigned llvm::GCNSubtarget::getInstCacheLineSize ( ) const

inline

Instruction cache line size in bytes (64 for pre-GFX11, 128 for GFX11+).

Definition at line 177 of file GCNSubtarget.h.

References InstCacheLineSize.

Referenced by llvm::SITargetLowering::SITargetLowering().

◆ getInstrInfo()

const SIInstrInfo * llvm::GCNSubtarget::getInstrInfo ( ) const

inline override

Definition at line 103 of file GCNSubtarget.h.

◆ getInstrItineraryData()

const InstrItineraryData * llvm::GCNSubtarget::getInstrItineraryData ( ) const

inline override

Definition at line 141 of file GCNSubtarget.h.

References InstrItins.

Referenced by adjustSchedDependency().

◆ getInstructionSelector()

InstructionSelector * llvm::GCNSubtarget::getInstructionSelector ( ) const

inline override

Definition at line 125 of file GCNSubtarget.h.

◆ getKnownHighZeroBitsForFrameIndex()

unsigned llvm::GCNSubtarget::getKnownHighZeroBitsForFrameIndex ( ) const

inline

Return the number of high bits known to be zero for a frame index.

Definition at line 170 of file GCNSubtarget.h.

References llvm::countl_zero(), getMaxWaveScratchSize(), and llvm::AMDGPUSubtarget::getWavefrontSizeLog2().

Referenced by llvm::SITargetLowering::LowerFormalArguments().

◆ getLDSBankCount()

int llvm::GCNSubtarget::getLDSBankCount ( ) const

inline

Definition at line 174 of file GCNSubtarget.h.

References LDSBankCount.

◆ getLegalizerInfo()

const LegalizerInfo * llvm::GCNSubtarget::getLegalizerInfo ( ) const

inline override

Definition at line 129 of file GCNSubtarget.h.

◆ getMaxFlatWorkGroupSize()

unsigned llvm::GCNSubtarget::getMaxFlatWorkGroupSize ( ) const

inline override virtual

Returns: Maximum flat work group size supported by the subtarget.

Implements llvm::AMDGPUSubtarget.

Definition at line 911 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getMaxFlatWorkGroupSize().

◆ getMaxLocalMemSizeWithWaveCount()

unsigned llvm::GCNSubtarget::getMaxLocalMemSizeWithWaveCount	(	unsigned	WaveCount,
		const Function &	) const

Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.

◆ getMaxNumAGPRs()

unsigned llvm::GCNSubtarget::getMaxNumAGPRs ( const Function & F ) const

inline

Definition at line 863 of file GCNSubtarget.h.

References F, and getMaxNumVGPRs().

◆ getMaxNumPreloadedSGPRs()

unsigned GCNSubtarget::getMaxNumPreloadedSGPRs ( ) const

Returns: Maximum number of preloaded SGPRs for the subtarget.

Definition at line 523 of file GCNSubtarget.cpp.

Referenced by getMaxNumSGPRs().

◆ getMaxNumSGPRs() [1/3]

unsigned GCNSubtarget::getMaxNumSGPRs ( const Function & F ) const

Returns: Maximum number of SGPRs that meets the number of waves per execution unit requirement for function F, or the number of SGPRs explicitly requested using the "amdgpu-num-sgpr" attribute attached to function F. If the explicitly requested value cannot be converted to an integer, violates the subtarget's specifications, or does not meet the number of waves per execution unit requirement, returns a value that does meet that requirement.

Definition at line 548 of file GCNSubtarget.cpp.

References F, getBaseMaxNumSGPRs(), getMaxNumPreloadedSGPRs(), getReservedNumSGPRs(), and llvm::AMDGPUSubtarget::getWavesPerEU().

◆ getMaxNumSGPRs() [2/3]

unsigned GCNSubtarget::getMaxNumSGPRs ( const MachineFunction & MF ) const

Returns: Maximum number of SGPRs that meets the number of waves per execution unit requirement for function MF, or the number of SGPRs explicitly requested using the "amdgpu-num-sgpr" attribute attached to function MF. If the explicitly requested value cannot be converted to an integer, violates the subtarget's specifications, or does not meet the number of waves per execution unit requirement, returns a value that does meet that requirement.

Definition at line 516 of file GCNSubtarget.cpp.

References F, getBaseMaxNumSGPRs(), llvm::MachineFunction::getFunction(), llvm::MachineFunction::getInfo(), llvm::SIMachineFunctionInfo::getNumPreloadedSGPRs(), getReservedNumSGPRs(), and llvm::SIMachineFunctionInfo::getWavesPerEU().

◆ getMaxNumSGPRs() [3/3]

unsigned llvm::GCNSubtarget::getMaxNumSGPRs	(	unsigned	WavesPerEU,
		bool	Addressable ) const

inline

Returns: Maximum number of SGPRs that meets the given number of waves per execution unit requirement supported by the subtarget.

Definition at line 760 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getMaxNumSGPRs().

Referenced by getBaseMaxNumSGPRs().

◆ getMaxNumUserSGPRs()

unsigned llvm::GCNSubtarget::getMaxNumUserSGPRs ( ) const

inline

Definition at line 457 of file GCNSubtarget.h.

References llvm::AMDGPU::getMaxNumUserSGPRs().

◆ getMaxNumVectorRegs()

std::pair< unsigned, unsigned > GCNSubtarget::getMaxNumVectorRegs ( const Function & F ) const

Return a pair of maximum numbers of VGPRs and AGPRs that meet the number of waves per execution unit required for the function F.

Definition at line 586 of file GCNSubtarget.cpp.

References llvm::alignTo(), assert(), F, getAddressableNumArchVGPRs(), llvm::AMDGPU::getIntegerPairAttribute(), and getMaxNumVGPRs().

◆ getMaxNumVGPRs() [1/3]

unsigned GCNSubtarget::getMaxNumVGPRs ( const Function & F ) const

Returns: Maximum number of VGPRs that meets the number of waves per execution unit requirement for function F, or the number of VGPRs explicitly requested using the "amdgpu-num-vgpr" attribute attached to function F. If the explicitly requested value cannot be converted to an integer, violates the subtarget's specifications, or does not meet the number of waves per execution unit requirement, the value that meets the number of waves per execution unit requirement is returned instead.

Definition at line 568 of file GCNSubtarget.cpp.

References F, getBaseMaxNumVGPRs(), llvm::AMDGPU::getDynamicVGPRBlockSize(), getDynamicVGPRBlockSize(), getMaxNumVGPRs(), getMinNumVGPRs(), llvm::AMDGPUSubtarget::getWavesPerEU(), and isDynamicVGPREnabled().

◆ getMaxNumVGPRs() [2/3]

unsigned GCNSubtarget::getMaxNumVGPRs ( const MachineFunction & MF ) const

Returns: Maximum number of VGPRs that meets the number of waves per execution unit requirement for function MF, or the number of VGPRs explicitly requested using the "amdgpu-num-vgpr" attribute attached to function MF. If the explicitly requested value cannot be converted to an integer, violates the subtarget's specifications, or does not meet the number of waves per execution unit requirement, the value that meets the number of waves per execution unit requirement is returned instead.

Definition at line 581 of file GCNSubtarget.cpp.

References llvm::MachineFunction::getFunction(), and getMaxNumVGPRs().

◆ getMaxNumVGPRs() [3/3]

unsigned llvm::GCNSubtarget::getMaxNumVGPRs	(	unsigned	WavesPerEU,
		unsigned	DynamicVGPRBlockSize ) const

inline

Returns: the maximum number of VGPRs that can be used and still achieve at least the specified number of waves WavesPerEU.

Definition at line 841 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getMaxNumVGPRs().

Referenced by getMaxNumAGPRs(), getMaxNumVectorRegs(), getMaxNumVGPRs(), and getMaxNumVGPRs().

◆ getMaxPrivateElementSize()

unsigned llvm::GCNSubtarget::getMaxPrivateElementSize ( bool ForBufferRSrc = false ) const

inline

Definition at line 179 of file GCNSubtarget.h.

References hasFlatScratchEnabled(), and MaxPrivateElementSize.

Referenced by llvm::SITargetLowering::canMergeStoresTo().

◆ getMaxWaveScratchSize()

unsigned llvm::GCNSubtarget::getMaxWaveScratchSize ( ) const

inline

Definition at line 155 of file GCNSubtarget.h.

References getGeneration(), llvm::AMDGPUSubtarget::GFX11, and llvm::AMDGPUSubtarget::GFX12.

Referenced by getKnownHighZeroBitsForFrameIndex().

◆ getMaxWavesPerEU()

unsigned llvm::AMDGPUSubtarget::getMaxWavesPerEU ( ) const

inline

Returns: Maximum number of waves per execution unit supported by the subtarget without any kind of limitation.

Definition at line 287 of file AMDGPUSubtarget.h.

Referenced by getOccupancyWithNumSGPRs().

◆ getMaxWorkGroupsPerCU()

unsigned llvm::GCNSubtarget::getMaxWorkGroupsPerCU ( unsigned FlatWorkGroupSize ) const

inline override virtual

Returns: Maximum number of work groups per compute unit supported by the subtarget and limited by given FlatWorkGroupSize.

Implements llvm::AMDGPUSubtarget.

Definition at line 901 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getMaxWorkGroupsPerCU().

◆ getMinFlatWorkGroupSize()

unsigned llvm::GCNSubtarget::getMinFlatWorkGroupSize ( ) const

inline override virtual

Returns: Minimum flat work group size supported by the subtarget.

Implements llvm::AMDGPUSubtarget.

Definition at line 906 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getMinFlatWorkGroupSize().

◆ getMinNumSGPRs()

unsigned llvm::GCNSubtarget::getMinNumSGPRs ( unsigned WavesPerEU ) const

inline

Returns: Minimum number of SGPRs that meets the given number of waves per execution unit requirement supported by the subtarget.

Definition at line 754 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getMinNumSGPRs().

Referenced by getBaseMaxNumSGPRs().

◆ getMinNumVGPRs()

unsigned llvm::GCNSubtarget::getMinNumVGPRs	(	unsigned	WavesPerEU,
		unsigned	DynamicVGPRBlockSize ) const

inline

Returns: the minimum number of VGPRs that will prevent achieving more than the specified number of waves WavesPerEU.

Definition at line 833 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getMinNumVGPRs().

Referenced by getMaxNumVGPRs().

◆ getMinWavesPerEU()

unsigned llvm::GCNSubtarget::getMinWavesPerEU ( ) const

inline override virtual

Returns: Minimum number of waves per execution unit supported by the subtarget.

Implements llvm::AMDGPUSubtarget.

Definition at line 924 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getMinWavesPerEU().

◆ getNSAMaxSize()

unsigned llvm::GCNSubtarget::getNSAMaxSize ( bool HasSampler = false ) const

inline

Definition at line 493 of file GCNSubtarget.h.

References llvm::AMDGPU::getNSAMaxSize().

◆ getNSAThreshold()

unsigned GCNSubtarget::getNSAThreshold ( const MachineFunction & MF ) const

Definition at line 691 of file GCNSubtarget.cpp.

References llvm::Function::getFnAttributeAsParsedInteger(), llvm::MachineFunction::getFunction(), getGeneration(), llvm::AMDGPUSubtarget::GFX12, and NSAThreshold.

◆ getOccupancyWithNumSGPRs()

unsigned GCNSubtarget::getOccupancyWithNumSGPRs ( unsigned SGPRs ) const

Return the maximum number of waves per SIMD for kernels that use the given number of SGPRs (parameter SGPRs).

Definition at line 409 of file GCNSubtarget.cpp.

References getGeneration(), getMaxWavesPerEU(), and llvm::AMDGPU::IsaInfo::getOccupancyWithNumSGPRs().

Referenced by computeOccupancy().

◆ getOccupancyWithNumVGPRs()

unsigned GCNSubtarget::getOccupancyWithNumVGPRs	(	unsigned	VGPRs,
		unsigned	DynamicVGPRBlockSize ) const

Return the maximum number of waves per SIMD for kernels that use the given number of VGPRs (parameter VGPRs).

Definition at line 415 of file GCNSubtarget.cpp.

References llvm::AMDGPU::IsaInfo::getNumWavesPerEUWithNumVGPRs().

Referenced by computeOccupancy().

◆ getRegBankInfo()

const AMDGPURegisterBankInfo * llvm::GCNSubtarget::getRegBankInfo ( ) const

inline override

Definition at line 133 of file GCNSubtarget.h.

◆ getRegisterInfo()

const SIRegisterInfo * llvm::GCNSubtarget::getRegisterInfo ( ) const

inline override

Definition at line 111 of file GCNSubtarget.h.

Referenced by adjustSchedDependency(), llvm::GCNTargetMachine::convertFuncInfoToYAML(), llvm::AMDGPUAsmPrinter::emitInstruction(), getBoolRC(), llvm::SITargetLowering::insertCopiesSplitCSR(), llvm::SITargetLowering::isEligibleForTailCallOptimization(), llvm::SITargetLowering::LowerFormalArguments(), llvm::AMDGPUCallLowering::lowerFormalArgumentsKernel(), llvm::SITargetLowering::LowerReturn(), and llvm::SITargetLowering::SITargetLowering().

◆ getReservedNumSGPRs() [1/2]

unsigned GCNSubtarget::getReservedNumSGPRs ( const Function & F ) const

Returns: Reserved number of SGPRs for given function F.

Definition at line 443 of file GCNSubtarget.cpp.

References F, and getBaseReservedNumSGPRs().

◆ getReservedNumSGPRs() [2/2]

unsigned GCNSubtarget::getReservedNumSGPRs ( const MachineFunction & MF ) const

Returns: Reserved number of SGPRs for given machine function MF.

Definition at line 438 of file GCNSubtarget.cpp.

References getBaseReservedNumSGPRs(), llvm::MachineFunction::getInfo(), llvm::SIMachineFunctionInfo::getUserSGPRInfo(), and llvm::GCNUserSGPRUsageInfo::hasFlatScratchInit().

Referenced by getMaxNumSGPRs(), and getMaxNumSGPRs().

◆ getScalarizeGlobalBehavior()

bool llvm::GCNSubtarget::getScalarizeGlobalBehavior ( ) const

inline

Definition at line 441 of file GCNSubtarget.h.

References ScalarizeGlobal.

◆ getSelectionDAGInfo()

const SelectionDAGTargetInfo * GCNSubtarget::getSelectionDAGInfo ( ) const

override

Definition at line 204 of file GCNSubtarget.cpp.

◆ getSetRegWaitStates()

unsigned llvm::GCNSubtarget::getSetRegWaitStates ( ) const

inline

Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.

Definition at line 273 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::SEA_ISLANDS.

◆ getSGPRAllocGranule()

unsigned llvm::GCNSubtarget::getSGPRAllocGranule ( ) const

inline

Returns: SGPR allocation granularity supported by the subtarget.

Definition at line 733 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getSGPRAllocGranule().

◆ getSGPREncodingGranule()

unsigned llvm::GCNSubtarget::getSGPREncodingGranule ( ) const

inline

Returns: SGPR encoding granularity supported by the subtarget.

Definition at line 738 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getSGPREncodingGranule().

◆ getSNopBits()

unsigned llvm::GCNSubtarget::getSNopBits ( ) const

inline

Returns: the number of significant bits in the immediate field of the S_NOP instruction.

Definition at line 986 of file GCNSubtarget.h.

References getGeneration(), llvm::AMDGPUSubtarget::GFX12, and llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS.

◆ getStackAlignment()

Align llvm::GCNSubtarget::getStackAlignment ( ) const

inline

Definition at line 432 of file GCNSubtarget.h.

Referenced by GCNSubtarget().

◆ getTargetID()

const AMDGPU::IsaInfo::AMDGPUTargetID & llvm::GCNSubtarget::getTargetID ( ) const

inline

Definition at line 137 of file GCNSubtarget.h.

References TargetID.

Referenced by llvm::AMDGPUAsmPrinter::runOnMachineFunction().

◆ getTargetLowering()

const SITargetLowering * llvm::GCNSubtarget::getTargetLowering ( ) const

inline override

Definition at line 109 of file GCNSubtarget.h.

Referenced by GCNSubtarget(), and getTM().

◆ getTotalNumSGPRs()

unsigned llvm::GCNSubtarget::getTotalNumSGPRs ( ) const

inline

Returns: Total number of SGPRs supported by the subtarget.

Definition at line 743 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getTotalNumSGPRs().

◆ getTotalNumVGPRs()

unsigned llvm::GCNSubtarget::getTotalNumVGPRs ( ) const

inline

Returns: Total number of VGPRs supported by the subtarget.

Definition at line 816 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getTotalNumVGPRs().

◆ getTrapHandlerAbi()

TrapHandlerAbi llvm::GCNSubtarget::getTrapHandlerAbi ( ) const

inline

Definition at line 229 of file GCNSubtarget.h.

References AMDHSA, llvm::AMDGPUSubtarget::isAmdHsaOS(), and NONE.

◆ getVGPRAllocGranule()

unsigned llvm::GCNSubtarget::getVGPRAllocGranule ( unsigned DynamicVGPRBlockSize ) const

inline

Returns: VGPR allocation granularity supported by the subtarget.

Definition at line 806 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getVGPRAllocGranule().

◆ getVGPREncodingGranule()

unsigned llvm::GCNSubtarget::getVGPREncodingGranule ( ) const

inline

Returns: VGPR encoding granularity supported by the subtarget.

Definition at line 811 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getVGPREncodingGranule().

◆ getWavesPerEUForWorkGroup()

unsigned llvm::GCNSubtarget::getWavesPerEUForWorkGroup ( unsigned FlatWorkGroupSize ) const

inline override virtual

Returns: Number of waves per execution unit required to support the given FlatWorkGroupSize.

Implements llvm::AMDGPUSubtarget.

Definition at line 918 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getWavesPerEUForWorkGroup().

◆ has12DWordStoreHazard()

bool llvm::GCNSubtarget::has12DWordStoreHazard ( ) const

inline

Definition at line 513 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS.

◆ hasAddPC64Inst()

bool llvm::GCNSubtarget::hasAddPC64Inst ( ) const

inline

Definition at line 638 of file GCNSubtarget.h.

◆ hasAddr64()

bool llvm::GCNSubtarget::hasAddr64 ( ) const

inline

Definition at line 199 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS.

Referenced by initializeSubtargetDependencies().

◆ hasAtomicCSub()

bool llvm::GCNSubtarget::hasAtomicCSub ( ) const

inline

Definition at line 349 of file GCNSubtarget.h.

◆ hasAtomicFaddInsts()

bool llvm::GCNSubtarget::hasAtomicFaddInsts ( ) const

inline

Definition at line 407 of file GCNSubtarget.h.

◆ hasCompressedExport()

bool llvm::GCNSubtarget::hasCompressedExport ( ) const

inline

Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (enable) bits.

Definition at line 605 of file GCNSubtarget.h.

◆ hasCondSubInsts()

bool llvm::GCNSubtarget::hasCondSubInsts ( ) const

inline

Definition at line 728 of file GCNSubtarget.h.

◆ hasCvtScaleForwardingHazard()

bool llvm::GCNSubtarget::hasCvtScaleForwardingHazard ( ) const

inline

Definition at line 575 of file GCNSubtarget.h.

◆ hasD16Images()

bool llvm::GCNSubtarget::hasD16Images ( ) const

inline

Definition at line 380 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS.

◆ hasD16LoadStore()

bool llvm::GCNSubtarget::hasD16LoadStore ( ) const

inline

Definition at line 374 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

Referenced by d16PreservesUnusedBits().

◆ hasDelayAlu()

bool llvm::GCNSubtarget::hasDelayAlu ( ) const

inline

Return true if the target has the S_DELAY_ALU instruction.

Definition at line 614 of file GCNSubtarget.h.

◆ hasDenormModeInst()

bool llvm::GCNSubtarget::hasDenormModeInst ( ) const

inline

Returns: true if the target supports S_DENORM_MODE.

Definition at line 287 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX10.

◆ hasDOTOpSelHazard()

bool llvm::GCNSubtarget::hasDOTOpSelHazard ( ) const

inline

Definition at line 554 of file GCNSubtarget.h.

◆ hasDPPRowShare()

bool llvm::GCNSubtarget::hasDPPRowShare ( ) const

inline

Definition at line 478 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX10.

◆ hasDS96AndDS128()

bool llvm::GCNSubtarget::hasDS96AndDS128 ( ) const

inline

Returns: true if the target supports ds_read/write_b96/128.

Definition at line 296 of file GCNSubtarget.h.

◆ hasDsAtomicAsyncBarrierArriveB64PipeBug()

bool llvm::GCNSubtarget::hasDsAtomicAsyncBarrierArriveB64PipeBug ( ) const

inline

Definition at line 961 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasDstSelForwardingHazard()

bool llvm::GCNSubtarget::hasDstSelForwardingHazard ( ) const

inline

Definition at line 551 of file GCNSubtarget.h.

◆ hasDwordx3LoadStores()

bool llvm::GCNSubtarget::hasDwordx3LoadStores ( ) const

inline

Definition at line 518 of file GCNSubtarget.h.

◆ hasExpertSchedulingMode()

bool llvm::GCNSubtarget::hasExpertSchedulingMode ( ) const

inline

Returns: true if the target supports expert scheduling mode 2 which relies on the compiler to insert waits to avoid hazards between VMEM and VALU instructions in some instances.

Definition at line 643 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasExportInsts()

bool llvm::GCNSubtarget::hasExportInsts ( ) const

inline

Definition at line 355 of file GCNSubtarget.h.

◆ hasExtendedWaitCounts()

bool llvm::GCNSubtarget::hasExtendedWaitCounts ( ) const

inline

Returns: true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.

Definition at line 623 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasFlat()

bool llvm::GCNSubtarget::hasFlat ( ) const

inline

Definition at line 203 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS.

Referenced by initializeSubtargetDependencies().

◆ hasFlatLgkmVMemCountInOrder()

bool llvm::GCNSubtarget::hasFlatLgkmVMemCountInOrder ( ) const

inline

Definition at line 372 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ hasFlatScratchEnabled()

bool llvm::GCNSubtarget::hasFlatScratchEnabled ( ) const

inline

Definition at line 342 of file GCNSubtarget.h.

Referenced by getMaxPrivateElementSize().

◆ hasFlatScratchHiInB64InstHazard()

bool llvm::GCNSubtarget::hasFlatScratchHiInB64InstHazard ( ) const

inline

Definition at line 973 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasFlatScratchSTMode()

bool llvm::GCNSubtarget::hasFlatScratchSTMode ( ) const

inline

Definition at line 336 of file GCNSubtarget.h.

◆ hasFlatScratchSVSMode()

bool llvm::GCNSubtarget::hasFlatScratchSVSMode ( ) const

inline

Definition at line 340 of file GCNSubtarget.h.

◆ hasFlatScratchSVSSwizzleBug()

bool llvm::GCNSubtarget::hasFlatScratchSVSSwizzleBug ( ) const

inline

Definition at line 611 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX11.

◆ hasFlatScrRegister()

bool llvm::GCNSubtarget::hasFlatScrRegister ( ) const

inline

Definition at line 331 of file GCNSubtarget.h.

◆ hasFmaakFmamkF32Insts()

bool llvm::GCNSubtarget::hasFmaakFmamkF32Insts ( ) const

inline

Definition at line 485 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX10.

◆ hasFmaakFmamkF64Insts()

bool llvm::GCNSubtarget::hasFmaakFmamkF64Insts ( ) const

inline

Definition at line 489 of file GCNSubtarget.h.

◆ hasFormattedMUBUFInsts()

bool llvm::GCNSubtarget::hasFormattedMUBUFInsts ( ) const

inline

Definition at line 353 of file GCNSubtarget.h.

◆ hasFPAtomicToDenormModeHazard()

bool llvm::GCNSubtarget::hasFPAtomicToDenormModeHazard ( ) const

inline

Definition at line 561 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX10.

◆ hasFractBug()

bool llvm::GCNSubtarget::hasFractBug ( ) const

inline

Definition at line 213 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS.

◆ hasGetPCZeroExtension()

bool llvm::GCNSubtarget::hasGetPCZeroExtension ( ) const

inline

Definition at line 718 of file GCNSubtarget.h.

◆ hasGlobalAddTidInsts()

bool llvm::GCNSubtarget::hasGlobalAddTidInsts ( ) const

inline

Definition at line 347 of file GCNSubtarget.h.

◆ hasGWSAutoReplay()

bool llvm::GCNSubtarget::hasGWSAutoReplay ( ) const

inline

Definition at line 391 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ hasGWSSemaReleaseAll()

bool llvm::GCNSubtarget::hasGWSSemaReleaseAll ( ) const

inline

Returns: true if the target has the ds_gws_sema_release_all instruction.

Definition at line 394 of file GCNSubtarget.h.

◆ hasHalfRate64Ops()

bool llvm::GCNSubtarget::hasHalfRate64Ops ( const TargetSubtargetInfo & STI )

static

◆ hasHardClauses()

bool llvm::GCNSubtarget::hasHardClauses ( ) const

inline

Definition at line 559 of file GCNSubtarget.h.

References MaxHardClauseLength.

◆ hasHWFP64()

bool llvm::GCNSubtarget::hasHWFP64 ( ) const

inline

Definition at line 197 of file GCNSubtarget.h.

◆ hasInstPrefetch()

bool llvm::GCNSubtarget::hasInstPrefetch ( ) const

inline

Definition at line 415 of file GCNSubtarget.h.

References getGeneration(), llvm::AMDGPUSubtarget::GFX10, and llvm::AMDGPUSubtarget::GFX11.

◆ hasIntMinMax64()

bool llvm::GCNSubtarget::hasIntMinMax64 ( ) const

inline

Definition at line 707 of file GCNSubtarget.h.

◆ hasINVWBL2WaitCntRequirement()

bool llvm::GCNSubtarget::hasINVWBL2WaitCntRequirement ( ) const

inline

Definition at line 695 of file GCNSubtarget.h.

◆ hasLdsAtomicAddF64()

bool llvm::GCNSubtarget::hasLdsAtomicAddF64 ( ) const

inline

Definition at line 364 of file GCNSubtarget.h.

◆ hasLdsDirect()

bool llvm::GCNSubtarget::hasLdsDirect ( ) const

inline

Definition at line 567 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX11.

◆ hasLDSFPAtomicAddF32()

bool llvm::GCNSubtarget::hasLDSFPAtomicAddF32 ( ) const

inline

Definition at line 467 of file GCNSubtarget.h.

◆ hasLDSFPAtomicAddF64()

bool llvm::GCNSubtarget::hasLDSFPAtomicAddF64 ( ) const

inline

Definition at line 468 of file GCNSubtarget.h.

◆ hasLDSLoadB96_B128()

bool llvm::GCNSubtarget::hasLDSLoadB96_B128 ( ) const

inline

Returns true if the target supports global_load_lds_dwordx3/global_load_lds_dwordx4 or buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit.

Definition at line 619 of file GCNSubtarget.h.

◆ hasLDSMisalignedBugInWGPMode()

bool llvm::GCNSubtarget::hasLDSMisalignedBugInWGPMode ( ) const

inline

Definition at line 537 of file GCNSubtarget.h.

◆ hasLdsWaitVMSRC()

bool llvm::GCNSubtarget::hasLdsWaitVMSRC ( ) const

inline

Definition at line 569 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasLegacyGeometry()

bool llvm::GCNSubtarget::hasLegacyGeometry ( ) const

inline

Definition at line 683 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX11.

◆ hasLoopHeadInstSplitSensitivity()

bool llvm::GCNSubtarget::hasLoopHeadInstSplitSensitivity ( ) const

inline

Definition at line 581 of file GCNSubtarget.h.

◆ hasMad64_32()

bool llvm::GCNSubtarget::hasMad64_32 ( ) const

inline

Definition at line 405 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::SEA_ISLANDS.

◆ hasMadF16()

bool GCNSubtarget::hasMadF16 ( ) const

Definition at line 399 of file GCNSubtarget.cpp.

Referenced by llvm::SITargetLowering::SITargetLowering().

◆ hasMadU64U32NoCarry()

bool llvm::GCNSubtarget::hasMadU64U32NoCarry ( ) const

inline

Definition at line 704 of file GCNSubtarget.h.

◆ hasMed3_16()

bool llvm::GCNSubtarget::hasMed3_16 ( ) const

inline

Definition at line 215 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ hasMergedShaders()

bool llvm::GCNSubtarget::hasMergedShaders ( ) const

inline

Returns: true if the machine has merged shaders in which s0-s7 are reserved by the hardware and user SGPRs start at s8.

Definition at line 680 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ hasMin3Max3_16()

bool llvm::GCNSubtarget::hasMin3Max3_16 ( ) const

inline

Definition at line 217 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

Referenced by supportsMin3Max3().

◆ hasMovB64()

bool llvm::GCNSubtarget::hasMovB64 ( ) const

inline

Definition at line 499 of file GCNSubtarget.h.

◆ hasMTBUFInsts()

bool llvm::GCNSubtarget::hasMTBUFInsts ( ) const

inline

Definition at line 351 of file GCNSubtarget.h.

◆ hasMultiDwordFlatScratchAddressing()

bool llvm::GCNSubtarget::hasMultiDwordFlatScratchAddressing ( ) const

inline

Definition at line 368 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ hasNoF16PseudoScalarTransInlineConstants()

bool llvm::GCNSubtarget::hasNoF16PseudoScalarTransInlineConstants ( ) const

inline

Returns: true if inline constants are not supported for F16 pseudo scalar transcendentals.

Definition at line 627 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasNonNSAEncoding()

bool llvm::GCNSubtarget::hasNonNSAEncoding ( ) const

inline

Definition at line 491 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasNullExportTarget()

bool llvm::GCNSubtarget::hasNullExportTarget ( ) const

inline

Return true if the target's EXP instruction supports the NULL export target.

Definition at line 609 of file GCNSubtarget.h.

◆ hasOnlyRevVALUShifts()

bool llvm::GCNSubtarget::hasOnlyRevVALUShifts ( ) const

inline

Definition at line 209 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS.

◆ hasPermLane64()

bool llvm::GCNSubtarget::hasPermLane64 ( ) const

inline

Returns: true if the subtarget has the v_permlane64_b32 instruction.

Definition at line 476 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX11.

◆ hasPermLaneX16()

bool llvm::GCNSubtarget::hasPermLaneX16 ( ) const

inline

Returns: true if the subtarget has the v_permlanex16_b32 instruction.

Definition at line 473 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX10.

◆ hasPKF32InstsReplicatingLower32BitsOfScalarInput()

bool llvm::GCNSubtarget::hasPKF32InstsReplicatingLower32BitsOfScalarInput ( ) const

inline

Returns: true if the target has packed f32 instructions that only read 32 bits from a scalar operand (SGPR or literal) and replicate the bits to both channels.

Definition at line 634 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasPkMinMax3Insts()

bool llvm::GCNSubtarget::hasPkMinMax3Insts ( ) const

inline

Definition at line 710 of file GCNSubtarget.h.

◆ hasPkMovB32()

bool llvm::GCNSubtarget::hasPkMovB32 ( ) const

inline

Definition at line 483 of file GCNSubtarget.h.

◆ hasPrefetch()

bool llvm::GCNSubtarget::hasPrefetch ( ) const

inline

Definition at line 419 of file GCNSubtarget.h.

◆ hasReadM0LdsDirectHazard()

bool llvm::GCNSubtarget::hasReadM0LdsDirectHazard ( ) const

inline

Definition at line 533 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ hasReadM0LdsDmaHazard()

bool llvm::GCNSubtarget::hasReadM0LdsDmaHazard ( ) const

inline

Definition at line 529 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ hasReadM0MovRelInterpHazard()

bool llvm::GCNSubtarget::hasReadM0MovRelInterpHazard ( ) const

inline

Definition at line 520 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ hasReadM0SendMsgHazard()

bool llvm::GCNSubtarget::hasReadM0SendMsgHazard ( ) const

inline

Definition at line 524 of file GCNSubtarget.h.

References getGeneration(), llvm::AMDGPUSubtarget::GFX9, and llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS.

◆ hasReadVCCZBug()

bool llvm::GCNSubtarget::hasReadVCCZBug ( ) const

inline

Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.

Definition at line 253 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::SEA_ISLANDS.

◆ hasRFEHazards()

bool llvm::GCNSubtarget::hasRFEHazards ( ) const

inline

Definition at line 270 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS.

◆ hasRrWGMode()

bool llvm::GCNSubtarget::hasRrWGMode ( ) const

inline

Definition at line 689 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasScalarAddSub64()

bool llvm::GCNSubtarget::hasScalarAddSub64 ( ) const

inline

Definition at line 396 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasScalarCompareEq64()

bool llvm::GCNSubtarget::hasScalarCompareEq64 ( ) const

inline

Definition at line 463 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS.

◆ hasScalarMulHiInsts()

bool llvm::GCNSubtarget::hasScalarMulHiInsts ( ) const

inline

Definition at line 225 of file GCNSubtarget.h.

◆ hasScalarPackInsts()

bool llvm::GCNSubtarget::hasScalarPackInsts ( ) const

inline

Definition at line 223 of file GCNSubtarget.h.

◆ hasScalarSMulU64()

bool llvm::GCNSubtarget::hasScalarSMulU64 ( ) const

inline

Definition at line 398 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasScalarSubwordLoads()

bool llvm::GCNSubtarget::hasScalarSubwordLoads ( ) const

inline

Definition at line 227 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasScaleOffset()

bool llvm::GCNSubtarget::hasScaleOffset ( ) const

inline

Definition at line 502 of file GCNSubtarget.h.

◆ hasSCmpK()

bool llvm::GCNSubtarget::hasSCmpK ( ) const

inline

Definition at line 422 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasScratchBaseForwardingHazard()

bool llvm::GCNSubtarget::hasScratchBaseForwardingHazard ( ) const

inline

Definition at line 967 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasSGetShaderCyclesInst()

bool llvm::GCNSubtarget::hasSGetShaderCyclesInst ( ) const

inline

Definition at line 713 of file GCNSubtarget.h.

◆ hasShift64HighRegBug()

bool llvm::GCNSubtarget::hasShift64HighRegBug ( ) const

inline

Definition at line 543 of file GCNSubtarget.h.

◆ hasSignedGVSOffset()

bool llvm::GCNSubtarget::hasSignedGVSOffset ( ) const

inline

Definition at line 505 of file GCNSubtarget.h.

◆ hasSignedScratchOffsets()

bool llvm::GCNSubtarget::hasSignedScratchOffsets ( ) const

inline

Returns: true if VADDR and SADDR fields in VSCRATCH can use negative values.

Definition at line 693 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasSMRDReadVALUDefHazard()

bool llvm::GCNSubtarget::hasSMRDReadVALUDefHazard ( ) const

inline

A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU instruction.

Definition at line 260 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS.

◆ hasSPackHL()

bool llvm::GCNSubtarget::hasSPackHL ( ) const

inline

Return true if the target has the S_PACK_HL_B32_B16 instruction.

Definition at line 601 of file GCNSubtarget.h.

◆ hasSplitBarriers()

bool llvm::GCNSubtarget::hasSplitBarriers ( ) const

inline

Definition at line 686 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasSubClampInsts()

bool llvm::GCNSubtarget::hasSubClampInsts ( ) const

inline

Definition at line 730 of file GCNSubtarget.h.

◆ hasSwap()

bool llvm::GCNSubtarget::hasSwap ( ) const

inline

Definition at line 221 of file GCNSubtarget.h.

◆ hasTransForwardingHazard()

bool llvm::GCNSubtarget::hasTransForwardingHazard ( ) const

inline

Definition at line 547 of file GCNSubtarget.h.

◆ hasUnalignedBufferAccessEnabled()

bool llvm::GCNSubtarget::hasUnalignedBufferAccessEnabled ( ) const

inline

Definition at line 311 of file GCNSubtarget.h.

◆ hasUnalignedDSAccessEnabled()

bool llvm::GCNSubtarget::hasUnalignedDSAccessEnabled ( ) const

inline

Definition at line 315 of file GCNSubtarget.h.

◆ hasUnalignedScratchAccessEnabled()

bool llvm::GCNSubtarget::hasUnalignedScratchAccessEnabled ( ) const

inline

Definition at line 319 of file GCNSubtarget.h.

◆ hasUsableDivScaleConditionOutput()

bool llvm::GCNSubtarget::hasUsableDivScaleConditionOutput ( ) const

inline

Condition output from div_scale is usable.

Definition at line 247 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS.

◆ hasUsableDSOffset()

bool llvm::GCNSubtarget::hasUsableDSOffset ( ) const

inline

True if the offset field of DS instructions works as expected.

On SI, the offset uses a 16-bit adder and does not always wrap properly.

Definition at line 240 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::SEA_ISLANDS.

◆ hasUserSGPRInit16BugInWave32()

bool llvm::GCNSubtarget::hasUserSGPRInit16BugInWave32 ( ) const

inline

Definition at line 509 of file GCNSubtarget.h.

References isWave32().

◆ hasVALUMaskWriteHazard()

bool llvm::GCNSubtarget::hasVALUMaskWriteHazard ( ) const

inline

Definition at line 587 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX11.

◆ hasVALUPartialForwardingHazard()

bool llvm::GCNSubtarget::hasVALUPartialForwardingHazard ( ) const

inline

Definition at line 571 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX11.

◆ hasVALUReadSGPRHazard()

bool llvm::GCNSubtarget::hasVALUReadSGPRHazard ( ) const

inline

Definition at line 589 of file GCNSubtarget.h.

◆ hasVDecCoExecHazard()

bool llvm::GCNSubtarget::hasVDecCoExecHazard ( ) const

inline

Definition at line 557 of file GCNSubtarget.h.

◆ hasVectorMulU64()

bool llvm::GCNSubtarget::hasVectorMulU64 ( ) const

inline

Definition at line 700 of file GCNSubtarget.h.

◆ hasVINTERPEncoding()

bool llvm::GCNSubtarget::hasVINTERPEncoding ( ) const

inline

Definition at line 359 of file GCNSubtarget.h.

◆ hasVMEMReadSGPRVALUDefHazard()

bool llvm::GCNSubtarget::hasVMEMReadSGPRVALUDefHazard ( ) const

inline

A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU instruction.

Definition at line 266 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS.

◆ hasVOP3DPP()

bool llvm::GCNSubtarget::hasVOP3DPP ( ) const

inline

Definition at line 565 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX11.

◆ hasVOPD3()

bool llvm::GCNSubtarget::hasVOPD3 ( ) const

inline

Definition at line 697 of file GCNSubtarget.h.

◆ haveRoundOpsF64()

bool llvm::GCNSubtarget::haveRoundOpsF64 ( ) const

inline

Have v_trunc_f64, v_ceil_f64, v_rndne_f64.

Definition at line 299 of file GCNSubtarget.h.

◆ initializeSubtargetDependencies()

GCNSubtarget & GCNSubtarget::initializeSubtargetDependencies	(	const Triple &	TT,
		StringRef	GPU,
		StringRef	FS )

Definition at line 57 of file GCNSubtarget.cpp.

Referenced by GCNSubtarget().

◆ isCuModeEnabled()

bool llvm::GCNSubtarget::isCuModeEnabled ( ) const

inline

Definition at line 327 of file GCNSubtarget.h.

◆ isDynamicVGPREnabled()

bool llvm::GCNSubtarget::isDynamicVGPREnabled ( ) const

inline

Definition at line 948 of file GCNSubtarget.h.

References DynamicVGPR.

Referenced by computeOccupancy(), and getMaxNumVGPRs().

◆ isGFX11Plus()

bool llvm::GCNSubtarget::isGFX11Plus ( ) const

inline

Definition at line 149 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX11.

◆ isMesaGfxShader()

bool llvm::GCNSubtarget::isMesaGfxShader ( const Function & F ) const

inline

Definition at line 401 of file GCNSubtarget.h.

References F, llvm::AMDGPUSubtarget::isMesa3DOS(), and llvm::AMDGPU::isShader().

◆ isPreciseMemoryEnabled()

bool llvm::GCNSubtarget::isPreciseMemoryEnabled ( ) const

inline

Definition at line 329 of file GCNSubtarget.h.

◆ isTgSplitEnabled()

bool llvm::GCNSubtarget::isTgSplitEnabled ( ) const

inline

Definition at line 325 of file GCNSubtarget.h.

Referenced by requiresWaitOnWorkgroupReleaseFence().

◆ isWave32()

bool llvm::GCNSubtarget::isWave32 ( ) const

inline

Definition at line 883 of file GCNSubtarget.h.

References llvm::AMDGPUSubtarget::getWavefrontSize().

Referenced by hasUserSGPRInit16BugInWave32(), mirFileLoaded(), and supportsWaveWideBPermute().

◆ isWave64()

bool llvm::GCNSubtarget::isWave64 ( ) const

inline

Definition at line 885 of file GCNSubtarget.h.

References llvm::AMDGPUSubtarget::getWavefrontSize().

Referenced by lowerWaveShuffle().

◆ isWaveSizeKnown()

bool llvm::GCNSubtarget::isWaveSizeKnown ( ) const

inline

Returns true if the wave size of this subtarget is reliably known.

This is false only for a default target-cpu that does not have an explicit +wavefrontsize target feature.

Definition at line 890 of file GCNSubtarget.h.

◆ isXNACKEnabled()

bool llvm::GCNSubtarget::isXNACKEnabled ( ) const

inline

Definition at line 323 of file GCNSubtarget.h.

References TargetID.

Referenced by getBaseReservedNumSGPRs(), and needsConstrainedOpcode().

◆ ldsRequiresM0Init()

bool llvm::GCNSubtarget::ldsRequiresM0Init ( ) const

inline

Return true if most LDS instructions have an m0 use that requires m0 to be initialized.

Definition at line 384 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ loadStoreOptEnabled()

bool llvm::GCNSubtarget::loadStoreOptEnabled ( ) const

inline

Definition at line 507 of file GCNSubtarget.h.

References EnableLoadStoreOpt.

◆ maxHardClauseLength()

unsigned llvm::GCNSubtarget::maxHardClauseLength ( ) const

inline

Returns: The maximum number of instructions that can be enclosed in an S_CLAUSE on the given subtarget, or 0 for targets that do not support that instruction.

Definition at line 648 of file GCNSubtarget.h.

References MaxHardClauseLength.

◆ mirFileLoaded()

void GCNSubtarget::mirFileLoaded ( MachineFunction & MF ) const

override

Definition at line 388 of file GCNSubtarget.cpp.

References isWave32(), MBB, and MI.

◆ needsAlignedVGPRs()

bool llvm::GCNSubtarget::needsAlignedVGPRs ( ) const

inline

Return true if operations acting on VGPR tuples require even alignment.

Definition at line 598 of file GCNSubtarget.h.

◆ needsKernArgPreloadProlog()

bool llvm::GCNSubtarget::needsKernArgPreloadProlog ( ) const

inline

Definition at line 724 of file GCNSubtarget.h.

◆ overridePostRASchedPolicy()

void GCNSubtarget::overridePostRASchedPolicy	(	MachineSchedPolicy &	Policy,
		const SchedRegion &	Region ) const

override

Definition at line 351 of file GCNSubtarget.cpp.

References llvm::dbgs(), F, llvm::Attribute::getValueAsString(), llvm::Attribute::isValid(), LLVM_DEBUG, llvm::MachineSchedPolicy::OnlyBottomUp, and llvm::MachineSchedPolicy::OnlyTopDown.

◆ overrideSchedPolicy()

void GCNSubtarget::overrideSchedPolicy	(	MachineSchedPolicy &	Policy,
		const SchedRegion &	Region ) const

override

Definition at line 334 of file GCNSubtarget.cpp.

References llvm::MachineSchedPolicy::OnlyBottomUp, llvm::MachineSchedPolicy::OnlyTopDown, llvm::MachineSchedPolicy::ShouldTrackLaneMasks, and llvm::MachineSchedPolicy::ShouldTrackPressure.

◆ ParseSubtargetFeatures()

void llvm::GCNSubtarget::ParseSubtargetFeatures	(	StringRef	CPU,
		StringRef	TuneCPU,
		StringRef	FS )

Referenced by initializeSubtargetDependencies().

◆ partialVCCWritesUpdateVCCZ()

bool llvm::GCNSubtarget::partialVCCWritesUpdateVCCZ ( ) const

inline

Writes to VCC_LO/VCC_HI update the VCCZ flag.

Definition at line 256 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX10.

◆ privateMemoryResourceIsRangeChecked()

bool llvm::GCNSubtarget::privateMemoryResourceIsRangeChecked ( ) const

inline

Returns: If MUBUF instructions always perform range checking, even for buffer resources used for private memory access.

Definition at line 303 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ requiresCodeObjectV6()

bool llvm::GCNSubtarget::requiresCodeObjectV6 ( ) const

inline

Definition at line 583 of file GCNSubtarget.h.

◆ requiresDisjointEarlyClobberAndUndef()

bool llvm::GCNSubtarget::requiresDisjointEarlyClobberAndUndef ( ) const

inline override

Definition at line 953 of file GCNSubtarget.h.

◆ requiresNopBeforeDeallocVGPRs()

bool llvm::GCNSubtarget::requiresNopBeforeDeallocVGPRs ( ) const

inline

Definition at line 942 of file GCNSubtarget.h.

◆ requiresWaitIdleBeforeGetReg()

bool llvm::GCNSubtarget::requiresWaitIdleBeforeGetReg ( ) const

inline

Definition at line 946 of file GCNSubtarget.h.

◆ requiresWaitOnWorkgroupReleaseFence()

bool llvm::GCNSubtarget::requiresWaitOnWorkgroupReleaseFence ( ) const

inline

Definition at line 1014 of file GCNSubtarget.h.

References getGeneration(), llvm::AMDGPUSubtarget::GFX10, and isTgSplitEnabled().

◆ requiresWaitXCntForSingleAccessInstructions()

bool llvm::GCNSubtarget::requiresWaitXCntForSingleAccessInstructions ( ) const

inline

Returns: true if the subtarget requires a wait for xcnt before VMEM accesses that must never be repeated in the event of a page fault/re-try. Atomic stores/rmw and all volatile accesses fall under these criteria.

Definition at line 980 of file GCNSubtarget.h.

◆ setRegModeNeedsVNOPs()

bool llvm::GCNSubtarget::setRegModeNeedsVNOPs ( ) const

inline

Definition at line 593 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ setScalarizeGlobalBehavior()

void llvm::GCNSubtarget::setScalarizeGlobalBehavior ( bool b )

inline

Definition at line 440 of file GCNSubtarget.h.

References ScalarizeGlobal.

◆ shouldClusterStores()

bool llvm::GCNSubtarget::shouldClusterStores ( ) const

inline

Definition at line 934 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX11.

◆ supportsBPermute()

bool llvm::GCNSubtarget::supportsBPermute ( ) const

inline

Definition at line 994 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS.

◆ supportsGetDoorbellID()

bool llvm::GCNSubtarget::supportsGetDoorbellID ( ) const

inline

Definition at line 233 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ supportsMinMaxDenormModes()

bool llvm::GCNSubtarget::supportsMinMaxDenormModes ( ) const

inline

Definition at line 282 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ supportsWave32()

bool llvm::GCNSubtarget::supportsWave32 ( ) const

inline

Definition at line 879 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX10.

◆ supportsWave64()

bool llvm::GCNSubtarget::supportsWave64 ( ) const

inline

Definition at line 881 of file GCNSubtarget.h.

◆ supportsWaveWideBPermute()

bool llvm::GCNSubtarget::supportsWaveWideBPermute ( ) const

inline

Definition at line 998 of file GCNSubtarget.h.

References getGeneration(), llvm::AMDGPUSubtarget::GFX12, llvm::AMDGPUSubtarget::GFX9, and isWave32().

Referenced by lowerWaveShuffle().

◆ supportsWGP()

bool llvm::GCNSubtarget::supportsWGP ( ) const

inline

Definition at line 191 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX10.

Referenced by llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV4::getHSAKernelProps().

◆ unsafeDSOffsetFoldingEnabled()

bool llvm::GCNSubtarget::unsafeDSOffsetFoldingEnabled ( ) const

inline

Definition at line 242 of file GCNSubtarget.h.

◆ useAA()

bool GCNSubtarget::useAA ( ) const

override

Definition at line 407 of file GCNSubtarget.cpp.

References UseAA.

◆ useDS128()

bool llvm::GCNSubtarget::useDS128 ( ) const

inline

Returns: If target supports ds_read/write_b128 and user enables generation of ds_read/write_b128.

Definition at line 293 of file GCNSubtarget.h.

◆ usePRTStrictNull()

bool llvm::GCNSubtarget::usePRTStrictNull ( ) const

inline

Returns: If target requires PRT Strict NULL support (zero result registers for sparse texture support).

Definition at line 309 of file GCNSubtarget.h.

◆ useRealTrue16Insts()

bool llvm::GCNSubtarget::useRealTrue16Insts ( ) const

inline

Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-generated.

Fake True16 instructions are identical to non-fake ones except that they take 32-bit registers as operands and always use their low halves.

Definition at line 1010 of file GCNSubtarget.h.

◆ useVGPRBlockOpsForCSR()

bool llvm::GCNSubtarget::useVGPRBlockOpsForCSR ( ) const

inline

Definition at line 585 of file GCNSubtarget.h.

◆ useVGPRIndexMode()

bool GCNSubtarget::useVGPRIndexMode ( ) const

Definition at line 403 of file GCNSubtarget.cpp.

References EnableVGPRIndexMode.

◆ vmemWriteNeedsExpWaitcnt()

bool llvm::GCNSubtarget::vmemWriteNeedsExpWaitcnt ( ) const

inline

Definition at line 411 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::SEA_ISLANDS.

◆ zeroesHigh16BitsOfDest()

bool GCNSubtarget::zeroesHigh16BitsOfDest ( unsigned Opcode ) const

Returns true if the result of this instruction with a 16-bit result returned in a 32-bit register implicitly zeroes the high 16 bits, rather than preserving the original value.

This list was mostly derived from experimentation.

Definition at line 236 of file GCNSubtarget.cpp.

References getGeneration(), llvm::AMDGPUSubtarget::GFX9, and llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS.

Member Data Documentation

◆ DynamicVGPR

bool llvm::GCNSubtarget::DynamicVGPR = false

protected

Definition at line 73 of file GCNSubtarget.h.

Referenced by isDynamicVGPREnabled().

◆ DynamicVGPRBlockSize32

bool llvm::GCNSubtarget::DynamicVGPRBlockSize32 = false

protected

Definition at line 74 of file GCNSubtarget.h.

Referenced by getDynamicVGPRBlockSize().

◆ Gen

unsigned llvm::GCNSubtarget::Gen = INVALID

protected

Definition at line 64 of file GCNSubtarget.h.

Referenced by getGeneration(), and initializeSubtargetDependencies().

◆ InstCacheLineSize

unsigned llvm::GCNSubtarget::InstCacheLineSize = 0

protected

Definition at line 70 of file GCNSubtarget.h.

Referenced by getInstCacheLineSize(), and initializeSubtargetDependencies().

◆ InstrItins

InstrItineraryData llvm::GCNSubtarget::InstrItins

protected

Definition at line 65 of file GCNSubtarget.h.

Referenced by GCNSubtarget(), and getInstrItineraryData().

◆ LDSBankCount

int llvm::GCNSubtarget::LDSBankCount = 0

protected

Definition at line 66 of file GCNSubtarget.h.

Referenced by getLDSBankCount(), and initializeSubtargetDependencies().

◆ MaxHardClauseLength

unsigned llvm::GCNSubtarget::MaxHardClauseLength = 0

protected

The maximum number of instructions that may be placed within an S_CLAUSE, which is one greater than the maximum argument to S_CLAUSE.

A value of 0 indicates a lack of S_CLAUSE support.

Definition at line 80 of file GCNSubtarget.h.

Referenced by hasHardClauses(), and maxHardClauseLength().

◆ MaxPrivateElementSize

unsigned llvm::GCNSubtarget::MaxPrivateElementSize = 0

protected

Definition at line 67 of file GCNSubtarget.h.

Referenced by getMaxPrivateElementSize(), and initializeSubtargetDependencies().

◆ ScalarizeGlobal

bool llvm::GCNSubtarget::ScalarizeGlobal = false

protected

Definition at line 75 of file GCNSubtarget.h.

Referenced by getScalarizeGlobalBehavior(), and setScalarizeGlobalBehavior().

◆ TargetID

AMDGPU::IsaInfo::AMDGPUTargetID llvm::GCNSubtarget::TargetID

protected

Definition at line 63 of file GCNSubtarget.h.

Referenced by d16PreservesUnusedBits(), GCNSubtarget(), getTargetID(), initializeSubtargetDependencies(), and isXNACKEnabled().

The documentation for this class was generated from the following files:

lib/Target/AMDGPU/GCNSubtarget.h
lib/Target/AMDGPU/GCNSubtarget.cpp

Public Types

Public Member Functions

Static Public Member Functions

Protected Attributes

Detailed Description

Member Enumeration Documentation

◆ TrapHandlerAbi

◆ TrapID

Constructor & Destructor Documentation

◆ GCNSubtarget()

◆ ~GCNSubtarget()

Member Function Documentation

◆ adjustSchedDependency()

◆ checkSubtargetFeatures()

◆ computeOccupancy()

◆ d16PreservesUnusedBits()

◆ enableEarlyIfConversion()

◆ enableMachineScheduler()

◆ enableSubRegLiveness()

◆ flatScratchIsPointer()

◆ getAddressableNumArchVGPRs()

◆ getAddressableNumSGPRs()

◆ getAddressableNumVGPRs()

◆ getBaseMaxNumSGPRs()

◆ getBaseMaxNumVGPRs()

◆ getBaseReservedNumSGPRs()

◆ getBoolRC()

◆ getCallLowering()

◆ getConstantBusLimit()

◆ getDynamicVGPRBlockSize()

◆ getFrameLowering()

◆ getGeneration()

◆ getInlineAsmLowering()

◆ getInstCacheLineSize()

◆ getInstrInfo()

◆ getInstrItineraryData()

◆ getInstructionSelector()

◆ getKnownHighZeroBitsForFrameIndex()

◆ getLDSBankCount()

◆ getLegalizerInfo()

◆ getMaxFlatWorkGroupSize()

◆ getMaxLocalMemSizeWithWaveCount()

◆ getMaxNumAGPRs()

◆ getMaxNumPreloadedSGPRs()

◆ getMaxNumSGPRs() [1/3]

◆ getMaxNumSGPRs() [2/3]

◆ getMaxNumSGPRs() [3/3]

◆ getMaxNumUserSGPRs()

◆ getMaxNumVectorRegs()

◆ getMaxNumVGPRs() [1/3]

◆ getMaxNumVGPRs() [2/3]

◆ getMaxNumVGPRs() [3/3]

◆ getMaxPrivateElementSize()

◆ getMaxWaveScratchSize()

◆ getMaxWavesPerEU()

◆ getMaxWorkGroupsPerCU()

◆ getMinFlatWorkGroupSize()

◆ getMinNumSGPRs()

◆ getMinNumVGPRs()

◆ getMinWavesPerEU()

◆ getNSAMaxSize()

◆ getNSAThreshold()

◆ getOccupancyWithNumSGPRs()

◆ getOccupancyWithNumVGPRs()

◆ getRegBankInfo()

◆ getRegisterInfo()

◆ getReservedNumSGPRs() [1/2]

◆ getReservedNumSGPRs() [2/2]

◆ getScalarizeGlobalBehavior()

◆ getSelectionDAGInfo()

◆ getSetRegWaitStates()

◆ getSGPRAllocGranule()

◆ getSGPREncodingGranule()

◆ getSNopBits()

◆ getStackAlignment()

◆ getTargetID()

◆ getTargetLowering()

◆ getTotalNumSGPRs()

◆ getTotalNumVGPRs()

◆ getTrapHandlerAbi()