#include "Target/AMDGPU/AMDGPUSubtarget.h"

Inheritance diagram for llvm::AMDGPUSubtarget:

Public Types
enum	Generation { INVALID = 0 , R600 = 1 , R700 = 2 , EVERGREEN = 3 , NORTHERN_ISLANDS = 4 , SOUTHERN_ISLANDS = 5 , SEA_ISLANDS = 6 , VOLCANIC_ISLANDS = 7 , GFX9 = 8 , GFX10 = 9 , GFX11 = 10 , GFX12 = 11 }

Public Member Functions
	AMDGPUSubtarget (Triple TT)

std::pair< unsigned, unsigned >	getDefaultFlatWorkGroupSize (CallingConv::ID CC) const

std::pair< unsigned, unsigned >	getFlatWorkGroupSizes (const Function &F) const

std::pair< unsigned, unsigned >	getWavesPerEU (const Function &F) const

std::pair< unsigned, unsigned >	getWavesPerEU (const Function &F, std::pair< unsigned, unsigned > FlatWorkGroupSizes) const
	Overload which uses the specified values for the flat work group sizes, rather than querying the function itself.

std::pair< unsigned, unsigned >	getEffectiveWavesPerEU (std::pair< unsigned, unsigned > WavesPerEU, std::pair< unsigned, unsigned > FlatWorkGroupSizes) const

unsigned	getMaxLocalMemSizeWithWaveCount (unsigned WaveCount, const Function &) const
	Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.

unsigned	getOccupancyWithLocalMemSize (uint32_t Bytes, const Function &) const
	Inverse of getMaxLocalMemSizeWithWaveCount.

unsigned	getOccupancyWithLocalMemSize (const MachineFunction &MF) const

bool	isAmdHsaOS () const

bool	isAmdPalOS () const

bool	isMesa3DOS () const

bool	isMesaKernel (const Function &F) const

bool	isAmdHsaOrMesa (const Function &F) const

bool	isGCN () const

bool	isGCN3Encoding () const

bool	has16BitInsts () const

bool	hasTrue16BitInsts () const
	Return true if the subtarget supports True16 instructions.

bool	useRealTrue16Insts () const
	Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-generated.

bool	hasMadMixInsts () const

bool	hasMadMacF32Insts () const

bool	hasDsSrc2Insts () const

bool	hasSDWA () const

bool	hasVOP3PInsts () const

bool	hasMulI24 () const

bool	hasMulU24 () const

bool	hasSMulHi () const

bool	hasInv2PiInlineImm () const

bool	hasFminFmaxLegacy () const

bool	hasTrigReducedRange () const

bool	hasFastFMAF32 () const

bool	isPromoteAllocaEnabled () const

unsigned	getWavefrontSize () const

unsigned	getWavefrontSizeLog2 () const

unsigned	getLocalMemorySize () const

unsigned	getAddressableLocalMemorySize () const

unsigned	getEUsPerCU () const
	Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto which workgroups are mapped.

Align	getAlignmentForImplicitArgPtr () const

unsigned	getExplicitKernelArgOffset () const
	Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.

virtual unsigned	getMaxWorkGroupsPerCU (unsigned FlatWorkGroupSize) const =0

virtual unsigned	getMinFlatWorkGroupSize () const =0

virtual unsigned	getMaxFlatWorkGroupSize () const =0

virtual unsigned	getWavesPerEUForWorkGroup (unsigned FlatWorkGroupSize) const =0

virtual unsigned	getMinWavesPerEU () const =0

unsigned	getMaxWavesPerEU () const

unsigned	getMaxWorkitemID (const Function &Kernel, unsigned Dimension) const
	Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.

SmallVector< unsigned >	getMaxNumWorkGroups (const Function &F) const
	Return the number of work groups for the function.

bool	isSingleLaneExecution (const Function &Kernel) const
	Return true if only a single workitem can be active in a wave.

bool	makeLIDRangeMetadata (Instruction *I) const
	Creates value range metadata on a workitemid.* intrinsic call or load.

unsigned	getImplicitArgNumBytes (const Function &F) const

uint64_t	getExplicitKernArgSize (const Function &F, Align &MaxAlign) const

unsigned	getKernArgSegmentSize (const Function &F, Align &MaxAlign) const

AMDGPUDwarfFlavour	getAMDGPUDwarfFlavour () const

virtual	~AMDGPUSubtarget ()=default

Static Public Member Functions
static const AMDGPUSubtarget &	get (const MachineFunction &MF)

static const AMDGPUSubtarget &	get (const TargetMachine &TM, const Function &F)

Protected Attributes
bool	GCN3Encoding = false

bool	Has16BitInsts = false

bool	HasTrue16BitInsts = false

bool	EnableRealTrue16Insts = false

bool	HasMadMixInsts = false

bool	HasMadMacF32Insts = false

bool	HasDsSrc2Insts = false

bool	HasSDWA = false

bool	HasVOP3PInsts = false

bool	HasMulI24 = true

bool	HasMulU24 = true

bool	HasSMulHi = false

bool	HasInv2PiInlineImm = false

bool	HasFminFmaxLegacy = true

bool	EnablePromoteAlloca = false

bool	HasTrigReducedRange = false

bool	FastFMAF32 = false

unsigned	EUsPerCU = 4

unsigned	MaxWavesPerEU = 10

unsigned	LocalMemorySize = 0

unsigned	AddressableLocalMemorySize = 0

char	WavefrontSizeLog2 = 0

Detailed Description

Definition at line 29 of file AMDGPUSubtarget.h.

Member Enumeration Documentation

◆ Generation

enum llvm::AMDGPUSubtarget::Generation

Enumerator
INVALID
R600
R700
EVERGREEN
NORTHERN_ISLANDS
SOUTHERN_ISLANDS
SEA_ISLANDS
VOLCANIC_ISLANDS
GFX9
GFX10
GFX11
GFX12

Definition at line 31 of file AMDGPUSubtarget.h.

Constructor & Destructor Documentation

◆ AMDGPUSubtarget()

AMDGPUSubtarget::AMDGPUSubtarget ( Triple TT )

Definition at line 186 of file AMDGPUSubtarget.cpp.

◆ ~AMDGPUSubtarget()

virtual llvm::AMDGPUSubtarget::~AMDGPUSubtarget ( )

virtual default

Member Function Documentation

◆ get() [1/2]

const AMDGPUSubtarget & AMDGPUSubtarget::get ( const MachineFunction & MF )

static

Definition at line 1038 of file AMDGPUSubtarget.cpp.

References llvm::Triple::amdgcn, llvm::Triple::getArch(), llvm::MachineFunction::getSubtarget(), llvm::MachineFunction::getTarget(), and llvm::TargetMachine::getTargetTriple().

Referenced by llvm::AMDGPUTargetLowering::analyzeFormalArgumentsCompute().

◆ get() [2/2]

const AMDGPUSubtarget & AMDGPUSubtarget::get	(	const TargetMachine &	TM,
		const Function &	F
	)

static

Definition at line 1044 of file AMDGPUSubtarget.cpp.

References llvm::Triple::amdgcn, F, and TM.

◆ getAddressableLocalMemorySize()

unsigned llvm::AMDGPUSubtarget::getAddressableLocalMemorySize ( ) const

inline

Definition at line 233 of file AMDGPUSubtarget.h.

References AddressableLocalMemorySize.

◆ getAlignmentForImplicitArgPtr()

Align llvm::AMDGPUSubtarget::getAlignmentForImplicitArgPtr ( ) const

inline

Definition at line 242 of file AMDGPUSubtarget.h.

References isAmdHsaOS().

Referenced by llvm::AMDGPUTargetLowering::getImplicitParameterOffset(), and getKernArgSegmentSize().

◆ getAMDGPUDwarfFlavour()

AMDGPUDwarfFlavour AMDGPUSubtarget::getAMDGPUDwarfFlavour ( ) const

Returns: Corresponding DWARF register number mapping flavour for the WavefrontSize.

Definition at line 648 of file AMDGPUSubtarget.cpp.

References getWavefrontSize(), llvm::Wave32, and llvm::Wave64.

◆ getDefaultFlatWorkGroupSize()

std::pair< unsigned, unsigned > AMDGPUSubtarget::getDefaultFlatWorkGroupSize ( CallingConv::ID CC ) const

Returns: Default flat work group size range for a calling convention.

Definition at line 410 of file AMDGPUSubtarget.cpp.

References llvm::CallingConv::AMDGPU_ES, llvm::CallingConv::AMDGPU_GS, llvm::CallingConv::AMDGPU_HS, llvm::CallingConv::AMDGPU_LS, llvm::CallingConv::AMDGPU_PS, llvm::CallingConv::AMDGPU_VS, CC, getMaxFlatWorkGroupSize(), and getWavefrontSize().

Referenced by getFlatWorkGroupSizes().

◆ getEffectiveWavesPerEU()

std::pair< unsigned, unsigned > AMDGPUSubtarget::getEffectiveWavesPerEU	(	std::pair< unsigned, unsigned >	WavesPerEU,
		std::pair< unsigned, unsigned >	FlatWorkGroupSizes
	)		const

Definition at line 447 of file AMDGPUSubtarget.cpp.

References llvm::Default, getMaxWavesPerEU(), getMinWavesPerEU(), and getWavesPerEUForWorkGroup().

Referenced by getWavesPerEU().

◆ getEUsPerCU()

unsigned llvm::AMDGPUSubtarget::getEUsPerCU ( ) const

inline

Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto which workgroups are mapped.

This takes WGP mode vs. CU mode into account.

Definition at line 240 of file AMDGPUSubtarget.h.

References EUsPerCU.

Referenced by getMaxLocalMemSizeWithWaveCount(), and getOccupancyWithLocalMemSize().

◆ getExplicitKernArgSize()

uint64_t AMDGPUSubtarget::getExplicitKernArgSize	(	const Function &	F,
		Align &	MaxAlign
	)		const

Definition at line 604 of file AMDGPUSubtarget.cpp.

References llvm::alignTo(), llvm::CallingConv::AMDGPU_KERNEL, assert(), DL, F, and llvm::CallingConv::SPIR_KERNEL.

Referenced by getKernArgSegmentSize().

◆ getExplicitKernelArgOffset()

unsigned llvm::AMDGPUSubtarget::getExplicitKernelArgOffset ( ) const

inline

Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.

Definition at line 248 of file AMDGPUSubtarget.h.

References llvm::Triple::AMDHSA, llvm::Triple::AMDPAL, llvm::Triple::getOS(), llvm_unreachable, llvm::Triple::Mesa3D, and llvm::Triple::UnknownOS.

Referenced by llvm::AMDGPUTargetLowering::getImplicitParameterOffset(), getKernArgSegmentSize(), and llvm::AMDGPUCallLowering::lowerFormalArgumentsKernel().

◆ getFlatWorkGroupSizes()

std::pair< unsigned, unsigned > AMDGPUSubtarget::getFlatWorkGroupSizes ( const Function & F ) const

Returns: Subtarget's default pair of minimum/maximum flat work group sizes for function F, or minimum/maximum flat work group sizes explicitly requested using the "amdgpu-flat-work-group-size" attribute attached to function F; subtarget's default values if the explicitly requested values cannot be converted to integer, or violate subtarget's specifications.

Definition at line 424 of file AMDGPUSubtarget.cpp.

References llvm::Default, F, getDefaultFlatWorkGroupSize(), llvm::AMDGPU::getIntegerPairAttribute(), getMaxFlatWorkGroupSize(), and getMinFlatWorkGroupSize().

Referenced by getMaxLocalMemSizeWithWaveCount(), getMaxWorkitemID(), getOccupancyWithLocalMemSize(), getWavesPerEU(), and makeLIDRangeMetadata().

◆ getImplicitArgNumBytes()

unsigned AMDGPUSubtarget::getImplicitArgNumBytes ( const Function & F ) const

Returns: Number of bytes of arguments that are passed to a shader or kernel in addition to the explicit ones declared for the function.

Definition at line 585 of file AMDGPUSubtarget.cpp.

References llvm::AMDGPU::AMDHSA_COV5, assert(), F, llvm::AMDGPU::getAMDHSACodeObjectVersion(), llvm::AMDGPU::isKernel(), and isMesaKernel().

Referenced by getKernArgSegmentSize().

◆ getKernArgSegmentSize()

unsigned AMDGPUSubtarget::getKernArgSegmentSize	(	const Function &	F,
		Align &	MaxAlign
	)		const

Definition at line 626 of file AMDGPUSubtarget.cpp.

References llvm::alignTo(), llvm::CallingConv::AMDGPU_KERNEL, F, getAlignmentForImplicitArgPtr(), getExplicitKernArgSize(), getExplicitKernelArgOffset(), getImplicitArgNumBytes(), and llvm::CallingConv::SPIR_KERNEL.

Referenced by llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV4::getHSAKernelProps().

◆ getLocalMemorySize()

unsigned llvm::AMDGPUSubtarget::getLocalMemorySize ( ) const

inline

Definition at line 229 of file AMDGPUSubtarget.h.

References LocalMemorySize.

Referenced by getMaxLocalMemSizeWithWaveCount(), and getOccupancyWithLocalMemSize().

◆ getMaxFlatWorkGroupSize()

virtual unsigned llvm::AMDGPUSubtarget::getMaxFlatWorkGroupSize ( ) const

pure virtual

Returns: Maximum flat work group size supported by the subtarget.

Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.

Referenced by getDefaultFlatWorkGroupSize(), and getFlatWorkGroupSizes().

◆ getMaxLocalMemSizeWithWaveCount()

unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount	(	unsigned	WaveCount,
		const Function &	F
	)		const

Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.

Definition at line 345 of file AMDGPUSubtarget.cpp.

References F, getEUsPerCU(), getFlatWorkGroupSizes(), getLocalMemorySize(), and getWavefrontSize().

◆ getMaxNumWorkGroups()

SmallVector< unsigned > AMDGPUSubtarget::getMaxNumWorkGroups ( const Function & F ) const

Return the number of work groups for the function.

Definition at line 1131 of file AMDGPUSubtarget.cpp.

References F, and llvm::AMDGPU::getIntegerVecAttribute().

◆ getMaxWavesPerEU()

unsigned llvm::AMDGPUSubtarget::getMaxWavesPerEU ( ) const

inline

Returns: Maximum number of waves per execution unit supported by the subtarget without any kind of limitation.

Definition at line 285 of file AMDGPUSubtarget.h.

References MaxWavesPerEU.

Referenced by getEffectiveWavesPerEU(), getOccupancyWithLocalMemSize(), and getWavesPerEU().

◆ getMaxWorkGroupsPerCU()

virtual unsigned llvm::AMDGPUSubtarget::getMaxWorkGroupsPerCU ( unsigned FlatWorkGroupSize ) const

pure virtual

Returns: Maximum number of work groups per compute unit supported by the subtarget and limited by given FlatWorkGroupSize.

Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.

Referenced by getOccupancyWithLocalMemSize().

◆ getMaxWorkitemID()

unsigned AMDGPUSubtarget::getMaxWorkitemID	(	const Function &	Kernel,
		unsigned	Dimension
	)		const

Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.

Definition at line 500 of file AMDGPUSubtarget.cpp.

References getFlatWorkGroupSizes(), and getReqdWorkGroupSize().

Referenced by llvm::AMDGPUTargetLowering::computeKnownBitsForTargetNode(), llvm::GCNTTIImpl::isAlwaysUniform(), isSingleLaneExecution(), llvm::AMDGPULegalizerInfo::legalizeWorkitemIDIntrinsic(), and llvm::SITargetLowering::passSpecialInputs().

◆ getMinFlatWorkGroupSize()

virtual unsigned llvm::AMDGPUSubtarget::getMinFlatWorkGroupSize ( ) const

pure virtual

Returns: Minimum flat work group size supported by the subtarget.

Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.

Referenced by getFlatWorkGroupSizes().

◆ getMinWavesPerEU()

virtual unsigned llvm::AMDGPUSubtarget::getMinWavesPerEU ( ) const

pure virtual

Returns: Minimum number of waves per execution unit supported by the subtarget.

Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.

Referenced by getEffectiveWavesPerEU().

◆ getOccupancyWithLocalMemSize() [1/2]

unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize ( const MachineFunction & MF ) const

Definition at line 404 of file AMDGPUSubtarget.cpp.

References llvm::MachineFunction::getFunction(), llvm::MachineFunction::getInfo(), and getOccupancyWithLocalMemSize().

◆ getOccupancyWithLocalMemSize() [2/2]

unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize	(	uint32_t	Bytes,
		const Function &	F
	)		const

Inverse of getMaxLocalMemSizeWithWaveCount.

Return the maximum wavecount if the given LDS memory size is the only constraint.

Definition at line 364 of file AMDGPUSubtarget.cpp.

References assert(), llvm::divideCeil(), F, getEUsPerCU(), getFlatWorkGroupSizes(), getLocalMemorySize(), getMaxWavesPerEU(), getMaxWorkGroupsPerCU(), and getWavefrontSize().

Referenced by llvm::GCNSchedStage::checkScheduling(), llvm::GCNSubtarget::computeOccupancy(), getOccupancyWithLocalMemSize(), and llvm::SIRegisterInfo::getRegPressureLimit().

◆ getWavefrontSize()

unsigned llvm::AMDGPUSubtarget::getWavefrontSize ( ) const

inline

Definition at line 221 of file AMDGPUSubtarget.h.

References WavefrontSizeLog2.

Referenced by llvm::SIRegisterInfo::buildSpillLoadStore(), getAMDGPUDwarfFlavour(), getDefaultFlatWorkGroupSize(), llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV4::getHSAKernelProps(), getMaxLocalMemSizeWithWaveCount(), getOccupancyWithLocalMemSize(), llvm::SITargetLowering::getRegClassFor(), llvm::SIInstrInfo::getScratchRsrcWords23(), llvm::PhiLoweringHelper::isLaneMaskReg(), llvm::GCNSubtarget::isWave32(), llvm::GCNSubtarget::isWave64(), llvm::AMDGPULegalizerInfo::legalizeIntrinsic(), llvm::SITargetLowering::LowerCall(), lowerFCMPIntrinsic(), lowerICMPIntrinsic(), and llvm::SITargetLowering::requiresUniformRegister().

◆ getWavefrontSizeLog2()

unsigned llvm::AMDGPUSubtarget::getWavefrontSizeLog2 ( ) const

inline

Definition at line 225 of file AMDGPUSubtarget.h.

References WavefrontSizeLog2.

Referenced by llvm::SIRegisterInfo::eliminateFrameIndex(), llvm::GCNSubtarget::getKnownHighZeroBitsForFrameIndex(), llvm::GCNTTIImpl::isAlwaysUniform(), and llvm::SITargetLowering::lowerDYNAMIC_STACKALLOCImpl().

◆ getWavesPerEU() [1/2]

std::pair< unsigned, unsigned > llvm::AMDGPUSubtarget::getWavesPerEU ( const Function & F ) const

inline

Returns: Subtarget's default pair of minimum/maximum number of waves per execution unit for function F, or minimum/maximum number of waves per execution unit explicitly requested using the "amdgpu-waves-per-eu" attribute attached to function F; subtarget's default values if the explicitly requested values cannot be converted to integer, violate subtarget's specifications, or are not compatible with minimum/maximum number of waves limited by flat work group size, register usage, and/or lds usage.

Definition at line 101 of file AMDGPUSubtarget.h.

References F, getFlatWorkGroupSizes(), and getWavesPerEU().

Referenced by llvm::GCNSubtarget::getMaxNumSGPRs(), llvm::GCNSubtarget::getMaxNumVGPRs(), and getWavesPerEU().

◆ getWavesPerEU() [2/2]

std::pair< unsigned, unsigned > AMDGPUSubtarget::getWavesPerEU	(	const Function &	F,
		std::pair< unsigned, unsigned >	FlatWorkGroupSizes
	)		const

Overload which uses the specified values for the flat work group sizes, rather than querying the function itself.

FlatWorkGroupSizes should correspond to the function's value for getFlatWorkGroupSizes().

Definition at line 478 of file AMDGPUSubtarget.cpp.

References llvm::Default, F, getEffectiveWavesPerEU(), llvm::AMDGPU::getIntegerPairAttribute(), and getMaxWavesPerEU().

◆ getWavesPerEUForWorkGroup()

virtual unsigned llvm::AMDGPUSubtarget::getWavesPerEUForWorkGroup ( unsigned FlatWorkGroupSize ) const

pure virtual

Returns: Number of waves per execution unit required to support the given FlatWorkGroupSize.

Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.

Referenced by getEffectiveWavesPerEU().

◆ has16BitInsts()

bool llvm::AMDGPUSubtarget::has16BitInsts ( ) const

inline

Definition at line 154 of file AMDGPUSubtarget.h.

References Has16BitInsts.

◆ hasDsSrc2Insts()

bool llvm::AMDGPUSubtarget::hasDsSrc2Insts ( ) const

inline

Definition at line 177 of file AMDGPUSubtarget.h.

References HasDsSrc2Insts.

◆ hasFastFMAF32()

bool llvm::AMDGPUSubtarget::hasFastFMAF32 ( ) const

inline

Definition at line 213 of file AMDGPUSubtarget.h.

References FastFMAF32.

Referenced by llvm::GCNTTIImpl::getIntrinsicInstrCost(), llvm::SITargetLowering::isFMAFasterThanFMulAndFAdd(), llvm::AMDGPULegalizerInfo::legalizeFExp(), llvm::AMDGPULegalizerInfo::legalizeFlogCommon(), llvm::AMDGPULegalizerInfo::legalizeFlogUnsafe(), llvm::AMDGPUTargetLowering::lowerFEXP(), llvm::AMDGPUTargetLowering::LowerFLOGCommon(), and llvm::AMDGPUTargetLowering::LowerFLOGUnsafe().

◆ hasFminFmaxLegacy()

bool llvm::AMDGPUSubtarget::hasFminFmaxLegacy ( ) const

inline

Definition at line 205 of file AMDGPUSubtarget.h.

References HasFminFmaxLegacy.

Referenced by llvm::AMDGPUTargetLowering::performSelectCombine().

◆ hasInv2PiInlineImm()

bool llvm::AMDGPUSubtarget::hasInv2PiInlineImm ( ) const

inline

Definition at line 201 of file AMDGPUSubtarget.h.

References HasInv2PiInlineImm.

Referenced by llvm::SITargetLowering::checkAsmConstraintValA(), llvm::SIRegisterInfo::eliminateFrameIndex(), llvm::AMDGPUTargetLowering::getConstantNegateCost(), llvm::SIInstrInfo::isInlineConstant(), and llvm::SIInstrInfo::isOperandLegal().

◆ hasMadMacF32Insts()

bool llvm::AMDGPUSubtarget::hasMadMacF32Insts ( ) const

inline

Definition at line 173 of file AMDGPUSubtarget.h.

References HasMadMacF32Insts, and isGCN().

Referenced by llvm::AMDGPULegalizerInfo::AMDGPULegalizerInfo(), llvm::GCNTTIImpl::getArithmeticInstrCost(), llvm::SITargetLowering::isFMADLegal(), llvm::SITargetLowering::isFMAFasterThanFMulAndFAdd(), llvm::AMDGPUTargetLowering::LowerDIVREM24(), llvm::AMDGPUTargetLowering::LowerUDIVREM64(), and llvm::SITargetLowering::SITargetLowering().

◆ hasMadMixInsts()

bool llvm::AMDGPUSubtarget::hasMadMixInsts ( ) const

inline

Definition at line 169 of file AMDGPUSubtarget.h.

References HasMadMixInsts.

Referenced by llvm::SITargetLowering::isFPExtFoldable().

◆ hasMulI24()

bool llvm::AMDGPUSubtarget::hasMulI24 ( ) const

inline

Definition at line 189 of file AMDGPUSubtarget.h.

References HasMulI24.

Referenced by llvm::AMDGPUTargetLowering::performMulCombine(), llvm::AMDGPUTargetLowering::performMulhsCombine(), and llvm::AMDGPUTargetLowering::performMulLoHiCombine().

◆ hasMulU24()

bool llvm::AMDGPUSubtarget::hasMulU24 ( ) const

inline

Definition at line 193 of file AMDGPUSubtarget.h.

References HasMulU24.

Referenced by llvm::AMDGPUTargetLowering::performMulCombine(), llvm::AMDGPUTargetLowering::performMulhuCombine(), and llvm::AMDGPUTargetLowering::performMulLoHiCombine().

◆ hasSDWA()

bool llvm::AMDGPUSubtarget::hasSDWA ( ) const

inline

Definition at line 181 of file AMDGPUSubtarget.h.

References HasSDWA.

Referenced by llvm::AMDGPUTargetLowering::PerformDAGCombine(), and llvm::SIInstrInfo::verifyInstruction().

◆ hasSMulHi()

bool llvm::AMDGPUSubtarget::hasSMulHi ( ) const

inline

Definition at line 197 of file AMDGPUSubtarget.h.

References HasSMulHi.

Referenced by llvm::AMDGPURegisterBankInfo::applyMappingMAD_64_32(), llvm::AMDGPUTargetLowering::performMulhsCombine(), and llvm::AMDGPUTargetLowering::performMulhuCombine().

◆ hasTrigReducedRange()

bool llvm::AMDGPUSubtarget::hasTrigReducedRange ( ) const

inline

Definition at line 209 of file AMDGPUSubtarget.h.

References HasTrigReducedRange.

Referenced by llvm::AMDGPULegalizerInfo::legalizeSinCos().

◆ hasTrue16BitInsts()

bool llvm::AMDGPUSubtarget::hasTrue16BitInsts ( ) const

inline

Return true if the subtarget supports True16 instructions.

Definition at line 159 of file AMDGPUSubtarget.h.

References HasTrue16BitInsts.

Referenced by llvm::SIInstrInfo::convertToThreeAddress(), llvm::SIInstrInfo::copyPhysReg(), llvm::SIInstrInfo::foldImmediate(), and useRealTrue16Insts().

◆ hasVOP3PInsts()

bool llvm::AMDGPUSubtarget::hasVOP3PInsts ( ) const

inline

Definition at line 185 of file AMDGPUSubtarget.h.

References HasVOP3PInsts.

Referenced by llvm::AMDGPULegalizerInfo::AMDGPULegalizerInfo(), llvm::GCNTTIImpl::getArithmeticReductionCost(), llvm::GCNTTIImpl::getMinMaxReductionCost(), llvm::GCNTTIImpl::getShuffleCost(), and llvm::SITargetLowering::SITargetLowering().

◆ isAmdHsaOrMesa()

bool llvm::AMDGPUSubtarget::isAmdHsaOrMesa ( const Function & F ) const

inline

Definition at line 142 of file AMDGPUSubtarget.h.

References F, isAmdHsaOS(), and isMesaKernel().

◆ isAmdHsaOS()

bool llvm::AMDGPUSubtarget::isAmdHsaOS ( ) const

inline

Definition at line 128 of file AMDGPUSubtarget.h.

References llvm::Triple::AMDHSA, and llvm::Triple::getOS().

Referenced by getAlignmentForImplicitArgPtr(), llvm::SIInstrInfo::getDefaultRsrcDataFormat(), llvm::GCNSubtarget::getTrapHandlerAbi(), llvm::GCNSubtarget::initializeSubtargetDependencies(), isAmdHsaOrMesa(), llvm::SITargetLowering::isOffsetFoldingLegal(), llvm::SITargetLowering::LowerFormalArguments(), and llvm::AMDGPUAsmPrinter::runOnMachineFunction().

◆ isAmdPalOS()

bool llvm::AMDGPUSubtarget::isAmdPalOS ( ) const

inline

Definition at line 132 of file AMDGPUSubtarget.h.

References llvm::Triple::AMDPAL, and llvm::Triple::getOS().

Referenced by llvm::AMDGPULegalizerInfo::legalizeGlobalValue(), llvm::SITargetLowering::LowerFormalArguments(), llvm::AMDGPUAsmPrinter::runOnMachineFunction(), and llvm::SITargetLowering::shouldEmitGOTReloc().

◆ isGCN()

bool llvm::AMDGPUSubtarget::isGCN ( ) const

inline

Definition at line 146 of file AMDGPUSubtarget.h.

References llvm::Triple::amdgcn, and llvm::Triple::getArch().

Referenced by hasMadMacF32Insts(), llvm::AMDGPUTargetLowering::LowerDIVREM24(), and llvm::AMDGPUTargetLowering::LowerINT_TO_FP32().

◆ isGCN3Encoding()

bool llvm::AMDGPUSubtarget::isGCN3Encoding ( ) const

inline

Definition at line 150 of file AMDGPUSubtarget.h.

References GCN3Encoding.

◆ isMesa3DOS()

bool llvm::AMDGPUSubtarget::isMesa3DOS ( ) const

inline

Definition at line 136 of file AMDGPUSubtarget.h.

References llvm::Triple::getOS(), and llvm::Triple::Mesa3D.

Referenced by llvm::GCNSubtarget::isMesaGfxShader(), isMesaKernel(), llvm::AMDGPULegalizerInfo::legalizeGlobalValue(), and llvm::SITargetLowering::shouldEmitGOTReloc().

◆ isMesaKernel()

bool AMDGPUSubtarget::isMesaKernel ( const Function & F ) const

Definition at line 496 of file AMDGPUSubtarget.cpp.

References F, isMesa3DOS(), and llvm::AMDGPU::isShader().

Referenced by getImplicitArgNumBytes(), and isAmdHsaOrMesa().

◆ isPromoteAllocaEnabled()

bool llvm::AMDGPUSubtarget::isPromoteAllocaEnabled ( ) const

inline

Definition at line 217 of file AMDGPUSubtarget.h.

References EnablePromoteAlloca.

◆ isSingleLaneExecution()

bool AMDGPUSubtarget::isSingleLaneExecution ( const Function & Kernel ) const

Return true if only a single workitem can be active in a wave.

Definition at line 508 of file AMDGPUSubtarget.cpp.

References getMaxWorkitemID(), and I.

Referenced by llvm::GCNTTIImpl::hasBranchDivergence().

◆ makeLIDRangeMetadata()

bool AMDGPUSubtarget::makeLIDRangeMetadata ( Instruction * I ) const

Creates value range metadata on a workitemid.* intrinsic call or load.

Definition at line 517 of file AMDGPUSubtarget.cpp.

References llvm::MDBuilder::createRange(), F, getFlatWorkGroupSizes(), getReqdWorkGroupSize(), I, if(), llvm::Lower, Range, and llvm::Upper.

◆ useRealTrue16Insts()

bool AMDGPUSubtarget::useRealTrue16Insts ( ) const

Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-generated.

Fake True16 instructions are identical to non-fake ones except that they take 32-bit registers as operands and always use their low halves.

Definition at line 188 of file AMDGPUSubtarget.cpp.

References EnableRealTrue16Insts, and hasTrue16BitInsts().

Referenced by llvm::SIRegisterInfo::getRegClassForSizeOnBank(), llvm::SIInstrInfo::getVALUOp(), llvm::SIInstrInfo::moveToVALUImpl(), and llvm::SITargetLowering::SITargetLowering().

Member Data Documentation

◆ AddressableLocalMemorySize

unsigned llvm::AMDGPUSubtarget::AddressableLocalMemorySize = 0

protected

Definition at line 70 of file AMDGPUSubtarget.h.

Referenced by getAddressableLocalMemorySize(), llvm::GCNSubtarget::initializeSubtargetDependencies(), and llvm::R600Subtarget::R600Subtarget().

◆ EnablePromoteAlloca

bool llvm::AMDGPUSubtarget::EnablePromoteAlloca = false

protected

Definition at line 64 of file AMDGPUSubtarget.h.

Referenced by isPromoteAllocaEnabled().

◆ EnableRealTrue16Insts

bool llvm::AMDGPUSubtarget::EnableRealTrue16Insts = false

protected

Definition at line 53 of file AMDGPUSubtarget.h.

Referenced by useRealTrue16Insts().

◆ EUsPerCU

unsigned llvm::AMDGPUSubtarget::EUsPerCU = 4

protected

Definition at line 67 of file AMDGPUSubtarget.h.

Referenced by llvm::GCNSubtarget::GCNSubtarget(), and getEUsPerCU().

◆ FastFMAF32

bool llvm::AMDGPUSubtarget::FastFMAF32 = false

protected

Definition at line 66 of file AMDGPUSubtarget.h.

Referenced by hasFastFMAF32().

◆ GCN3Encoding

bool llvm::AMDGPUSubtarget::GCN3Encoding = false

protected

Definition at line 50 of file AMDGPUSubtarget.h.

Referenced by isGCN3Encoding().

◆ Has16BitInsts

bool llvm::AMDGPUSubtarget::Has16BitInsts = false

protected

Definition at line 51 of file AMDGPUSubtarget.h.

Referenced by has16BitInsts().

◆ HasDsSrc2Insts

bool llvm::AMDGPUSubtarget::HasDsSrc2Insts = false

protected

Definition at line 56 of file AMDGPUSubtarget.h.

Referenced by hasDsSrc2Insts().

◆ HasFminFmaxLegacy

bool llvm::AMDGPUSubtarget::HasFminFmaxLegacy = true

protected

Definition at line 63 of file AMDGPUSubtarget.h.

Referenced by hasFminFmaxLegacy(), and llvm::GCNSubtarget::initializeSubtargetDependencies().

◆ HasInv2PiInlineImm

bool llvm::AMDGPUSubtarget::HasInv2PiInlineImm = false

protected

Definition at line 62 of file AMDGPUSubtarget.h.

Referenced by hasInv2PiInlineImm().

◆ HasMadMacF32Insts

bool llvm::AMDGPUSubtarget::HasMadMacF32Insts = false

protected

Definition at line 55 of file AMDGPUSubtarget.h.

Referenced by hasMadMacF32Insts().

◆ HasMadMixInsts

bool llvm::AMDGPUSubtarget::HasMadMixInsts = false

protected

Definition at line 54 of file AMDGPUSubtarget.h.

Referenced by hasMadMixInsts().

◆ HasMulI24

bool llvm::AMDGPUSubtarget::HasMulI24 = true

protected

Definition at line 59 of file AMDGPUSubtarget.h.

Referenced by hasMulI24(), and llvm::R600Subtarget::initializeSubtargetDependencies().

◆ HasMulU24

bool llvm::AMDGPUSubtarget::HasMulU24 = true

protected

Definition at line 60 of file AMDGPUSubtarget.h.

Referenced by hasMulU24(), and llvm::R600Subtarget::initializeSubtargetDependencies().

◆ HasSDWA

bool llvm::AMDGPUSubtarget::HasSDWA = false

protected

Definition at line 57 of file AMDGPUSubtarget.h.

Referenced by hasSDWA().

◆ HasSMulHi

bool llvm::AMDGPUSubtarget::HasSMulHi = false

protected

Definition at line 61 of file AMDGPUSubtarget.h.

Referenced by hasSMulHi(), and llvm::GCNSubtarget::initializeSubtargetDependencies().

◆ HasTrigReducedRange

bool llvm::AMDGPUSubtarget::HasTrigReducedRange = false

protected

Definition at line 65 of file AMDGPUSubtarget.h.

Referenced by hasTrigReducedRange().

◆ HasTrue16BitInsts

bool llvm::AMDGPUSubtarget::HasTrue16BitInsts = false

protected

Definition at line 52 of file AMDGPUSubtarget.h.

Referenced by hasTrue16BitInsts().

◆ HasVOP3PInsts

bool llvm::AMDGPUSubtarget::HasVOP3PInsts = false

protected

Definition at line 58 of file AMDGPUSubtarget.h.

Referenced by hasVOP3PInsts().

◆ LocalMemorySize

unsigned llvm::AMDGPUSubtarget::LocalMemorySize = 0

protected

Definition at line 69 of file AMDGPUSubtarget.h.

Referenced by getLocalMemorySize(), llvm::GCNSubtarget::initializeSubtargetDependencies(), and llvm::R600Subtarget::R600Subtarget().

◆ MaxWavesPerEU

unsigned llvm::AMDGPUSubtarget::MaxWavesPerEU = 10

protected

Definition at line 68 of file AMDGPUSubtarget.h.

Referenced by llvm::GCNSubtarget::GCNSubtarget(), and getMaxWavesPerEU().

◆ WavefrontSizeLog2

char llvm::AMDGPUSubtarget::WavefrontSizeLog2 = 0

protected

Definition at line 71 of file AMDGPUSubtarget.h.

Referenced by getWavefrontSize(), getWavefrontSizeLog2(), and llvm::GCNSubtarget::initializeSubtargetDependencies().

The documentation for this class was generated from the following files:

lib/Target/AMDGPU/AMDGPUSubtarget.h
lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Public Types

Public Member Functions

Static Public Member Functions

Protected Attributes

Detailed Description

Member Enumeration Documentation

◆ Generation

Constructor & Destructor Documentation

◆ AMDGPUSubtarget()

◆ ~AMDGPUSubtarget()

Member Function Documentation

◆ get() [1/2]

◆ get() [2/2]

◆ getAddressableLocalMemorySize()

◆ getAlignmentForImplicitArgPtr()

◆ getAMDGPUDwarfFlavour()

◆ getDefaultFlatWorkGroupSize()

◆ getEffectiveWavesPerEU()

◆ getEUsPerCU()

◆ getExplicitKernArgSize()

◆ getExplicitKernelArgOffset()

◆ getFlatWorkGroupSizes()

◆ getImplicitArgNumBytes()

◆ getKernArgSegmentSize()

◆ getLocalMemorySize()

◆ getMaxFlatWorkGroupSize()

◆ getMaxLocalMemSizeWithWaveCount()

◆ getMaxNumWorkGroups()

◆ getMaxWavesPerEU()

◆ getMaxWorkGroupsPerCU()

◆ getMaxWorkitemID()

◆ getMinFlatWorkGroupSize()

◆ getMinWavesPerEU()

◆ getOccupancyWithLocalMemSize() [1/2]

◆ getOccupancyWithLocalMemSize() [2/2]

◆ getWavefrontSize()

◆ getWavefrontSizeLog2()

◆ getWavesPerEU() [1/2]

◆ getWavesPerEU() [2/2]

◆ getWavesPerEUForWorkGroup()

◆ has16BitInsts()

◆ hasDsSrc2Insts()

◆ hasFastFMAF32()

◆ hasFminFmaxLegacy()

◆ hasInv2PiInlineImm()

◆ hasMadMacF32Insts()

◆ hasMadMixInsts()

◆ hasMulI24()

◆ hasMulU24()

◆ hasSDWA()

◆ hasSMulHi()

◆ hasTrigReducedRange()

◆ hasTrue16BitInsts()

◆ hasVOP3PInsts()

◆ isAmdHsaOrMesa()

◆ isAmdHsaOS()

◆ isAmdPalOS()

◆ isGCN()

◆ isGCN3Encoding()

◆ isMesa3DOS()

◆ isMesaKernel()

◆ isPromoteAllocaEnabled()

◆ isSingleLaneExecution()

◆ makeLIDRangeMetadata()

◆ useRealTrue16Insts()

Member Data Documentation

◆ AddressableLocalMemorySize

◆ EnablePromoteAlloca

◆ EnableRealTrue16Insts

◆ EUsPerCU

◆ FastFMAF32

◆ GCN3Encoding

◆ Has16BitInsts

◆ HasDsSrc2Insts

◆ HasFminFmaxLegacy

◆ HasInv2PiInlineImm

◆ HasMadMacF32Insts

◆ HasMadMixInsts

◆ HasMulI24

◆ HasMulU24