LLVM
15.0.0git
|
#include "Target/AMDGPU/AMDGPUSubtarget.h"
Public Types | |
enum | Generation { INVALID = 0, R600 = 1, R700 = 2, EVERGREEN = 3, NORTHERN_ISLANDS = 4, SOUTHERN_ISLANDS = 5, SEA_ISLANDS = 6, VOLCANIC_ISLANDS = 7, GFX9 = 8, GFX10 = 9, GFX11 = 10 } |
Public Member Functions | |
AMDGPUSubtarget (const Triple &TT) | |
std::pair< unsigned, unsigned > | getDefaultFlatWorkGroupSize (CallingConv::ID CC) const |
std::pair< unsigned, unsigned > | getFlatWorkGroupSizes (const Function &F) const |
std::pair< unsigned, unsigned > | getWavesPerEU (const Function &F) const |
std::pair< unsigned, unsigned > | getWavesPerEU (const Function &F, std::pair< unsigned, unsigned > FlatWorkGroupSizes) const |
Overload which uses the specified values for the flat work group sizes, rather than querying the function itself. More... | |
unsigned | getMaxLocalMemSizeWithWaveCount (unsigned WaveCount, const Function &) const |
Return the amount of LDS that can be used without restricting the occupancy to fewer than WaveCount waves. More... | |
unsigned | getOccupancyWithLocalMemSize (uint32_t Bytes, const Function &) const |
Inverse of getMaxLocalMemSizeWithWaveCount. More... | |
unsigned | getOccupancyWithLocalMemSize (const MachineFunction &MF) const |
bool | isAmdHsaOS () const |
bool | isAmdPalOS () const |
bool | isMesa3DOS () const |
bool | isMesaKernel (const Function &F) const |
bool | isAmdHsaOrMesa (const Function &F) const |
bool | isGCN () const |
bool | isGCN3Encoding () const |
bool | has16BitInsts () const |
bool | hasTrue16BitInsts () const |
bool | hasMadMixInsts () const |
bool | hasMadMacF32Insts () const |
bool | hasDsSrc2Insts () const |
bool | hasSDWA () const |
bool | hasVOP3PInsts () const |
bool | hasMulI24 () const |
bool | hasMulU24 () const |
bool | hasSMulHi () const |
bool | hasInv2PiInlineImm () const |
bool | hasFminFmaxLegacy () const |
bool | hasTrigReducedRange () const |
bool | isPromoteAllocaEnabled () const |
unsigned | getWavefrontSize () const |
unsigned | getWavefrontSizeLog2 () const |
unsigned | getLocalMemorySize () const |
Align | getAlignmentForImplicitArgPtr () const |
unsigned | getExplicitKernelArgOffset (const Function &F) const |
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument. More... | |
virtual unsigned | getMaxWorkGroupsPerCU (unsigned FlatWorkGroupSize) const =0 |
virtual unsigned | getMinFlatWorkGroupSize () const =0 |
virtual unsigned | getMaxFlatWorkGroupSize () const =0 |
virtual unsigned | getWavesPerEUForWorkGroup (unsigned FlatWorkGroupSize) const =0 |
virtual unsigned | getMinWavesPerEU () const =0 |
unsigned | getMaxWavesPerEU () const |
unsigned | getMaxWorkitemID (const Function &Kernel, unsigned Dimension) const |
Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension. More... | |
bool | makeLIDRangeMetadata (Instruction *I) const |
Creates value range metadata on a workitemid.* intrinsic call or load. More... | |
unsigned | getImplicitArgNumBytes (const Function &F) const |
uint64_t | getExplicitKernArgSize (const Function &F, Align &MaxAlign) const |
unsigned | getKernArgSegmentSize (const Function &F, Align &MaxAlign) const |
AMDGPUDwarfFlavour | getAMDGPUDwarfFlavour () const |
virtual | ~AMDGPUSubtarget ()=default |
Static Public Member Functions | |
static const AMDGPUSubtarget & | get (const MachineFunction &MF) |
static const AMDGPUSubtarget & | get (const TargetMachine &TM, const Function &F) |
Protected Attributes | |
bool | GCN3Encoding = false |
bool | Has16BitInsts = false |
bool | HasTrue16BitInsts = false |
bool | HasMadMixInsts = false |
bool | HasMadMacF32Insts = false |
bool | HasDsSrc2Insts = false |
bool | HasSDWA = false |
bool | HasVOP3PInsts = false |
bool | HasMulI24 = true |
bool | HasMulU24 = true |
bool | HasSMulHi = false |
bool | HasInv2PiInlineImm = false |
bool | HasFminFmaxLegacy = true |
bool | EnablePromoteAlloca = false |
bool | HasTrigReducedRange = false |
unsigned | MaxWavesPerEU = 10 |
unsigned | LocalMemorySize = 0 |
char | WavefrontSizeLog2 = 0 |
Definition at line 29 of file AMDGPUSubtarget.h.
Enumerator | |
---|---|
INVALID | |
R600 | |
R700 | |
EVERGREEN | |
NORTHERN_ISLANDS | |
SOUTHERN_ISLANDS | |
SEA_ISLANDS | |
VOLCANIC_ISLANDS | |
GFX9 | |
GFX10 | |
GFX11 |
Definition at line 31 of file AMDGPUSubtarget.h.
Definition at line 157 of file AMDGPUSubtarget.cpp.
|
virtual default |
|
static |
Definition at line 971 of file AMDGPUSubtarget.cpp.
References llvm::Triple::amdgcn, llvm::Triple::getArch(), llvm::MachineFunction::getSubtarget(), llvm::MachineFunction::getTarget(), and llvm::TargetMachine::getTargetTriple().
Referenced by llvm::AMDGPUMachineFunction::AMDGPUMachineFunction(), llvm::AMDGPUTargetLowering::analyzeFormalArgumentsCompute(), llvm::AMDGPUTargetLowering::getImplicitParameterOffset(), and promoteAllocasToVector().
|
static |
Definition at line 978 of file AMDGPUSubtarget.cpp.
References llvm::Triple::amdgcn, F, and TM.
|
inline |
Definition at line 212 of file AMDGPUSubtarget.h.
References Align, and isAmdHsaOS().
Referenced by getKernArgSegmentSize().
AMDGPUDwarfFlavour AMDGPUSubtarget::getAMDGPUDwarfFlavour | ( | ) | const |
Returns the DWARF register number mapping flavour corresponding to the WavefrontSize.
Definition at line 565 of file AMDGPUSubtarget.cpp.
References getWavefrontSize(), llvm::Wave32, and llvm::Wave64.
std::pair< unsigned, unsigned > AMDGPUSubtarget::getDefaultFlatWorkGroupSize | ( | CallingConv::ID | CC | ) | const |
Definition at line 355 of file AMDGPUSubtarget.cpp.
References llvm::CallingConv::AMDGPU_ES, llvm::CallingConv::AMDGPU_GS, llvm::CallingConv::AMDGPU_HS, llvm::CallingConv::AMDGPU_LS, llvm::CallingConv::AMDGPU_PS, llvm::CallingConv::AMDGPU_VS, getMaxFlatWorkGroupSize(), and getWavefrontSize().
Referenced by getFlatWorkGroupSizes().
Definition at line 523 of file AMDGPUSubtarget.cpp.
References Align, llvm::alignTo(), llvm::CallingConv::AMDGPU_KERNEL, Arg, assert(), DL, F, llvm::max(), llvm::None, and llvm::CallingConv::SPIR_KERNEL.
Referenced by getKernArgSegmentSize().
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
Definition at line 218 of file AMDGPUSubtarget.h.
References llvm::Triple::AMDHSA, llvm::Triple::AMDPAL, llvm::Triple::getOS(), llvm_unreachable, llvm::Triple::Mesa3D, and llvm::Triple::UnknownOS.
Referenced by getKernArgSegmentSize(), and llvm::AMDGPUCallLowering::lowerFormalArgumentsKernel().
Returns the subtarget's default pair of minimum/maximum flat work group sizes for function F, or minimum/maximum flat work group sizes explicitly requested using "amdgpu-flat-work-group-size" attribute attached to function F.
Definition at line 369 of file AMDGPUSubtarget.cpp.
References F, getDefaultFlatWorkGroupSize(), llvm::AMDGPU::getIntegerPairAttribute(), getMaxFlatWorkGroupSize(), and getMinFlatWorkGroupSize().
Referenced by getMaxLocalMemSizeWithWaveCount(), getMaxWorkitemID(), getOccupancyWithLocalMemSize(), getWavesPerEU(), and makeLIDRangeMetadata().
Definition at line 507 of file AMDGPUSubtarget.cpp.
References assert(), F, llvm::AMDGPU::getAmdhsaCodeObjectVersion(), llvm::AMDGPU::getIntegerAttribute(), llvm::AMDGPU::isKernel(), and isMesaKernel().
Referenced by getKernArgSegmentSize().
Definition at line 547 of file AMDGPUSubtarget.cpp.
References llvm::alignTo(), F, getAlignmentForImplicitArgPtr(), getExplicitKernArgSize(), getExplicitKernelArgOffset(), getImplicitArgNumBytes(), and llvm::max().
Referenced by llvm::AMDGPU::HSAMD::MetadataStreamerV3::getHSAKernelProps().
|
inline |
Definition at line 208 of file AMDGPUSubtarget.h.
References LocalMemorySize.
Referenced by getMaxLocalMemSizeWithWaveCount(), and getOccupancyWithLocalMemSize().
|
pure virtual |
Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.
Referenced by getDefaultFlatWorkGroupSize(), and getFlatWorkGroupSizes().
unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount | ( | unsigned | WaveCount, |
const Function & | F | ||
) | const |
Return the amount of LDS that can be used without restricting the occupancy to fewer than WaveCount waves.
Definition at line 299 of file AMDGPUSubtarget.cpp.
References F, getFlatWorkGroupSizes(), getLocalMemorySize(), getMaxWavesPerEU(), and getMaxWorkGroupsPerCU().
|
inline |
Definition at line 255 of file AMDGPUSubtarget.h.
References MaxWavesPerEU.
Referenced by llvm::GCNSubtarget::computeOccupancy(), getMaxLocalMemSizeWithWaveCount(), getOccupancyWithLocalMemSize(), llvm::GCNSubtarget::getOccupancyWithNumSGPRs(), llvm::GCNSubtarget::getOccupancyWithNumVGPRs(), and getWavesPerEU().
|
pure virtual |
Returns the maximum number of work groups per compute unit supported by the subtarget for the given FlatWorkGroupSize.
Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.
Referenced by getMaxLocalMemSizeWithWaveCount(), and getOccupancyWithLocalMemSize().
Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
Definition at line 437 of file AMDGPUSubtarget.cpp.
References getFlatWorkGroupSizes(), getReqdWorkGroupSize(), and llvm::max().
Referenced by llvm::AMDGPUTargetLowering::computeKnownBitsForTargetNode(), llvm::AMDGPULegalizerInfo::legalizeWorkitemIDIntrinsic(), and llvm::SITargetLowering::passSpecialInputs().
|
pure virtual |
Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.
Referenced by getFlatWorkGroupSizes().
|
pure virtual |
Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.
Referenced by getWavesPerEU().
unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize | ( | const MachineFunction & | MF | ) | const |
Definition at line 349 of file AMDGPUSubtarget.cpp.
References llvm::MachineFunction::getFunction(), llvm::MachineFunction::getInfo(), and getOccupancyWithLocalMemSize().
Inverse of getMaxLocalMemSizeWithWaveCount.
Return the maximum wave count if the given LDS memory size is the only constraint.
Definition at line 312 of file AMDGPUSubtarget.cpp.
References assert(), F, getFlatWorkGroupSizes(), getLocalMemorySize(), getMaxWavesPerEU(), getMaxWorkGroupsPerCU(), getWavefrontSize(), and llvm::min().
Referenced by llvm::GCNSubtarget::computeOccupancy(), getOccupancyWithLocalMemSize(), and llvm::SIRegisterInfo::getRegPressureLimit().
|
inline |
Definition at line 200 of file AMDGPUSubtarget.h.
References WavefrontSizeLog2.
Referenced by llvm::SIRegisterInfo::buildSpillLoadStore(), getAMDGPUDwarfFlavour(), getDefaultFlatWorkGroupSize(), llvm::AMDGPU::HSAMD::MetadataStreamerV3::getHSAKernelProps(), getOccupancyWithLocalMemSize(), llvm::SITargetLowering::getRegClassFor(), llvm::SIInstrInfo::getScratchRsrcWords23(), llvm::GCNSubtarget::isWave32(), llvm::GCNSubtarget::isWave64(), llvm::AMDGPULegalizerInfo::legalizeIntrinsic(), lowerFCMPIntrinsic(), lowerICMPIntrinsic(), and llvm::SITargetLowering::requiresUniformRegister().
|
inline |
Definition at line 204 of file AMDGPUSubtarget.h.
References WavefrontSizeLog2.
Referenced by llvm::SIRegisterInfo::eliminateFrameIndex(), and llvm::GCNSubtarget::getKnownHighZeroBitsForFrameIndex().
|
inline |
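Returns the subtarget's default pair of minimum/maximum number of waves per execution unit for function F, or minimum/maximum number of waves per execution unit explicitly requested using "amdgpu-waves-per-eu" attribute attached to function F.
Definition at line 96 of file AMDGPUSubtarget.h.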
References F, and getFlatWorkGroupSizes().
Referenced by llvm::GCNSubtarget::getMaxNumSGPRs(), and llvm::GCNSubtarget::getMaxNumVGPRs().
std::pair< unsigned, unsigned > AMDGPUSubtarget::getWavesPerEU | ( | const Function & | F, |
std::pair< unsigned, unsigned > | FlatWorkGroupSizes | ||
) | const |
Overload which uses the specified values for the flat work group sizes, rather than querying the function itself.
Parameter FlatWorkGroupSizes: should correspond to the function's value for getFlatWorkGroupSizes.
Definition at line 392 of file AMDGPUSubtarget.cpp.
References F, llvm::AMDGPU::getIntegerPairAttribute(), getMaxWavesPerEU(), getMinWavesPerEU(), and getWavesPerEUForWorkGroup().
|
pure virtual |
Returns the number of waves per execution unit required to support the given FlatWorkGroupSize.
Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.
Referenced by getWavesPerEU().
|
inline |
Definition at line 146 of file AMDGPUSubtarget.h.
References Has16BitInsts.
Referenced by llvm::GCNTTIImpl::getArithmeticInstrCost(), llvm::SITargetLowering::getAsmOperandConstVal(), llvm::GCNTTIImpl::getIntrinsicInstrCost(), llvm::GCNTTIImpl::getMaximumVF(), llvm::SITargetLowering::getNumRegistersForCallingConv(), llvm::SITargetLowering::getPreferredShiftAmountTy(), llvm::SITargetLowering::getRegisterTypeForCallingConv(), llvm::GCNTTIImpl::getVectorInstrCost(), llvm::SITargetLowering::getVectorTypeBreakdownForCallingConv(), llvm::AMDGPUTargetLowering::isFAbsFree(), llvm::SITargetLowering::isFMAFasterThanFMulAndFAdd(), llvm::AMDGPUTargetLowering::isFPImmLegal(), llvm::SIInstrInfo::isInlineConstant(), llvm::AMDGPUTargetLowering::isTruncateFree(), llvm::SITargetLowering::isTypeDesirableForOp(), llvm::AMDGPUTargetLowering::isZExtFree(), llvm::AMDGPUTargetLowering::LowerSINT_TO_FP(), llvm::AMDGPUTargetLowering::LowerUINT_TO_FP(), llvm::AMDGPUTargetLowering::performFAbsCombine(), llvm::AMDGPUTargetLowering::performMulCombine(), and llvm::SITargetLowering::SITargetLowering().
|
inline |
Definition at line 160 of file AMDGPUSubtarget.h.
References HasDsSrc2Insts.
|
inline |
Definition at line 188 of file AMDGPUSubtarget.h.
References HasFminFmaxLegacy.
Referenced by AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy(), and llvm::AMDGPUTargetLowering::performSelectCombine().
|
inline |
Definition at line 184 of file AMDGPUSubtarget.h.
References HasInv2PiInlineImm.
Referenced by llvm::SITargetLowering::checkAsmConstraintValA(), llvm::SIRegisterInfo::eliminateFrameIndex(), llvm::AMDGPUTargetLowering::isConstantCostlierToNegate(), and llvm::SIInstrInfo::isInlineConstant().
|
inline |
Definition at line 156 of file AMDGPUSubtarget.h.
References HasMadMacF32Insts, and isGCN().
Referenced by llvm::GCNTTIImpl::getArithmeticInstrCost(), llvm::SITargetLowering::isFMADLegal(), llvm::SITargetLowering::isFMAFasterThanFMulAndFAdd(), llvm::AMDGPUTargetLowering::LowerDIVREM24(), llvm::AMDGPUTargetLowering::LowerUDIVREM64(), and llvm::SITargetLowering::SITargetLowering().
|
inline |
Definition at line 152 of file AMDGPUSubtarget.h.
References HasMadMixInsts.
Referenced by llvm::SITargetLowering::isFPExtFoldable().
|
inline |
Definition at line 172 of file AMDGPUSubtarget.h.
References HasMulI24.
Referenced by llvm::AMDGPUTargetLowering::performMulCombine(), llvm::AMDGPUTargetLowering::performMulhsCombine(), and llvm::AMDGPUTargetLowering::performMulLoHiCombine().
|
inline |
Definition at line 176 of file AMDGPUSubtarget.h.
References HasMulU24.
Referenced by llvm::AMDGPUTargetLowering::performMulCombine(), llvm::AMDGPUTargetLowering::performMulhuCombine(), and llvm::AMDGPUTargetLowering::performMulLoHiCombine().
|
inline |
Definition at line 164 of file AMDGPUSubtarget.h.
References HasSDWA.
Referenced by llvm::AMDGPUTargetLowering::PerformDAGCombine(), and llvm::SIInstrInfo::verifyInstruction().
|
inline |
Definition at line 180 of file AMDGPUSubtarget.h.
References HasSMulHi.
Referenced by llvm::AMDGPUTargetLowering::performMulhsCombine(), and llvm::AMDGPUTargetLowering::performMulhuCombine().
|
inline |
Definition at line 192 of file AMDGPUSubtarget.h.
References HasTrigReducedRange.
Referenced by llvm::AMDGPULegalizerInfo::legalizeSinCos().
|
inline |
Definition at line 150 of file AMDGPUSubtarget.h.
References HasTrue16BitInsts.
|
inline |
Definition at line 168 of file AMDGPUSubtarget.h.
References HasVOP3PInsts.
Referenced by llvm::GCNTTIImpl::getArithmeticReductionCost(), llvm::GCNTTIImpl::getMinMaxReductionCost(), llvm::GCNTTIImpl::getShuffleCost(), and llvm::SITargetLowering::SITargetLowering().
Definition at line 134 of file AMDGPUSubtarget.h.
References F, isAmdHsaOS(), and isMesaKernel().
|
inline |
Definition at line 120 of file AMDGPUSubtarget.h.
References llvm::Triple::AMDHSA, and llvm::Triple::getOS().
Referenced by getAlignmentForImplicitArgPtr(), llvm::SIInstrInfo::getDefaultRsrcDataFormat(), llvm::GCNSubtarget::getTrapHandlerAbi(), llvm::GCNSubtarget::initializeSubtargetDependencies(), isAmdHsaOrMesa(), llvm::SITargetLowering::LowerFormalArguments(), and llvm::AMDGPUAsmPrinter::runOnMachineFunction().
|
inline |
Definition at line 124 of file AMDGPUSubtarget.h.
References llvm::Triple::AMDPAL, and llvm::Triple::getOS().
Referenced by llvm::SITargetLowering::LowerFormalArguments(), and llvm::AMDGPUAsmPrinter::runOnMachineFunction().
|
inline |
Definition at line 138 of file AMDGPUSubtarget.h.
References llvm::Triple::amdgcn, and llvm::Triple::getArch().
Referenced by hasMadMacF32Insts(), and llvm::AMDGPUTargetLowering::LowerINT_TO_FP32().
|
inline |
Definition at line 142 of file AMDGPUSubtarget.h.
References GCN3Encoding.
|
inline |
Definition at line 128 of file AMDGPUSubtarget.h.
References llvm::Triple::getOS(), and llvm::Triple::Mesa3D.
Referenced by llvm::GCNSubtarget::isMesaGfxShader(), and isMesaKernel().
Definition at line 433 of file AMDGPUSubtarget.cpp.
References F, isMesa3DOS(), and llvm::AMDGPU::isShader().
Referenced by getImplicitArgNumBytes(), and isAmdHsaOrMesa().
|
inline |
Definition at line 196 of file AMDGPUSubtarget.h.
References EnablePromoteAlloca.
bool AMDGPUSubtarget::makeLIDRangeMetadata | ( | Instruction * | I | ) | const |
Creates value range metadata on a workitemid.* intrinsic call or load.
Definition at line 445 of file AMDGPUSubtarget.cpp.
References llvm::MDBuilder::createRange(), F, getFlatWorkGroupSizes(), getReqdWorkGroupSize(), I, if(), LLVM_FALLTHROUGH, and llvm::max().
|
protected |
Definition at line 62 of file AMDGPUSubtarget.h.
Referenced by isPromoteAllocaEnabled().
|
protected |
Definition at line 49 of file AMDGPUSubtarget.h.
Referenced by isGCN3Encoding().
|
protected |
Definition at line 50 of file AMDGPUSubtarget.h.
Referenced by has16BitInsts().
|
protected |
Definition at line 54 of file AMDGPUSubtarget.h.
Referenced by hasDsSrc2Insts().
|
protected |
Definition at line 61 of file AMDGPUSubtarget.h.
Referenced by hasFminFmaxLegacy(), and llvm::GCNSubtarget::initializeSubtargetDependencies().
|
protected |
Definition at line 60 of file AMDGPUSubtarget.h.
Referenced by hasInv2PiInlineImm().
|
protected |
Definition at line 53 of file AMDGPUSubtarget.h.
Referenced by hasMadMacF32Insts().
|
protected |
Definition at line 52 of file AMDGPUSubtarget.h.
Referenced by hasMadMixInsts().
|
protected |
Definition at line 57 of file AMDGPUSubtarget.h.
Referenced by hasMulI24(), and llvm::R600Subtarget::initializeSubtargetDependencies().
|
protected |
Definition at line 58 of file AMDGPUSubtarget.h.
Referenced by hasMulU24(), and llvm::R600Subtarget::initializeSubtargetDependencies().
|
protected |
Definition at line 55 of file AMDGPUSubtarget.h.
Referenced by hasSDWA().
|
protected |
Definition at line 59 of file AMDGPUSubtarget.h.
Referenced by hasSMulHi(), and llvm::GCNSubtarget::initializeSubtargetDependencies().
|
protected |
Definition at line 63 of file AMDGPUSubtarget.h.
Referenced by hasTrigReducedRange().
|
protected |
Definition at line 51 of file AMDGPUSubtarget.h.
Referenced by hasTrue16BitInsts().
|
protected |
Definition at line 56 of file AMDGPUSubtarget.h.
Referenced by hasVOP3PInsts().
|
protected |
Definition at line 65 of file AMDGPUSubtarget.h.
Referenced by getLocalMemorySize(), and llvm::GCNSubtarget::initializeSubtargetDependencies().
|
protected |
Definition at line 64 of file AMDGPUSubtarget.h.
Referenced by llvm::GCNSubtarget::GCNSubtarget(), and getMaxWavesPerEU().
|
protected |
Definition at line 66 of file AMDGPUSubtarget.h.
Referenced by getWavefrontSize(), getWavefrontSizeLog2(), and llvm::GCNSubtarget::initializeSubtargetDependencies().