LLVM 23.0.0git
llvm::GCNSubtarget Class Reference final

#include "Target/AMDGPU/GCNSubtarget.h"

Inheritance diagram for llvm::GCNSubtarget:
[legend]

Public Types

enum class  TrapHandlerAbi { NONE = 0x00 , AMDHSA = 0x01 }
enum class  TrapID { LLVMAMDHSATrap = 0x02 , LLVMAMDHSADebugTrap = 0x03 }
Public Types inherited from llvm::AMDGPUSubtarget
enum  Generation {
  INVALID = 0 , R600 = 1 , R700 = 2 , EVERGREEN = 3 ,
  NORTHERN_ISLANDS = 4 , SOUTHERN_ISLANDS = 5 , SEA_ISLANDS = 6 , VOLCANIC_ISLANDS = 7 ,
  GFX9 = 8 , GFX10 = 9 , GFX11 = 10 , GFX12 = 11 ,
  GFX13 = 12
}

Public Member Functions

 GCNSubtarget (const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM)
 ~GCNSubtarget () override
GCNSubtarget & initializeSubtargetDependencies (const Triple &TT, StringRef GPU, StringRef FS)
void checkSubtargetFeatures (const Function &F) const
 Diagnose inconsistent subtarget features before attempting to codegen function F.
const SIInstrInfo * getInstrInfo () const override
const SIFrameLowering * getFrameLowering () const override
const SITargetLowering * getTargetLowering () const override
const SIRegisterInfo * getRegisterInfo () const override
const SelectionDAGTargetInfo * getSelectionDAGInfo () const override
const CallLowering * getCallLowering () const override
const InlineAsmLowering * getInlineAsmLowering () const override
InstructionSelector * getInstructionSelector () const override
const LegalizerInfo * getLegalizerInfo () const override
const AMDGPURegisterBankInfo * getRegBankInfo () const override
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID () const
const InstrItineraryData * getInstrItineraryData () const override
void ParseSubtargetFeatures (StringRef CPU, StringRef TuneCPU, StringRef FS)
Generation getGeneration () const
bool isGFX11Plus () const
unsigned getMaxWaveScratchSize () const
unsigned getKnownHighZeroBitsForFrameIndex () const
 Return the number of high bits known to be zero for a frame index.
int getLDSBankCount () const
unsigned getInstCacheLineSize () const
 Instruction cache line size in bytes (64 for pre-GFX11, 128 for GFX11+).
unsigned getMaxPrivateElementSize (bool ForBufferRSrc=false) const
unsigned getConstantBusLimit (unsigned Opcode) const
bool zeroesHigh16BitsOfDest (unsigned Opcode) const
 Returns true if an instruction with a 16-bit result returned in a 32-bit register implicitly zeroes the high 16 bits, rather than preserving the original value.
bool supportsWGP () const
bool hasHWFP64 () const
bool hasAddr64 () const
bool hasFlat () const
bool hasOnlyRevVALUShifts () const
bool hasFractBug () const
bool hasMed3_16 () const
bool hasMin3Max3_16 () const
bool hasSwap () const
bool hasScalarPackInsts () const
bool hasScalarMulHiInsts () const
bool hasScalarSubwordLoads () const
TrapHandlerAbi getTrapHandlerAbi () const
bool supportsGetDoorbellID () const
bool hasUsableDSOffset () const
 True if the offset field of DS instructions works as expected.
bool unsafeDSOffsetFoldingEnabled () const
bool hasUsableDivScaleConditionOutput () const
 Condition output from div_scale is usable.
bool hasReadVCCZBug () const
 Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
bool partialVCCWritesUpdateVCCZ () const
 Writes to VCC_LO/VCC_HI update the VCCZ flag.
bool hasSMRDReadVALUDefHazard () const
 A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU instruction.
bool hasVMEMReadSGPRVALUDefHazard () const
 A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU instruction.
bool hasRFEHazards () const
unsigned getSetRegWaitStates () const
 Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
unsigned getMaxLocalMemSizeWithWaveCount (unsigned WaveCount, const Function &) const
 Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
bool supportsMinMaxDenormModes () const
bool hasDenormModeInst () const
bool useDS128 () const
bool hasDS96AndDS128 () const
bool haveRoundOpsF64 () const
 Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
bool privateMemoryResourceIsRangeChecked () const
bool usePRTStrictNull () const
bool hasUnalignedBufferAccessEnabled () const
bool hasUnalignedDSAccessEnabled () const
bool hasUnalignedScratchAccessEnabled () const
bool isXNACKEnabled () const
bool isTgSplitEnabled () const
bool isCuModeEnabled () const
bool isPreciseMemoryEnabled () const
bool hasFlatScrRegister () const
bool hasFlatScratchSTMode () const
bool hasFlatScratchSVSMode () const
bool hasFlatScratchEnabled () const
bool hasGlobalAddTidInsts () const
bool hasAtomicCSub () const
bool hasMTBUFInsts () const
bool hasFormattedMUBUFInsts () const
bool hasExportInsts () const
bool hasVINTERPEncoding () const
bool hasLdsAtomicAddF64 () const
bool hasMultiDwordFlatScratchAddressing () const
bool hasFlatLgkmVMemCountInOrder () const
bool hasD16LoadStore () const
bool d16PreservesUnusedBits () const
bool hasD16Images () const
bool ldsRequiresM0Init () const
 Return true if most LDS instructions have an m0 use that requires m0 to be initialized.
bool hasGWSAutoReplay () const
bool hasGWSSemaReleaseAll () const
bool hasScalarAddSub64 () const
bool hasScalarSMulU64 () const
bool isMesaGfxShader (const Function &F) const
bool hasMad64_32 () const
bool hasAtomicFaddInsts () const
bool vmemWriteNeedsExpWaitcnt () const
bool hasInstPrefetch () const
bool hasPrefetch () const
bool hasSCmpK () const
Align getStackAlignment () const
bool enableMachineScheduler () const override
bool useAA () const override
bool enableSubRegLiveness () const override
void setScalarizeGlobalBehavior (bool b)
bool getScalarizeGlobalBehavior () const
bool enableEarlyIfConversion () const override
void overrideSchedPolicy (MachineSchedPolicy &Policy, const SchedRegion &Region) const override
void overridePostRASchedPolicy (MachineSchedPolicy &Policy, const SchedRegion &Region) const override
void mirFileLoaded (MachineFunction &MF) const override
unsigned getMaxNumUserSGPRs () const
bool useVGPRIndexMode () const
bool hasScalarCompareEq64 () const
bool hasLDSFPAtomicAddF32 () const
bool hasLDSFPAtomicAddF64 () const
bool hasPermLaneX16 () const
bool hasPermLane64 () const
bool hasDPPRowShare () const
bool hasPkMovB32 () const
bool hasFmaakFmamkF32Insts () const
bool hasFmaakFmamkF64Insts () const
bool hasNonNSAEncoding () const
unsigned getNSAMaxSize (bool HasSampler=false) const
bool hasMadF16 () const
bool hasMovB64 () const
bool hasScaleOffset () const
bool hasSignedGVSOffset () const
bool loadStoreOptEnabled () const
bool hasUserSGPRInit16BugInWave32 () const
bool has12DWordStoreHazard () const
bool hasDwordx3LoadStores () const
bool hasReadM0MovRelInterpHazard () const
bool hasReadM0SendMsgHazard () const
bool hasReadM0LdsDmaHazard () const
bool hasReadM0LdsDirectHazard () const
bool hasLDSMisalignedBugInWGPMode () const
bool hasShift64HighRegBug () const
bool hasTransForwardingHazard () const
bool hasDstSelForwardingHazard () const
bool hasDOTOpSelHazard () const
bool hasVDecCoExecHazard () const
bool hasHardClauses () const
bool hasFPAtomicToDenormModeHazard () const
bool hasVOP3DPP () const
bool hasLdsDirect () const
bool hasLdsWaitVMSRC () const
bool hasVALUPartialForwardingHazard () const
bool hasCvtScaleForwardingHazard () const
bool hasLoopHeadInstSplitSensitivity () const
bool requiresCodeObjectV6 () const
bool useVGPRBlockOpsForCSR () const
bool hasVALUMaskWriteHazard () const
bool hasVALUReadSGPRHazard () const
bool setRegModeNeedsVNOPs () const
bool needsAlignedVGPRs () const
 Return if operations acting on VGPR tuples require even alignment.
bool hasSPackHL () const
 Return true if the target has the S_PACK_HL_B32_B16 instruction.
bool hasCompressedExport () const
 Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (enable) bits.
bool hasNullExportTarget () const
 Return true if the target's EXP instruction supports the NULL export target.
bool hasFlatScratchSVSSwizzleBug () const
bool hasDelayAlu () const
 Return true if the target has the S_DELAY_ALU instruction.
bool hasLDSLoadB96_B128 () const
 Returns true if the target supports global_load_lds_dwordx3/global_load_lds_dwordx4 or buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit.
bool hasExtendedWaitCounts () const
bool hasNoF16PseudoScalarTransInlineConstants () const
bool hasPKF32InstsReplicatingLower32BitsOfScalarInput () const
bool hasAddPC64Inst () const
bool hasExpertSchedulingMode () const
unsigned maxHardClauseLength () const
unsigned getOccupancyWithNumSGPRs (unsigned SGPRs) const
 Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
unsigned getOccupancyWithNumVGPRs (unsigned VGPRs, unsigned DynamicVGPRBlockSize) const
 Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
std::pair< unsigned, unsigned > computeOccupancy (const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
 Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is F, each workgroup uses LDSSize bytes of LDS, and each wave uses NumSGPRs SGPRs and NumVGPRs VGPRs.
bool flatScratchIsPointer () const
bool hasMergedShaders () const
bool hasLegacyGeometry () const
bool hasSplitBarriers () const
bool hasRrWGMode () const
bool hasSignedScratchOffsets () const
bool hasINVWBL2WaitCntRequirement () const
bool hasVOPD3 () const
bool hasVectorMulU64 () const
bool hasMadU64U32NoCarry () const
bool hasIntMinMax64 () const
bool hasPkMinMax3Insts () const
bool hasSGetShaderCyclesInst () const
bool hasGetPCZeroExtension () const
bool needsKernArgPreloadProlog () const
bool hasCondSubInsts () const
bool hasSubClampInsts () const
unsigned getSGPRAllocGranule () const
unsigned getSGPREncodingGranule () const
unsigned getTotalNumSGPRs () const
unsigned getAddressableNumSGPRs () const
unsigned getMinNumSGPRs (unsigned WavesPerEU) const
unsigned getMaxNumSGPRs (unsigned WavesPerEU, bool Addressable) const
unsigned getBaseReservedNumSGPRs (const bool HasFlatScratch) const
unsigned getReservedNumSGPRs (const MachineFunction &MF) const
unsigned getReservedNumSGPRs (const Function &F) const
unsigned getMaxNumPreloadedSGPRs () const
unsigned getBaseMaxNumSGPRs (const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
unsigned getMaxNumSGPRs (const MachineFunction &MF) const
unsigned getMaxNumSGPRs (const Function &F) const
unsigned getVGPRAllocGranule (unsigned DynamicVGPRBlockSize) const
unsigned getVGPREncodingGranule () const
unsigned getTotalNumVGPRs () const
unsigned getAddressableNumArchVGPRs () const
unsigned getAddressableNumVGPRs (unsigned DynamicVGPRBlockSize) const
unsigned getMinNumVGPRs (unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
unsigned getMaxNumVGPRs (unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
unsigned getBaseMaxNumVGPRs (const Function &F, std::pair< unsigned, unsigned > NumVGPRBounds) const
unsigned getMaxNumVGPRs (const Function &F) const
unsigned getMaxNumAGPRs (const Function &F) const
std::pair< unsigned, unsigned > getMaxNumVectorRegs (const Function &F) const
 Return a pair of maximum numbers of VGPRs and AGPRs that meet the number of waves per execution unit required for the function F.
unsigned getMaxNumVGPRs (const MachineFunction &MF) const
bool supportsWave32 () const
bool supportsWave64 () const
bool isWave32 () const
bool isWave64 () const
bool isWaveSizeKnown () const
 Returns true if the wave size of this subtarget is reliably known.
const TargetRegisterClass * getBoolRC () const
unsigned getMaxWorkGroupsPerCU (unsigned FlatWorkGroupSize) const override
unsigned getMinFlatWorkGroupSize () const override
unsigned getMaxFlatWorkGroupSize () const override
unsigned getWavesPerEUForWorkGroup (unsigned FlatWorkGroupSize) const override
unsigned getMinWavesPerEU () const override
void adjustSchedDependency (SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep, const TargetSchedModel *SchedModel) const override
bool shouldClusterStores () const
unsigned getNSAThreshold (const MachineFunction &MF) const
bool requiresNopBeforeDeallocVGPRs () const
bool requiresWaitIdleBeforeGetReg () const
bool isDynamicVGPREnabled () const
unsigned getDynamicVGPRBlockSize () const
bool requiresDisjointEarlyClobberAndUndef () const override
bool hasDsAtomicAsyncBarrierArriveB64PipeBug () const
bool hasScratchBaseForwardingHazard () const
bool hasFlatScratchHiInB64InstHazard () const
bool requiresWaitXCntForSingleAccessInstructions () const
unsigned getSNopBits () const
bool supportsBPermute () const
bool supportsWaveWideBPermute () const
bool useRealTrue16Insts () const
 Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-generated.
bool requiresWaitOnWorkgroupReleaseFence () const
unsigned getMaxWavesPerEU () const
Public Member Functions inherited from llvm::AMDGPUSubtarget
 AMDGPUSubtarget (const Triple &TT)
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize (CallingConv::ID CC) const
std::pair< unsigned, unsigned > getFlatWorkGroupSizes (const Function &F) const
std::optional< unsigned > getReqdWorkGroupSize (const Function &F, unsigned Dim) const
bool hasWavefrontsEvenlySplittingXDim (const Function &F, bool REquiresUniformYZ=false) const
std::pair< unsigned, unsigned > getWavesPerEU (const Function &F) const
std::pair< unsigned, unsigned > getWavesPerEU (std::pair< unsigned, unsigned > FlatWorkGroupSizes, unsigned LDSBytes, const Function &F) const
 Overload which uses the specified values for the flat workgroup sizes and LDS space rather than querying the function itself.
std::pair< unsigned, unsigned > getEffectiveWavesPerEU (std::pair< unsigned, unsigned > RequestedWavesPerEU, std::pair< unsigned, unsigned > FlatWorkGroupSizes, unsigned LDSBytes) const
 Returns the target minimum/maximum number of waves per EU.
unsigned getMaxLocalMemSizeWithWaveCount (unsigned WaveCount, const Function &) const
 Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes (uint32_t LDSBytes, const Function &F) const
 Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is F and each workgroup running the function requires LDSBytes bytes of LDS space.
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes (uint32_t LDSBytes, std::pair< unsigned, unsigned > FlatWorkGroupSizes) const
 Overload which uses the specified values for the flat work group sizes, rather than querying the function itself.
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes (const MachineFunction &MF) const
 Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is MF.
bool isAmdHsaOS () const
bool isAmdPalOS () const
bool isMesa3DOS () const
bool isMesaKernel (const Function &F) const
bool isAmdHsaOrMesa (const Function &F) const
bool isGCN () const
bool useRealTrue16Insts () const
 Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-generated.
bool hasMulI24 () const
bool hasMulU24 () const
bool hasSMulHi () const
bool hasFminFmaxLegacy () const
unsigned getWavefrontSize () const
unsigned getWavefrontSizeLog2 () const
unsigned getLocalMemorySize () const
 Return the maximum number of bytes of LDS available for all workgroups running on the same WGP or CU.
unsigned getAddressableLocalMemorySize () const
 Return the maximum number of bytes of LDS that can be allocated to a single workgroup.
unsigned getEUsPerCU () const
 Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto which workgroups are mapped.
Align getAlignmentForImplicitArgPtr () const
unsigned getExplicitKernelArgOffset () const
 Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
unsigned getMaxWavesPerEU () const
unsigned getMaxWorkitemID (const Function &Kernel, unsigned Dimension) const
 Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
SmallVector< unsigned > getMaxNumWorkGroups (const Function &F) const
 Return the number of work groups for the function.
bool isSingleLaneExecution (const Function &Kernel) const
 Return true if only a single workitem can be active in a wave.
bool makeLIDRangeMetadata (Instruction *I) const
 Creates value range metadata on a workitemid.* intrinsic call or load.
unsigned getImplicitArgNumBytes (const Function &F) const
uint64_t getExplicitKernArgSize (const Function &F, Align &MaxAlign) const
unsigned getKernArgSegmentSize (const Function &F, Align &MaxAlign) const
AMDGPUDwarfFlavour getAMDGPUDwarfFlavour () const
virtual ~AMDGPUSubtarget ()=default

Static Public Member Functions

static bool hasHalfRate64Ops (const TargetSubtargetInfo &STI)
Static Public Member Functions inherited from llvm::AMDGPUSubtarget
static const AMDGPUSubtarget & get (const MachineFunction &MF)
static const AMDGPUSubtarget & get (const TargetMachine &TM, const Function &F)

Protected Attributes

AMDGPU::IsaInfo::AMDGPUTargetID TargetID
unsigned Gen = INVALID
InstrItineraryData InstrItins
int LDSBankCount = 0
unsigned MaxPrivateElementSize = 0
unsigned InstCacheLineSize = 0
bool DynamicVGPR = false
bool DynamicVGPRBlockSize32 = false
bool ScalarizeGlobal = false
unsigned MaxHardClauseLength = 0
 The maximum number of instructions that may be placed within an S_CLAUSE, which is one greater than the maximum argument to S_CLAUSE.
Protected Attributes inherited from llvm::AMDGPUSubtarget
bool HasMulI24 = true
bool HasMulU24 = true
bool HasSMulHi = false
bool HasFminFmaxLegacy = true
unsigned EUsPerCU = 4
unsigned MaxWavesPerEU = 10
unsigned LocalMemorySize = 0
unsigned AddressableLocalMemorySize = 0
char WavefrontSizeLog2 = 0
unsigned FlatOffsetBitWidth = 0

Detailed Description

Definition at line 33 of file GCNSubtarget.h.

Member Enumeration Documentation

◆ TrapHandlerAbi

Enumerator
NONE 
AMDHSA 

Definition at line 40 of file GCNSubtarget.h.

◆ TrapID

enum class llvm::GCNSubtarget::TrapID
strong
Enumerator
LLVMAMDHSATrap 
LLVMAMDHSADebugTrap 

Definition at line 45 of file GCNSubtarget.h.

Constructor & Destructor Documentation

◆ GCNSubtarget()

◆ ~GCNSubtarget()

GCNSubtarget::~GCNSubtarget ( )
override default

References F, and GCNSubtarget().

Member Function Documentation

◆ adjustSchedDependency()

◆ checkSubtargetFeatures()

void GCNSubtarget::checkSubtargetFeatures ( const Function & F) const

Diagnose inconsistent subtarget features before attempting to codegen function F.

Definition at line 170 of file GCNSubtarget.cpp.

References F.

Referenced by llvm::AMDGPUDAGToDAGISel::runOnMachineFunction(), and llvm::AMDGPUInstructionSelector::setupMF().

◆ computeOccupancy()

std::pair< unsigned, unsigned > GCNSubtarget::computeOccupancy ( const Function & F,
unsigned LDSSize = 0,
unsigned NumSGPRs = 0,
unsigned NumVGPRs = 0 ) const

Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is F, each workgroup uses LDSSize bytes of LDS, and each wave uses NumSGPRs SGPRs and NumVGPRs VGPRs.

The flat workgroup sizes associated to the function are a range, so this returns a range as well.

Note that occupancy can be affected by the scratch allocation as well, but we do not have enough information to compute it.

Definition at line 452 of file GCNSubtarget.cpp.

References F, llvm::AMDGPU::getDynamicVGPRBlockSize(), getDynamicVGPRBlockSize(), getOccupancyWithNumSGPRs(), getOccupancyWithNumVGPRs(), llvm::AMDGPUSubtarget::getOccupancyWithWorkGroupSizes(), and isDynamicVGPREnabled().

◆ d16PreservesUnusedBits()

bool llvm::GCNSubtarget::d16PreservesUnusedBits ( ) const
inline

Definition at line 376 of file GCNSubtarget.h.

References hasD16LoadStore(), and TargetID.

◆ enableEarlyIfConversion()

bool llvm::GCNSubtarget::enableEarlyIfConversion ( ) const
inline override

Definition at line 447 of file GCNSubtarget.h.

◆ enableMachineScheduler()

bool llvm::GCNSubtarget::enableMachineScheduler ( ) const
inline override

Definition at line 434 of file GCNSubtarget.h.

◆ enableSubRegLiveness()

bool llvm::GCNSubtarget::enableSubRegLiveness ( ) const
inline override

Definition at line 438 of file GCNSubtarget.h.

◆ flatScratchIsPointer()

bool llvm::GCNSubtarget::flatScratchIsPointer ( ) const
inline
Returns
true if the flat_scratch register should be initialized with the pointer to the wave's scratch memory rather than a size and offset.

Definition at line 674 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ getAddressableNumArchVGPRs()

unsigned llvm::GCNSubtarget::getAddressableNumArchVGPRs ( ) const
inline
Returns
Addressable number of architectural VGPRs supported by the subtarget.

Definition at line 822 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getAddressableNumArchVGPRs().

Referenced by getMaxNumVectorRegs().

◆ getAddressableNumSGPRs()

unsigned llvm::GCNSubtarget::getAddressableNumSGPRs ( ) const
inline
Returns
Addressable number of SGPRs supported by the subtarget.

Definition at line 748 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getAddressableNumSGPRs().

◆ getAddressableNumVGPRs()

unsigned llvm::GCNSubtarget::getAddressableNumVGPRs ( unsigned DynamicVGPRBlockSize) const
inline
Returns
Addressable number of VGPRs supported by the subtarget.

Definition at line 827 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getAddressableNumVGPRs().

◆ getBaseMaxNumSGPRs()

unsigned GCNSubtarget::getBaseMaxNumSGPRs ( const Function & F,
std::pair< unsigned, unsigned > WavesPerEU,
unsigned PreloadedSGPRs,
unsigned ReservedNumSGPRs ) const
Returns
max num SGPRs. This is the common utility function called by MachineFunction and Function variants of getMaxNumSGPRs.

Definition at line 469 of file GCNSubtarget.cpp.

References F, llvm::AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, getMaxNumSGPRs(), and getMinNumSGPRs().

Referenced by getMaxNumSGPRs(), and getMaxNumSGPRs().

◆ getBaseMaxNumVGPRs()

unsigned GCNSubtarget::getBaseMaxNumVGPRs ( const Function & F,
std::pair< unsigned, unsigned > NumVGPRBounds ) const
Returns
max num VGPRs. This is the common utility function called by MachineFunction and Function variants of getMaxNumVGPRs.

Definition at line 553 of file GCNSubtarget.cpp.

References F.

Referenced by getMaxNumVGPRs().

◆ getBaseReservedNumSGPRs()

unsigned GCNSubtarget::getBaseReservedNumSGPRs ( const bool HasFlatScratch) const
Returns
Reserved number of SGPRs. This is the common utility function called by MachineFunction and Function variants of getReservedNumSGPRs.

Definition at line 422 of file GCNSubtarget.cpp.

References getGeneration(), llvm::AMDGPUSubtarget::GFX10, isXNACKEnabled(), llvm::AMDGPUSubtarget::SEA_ISLANDS, and llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS.

Referenced by getReservedNumSGPRs(), and getReservedNumSGPRs().

◆ getBoolRC()

const TargetRegisterClass * llvm::GCNSubtarget::getBoolRC ( ) const
inline

Definition at line 895 of file GCNSubtarget.h.

References llvm::SIRegisterInfo::getBoolRC(), and getRegisterInfo().

◆ getCallLowering()

const CallLowering * llvm::GCNSubtarget::getCallLowering ( ) const
inline override

Definition at line 117 of file GCNSubtarget.h.

◆ getConstantBusLimit()

unsigned GCNSubtarget::getConstantBusLimit ( unsigned Opcode) const

Definition at line 208 of file GCNSubtarget.cpp.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX10.

◆ getDynamicVGPRBlockSize()

unsigned llvm::GCNSubtarget::getDynamicVGPRBlockSize ( ) const
inline

Definition at line 949 of file GCNSubtarget.h.

References DynamicVGPRBlockSize32.

Referenced by computeOccupancy(), and getMaxNumVGPRs().

◆ getFrameLowering()

const SIFrameLowering * llvm::GCNSubtarget::getFrameLowering ( ) const
inline override

Definition at line 105 of file GCNSubtarget.h.

◆ getGeneration()

Generation llvm::GCNSubtarget::getGeneration ( ) const
inline

Definition at line 147 of file GCNSubtarget.h.

References Gen.

Referenced by createOccupancy(), flatScratchIsPointer(), getBaseReservedNumSGPRs(), getConstantBusLimit(), getMaxWaveScratchSize(), getNSAThreshold(), getOccupancyWithNumSGPRs(), getSetRegWaitStates(), getSNopBits(), has12DWordStoreHazard(), hasAddr64(), hasD16Images(), hasD16LoadStore(), hasDenormModeInst(), hasDPPRowShare(), hasDsAtomicAsyncBarrierArriveB64PipeBug(), hasExpertSchedulingMode(), hasExtendedWaitCounts(), hasFlat(), hasFlatLgkmVMemCountInOrder(), hasFlatScratchHiInB64InstHazard(), hasFlatScratchSVSSwizzleBug(), hasFmaakFmamkF32Insts(), hasFPAtomicToDenormModeHazard(), hasFractBug(), hasGWSAutoReplay(), hasInstPrefetch(), hasLdsDirect(), hasLdsWaitVMSRC(), hasLegacyGeometry(), hasMad64_32(), hasMed3_16(), hasMergedShaders(), hasMin3Max3_16(), hasMultiDwordFlatScratchAddressing(), hasNoF16PseudoScalarTransInlineConstants(), hasNonNSAEncoding(), hasOnlyRevVALUShifts(), hasPermLane64(), hasPermLaneX16(), hasPKF32InstsReplicatingLower32BitsOfScalarInput(), hasReadM0LdsDirectHazard(), hasReadM0LdsDmaHazard(), hasReadM0MovRelInterpHazard(), hasReadM0SendMsgHazard(), hasReadVCCZBug(), hasRFEHazards(), hasRrWGMode(), hasScalarAddSub64(), hasScalarCompareEq64(), hasScalarSMulU64(), hasScalarSubwordLoads(), hasSCmpK(), hasScratchBaseForwardingHazard(), hasSignedScratchOffsets(), hasSMRDReadVALUDefHazard(), hasSplitBarriers(), hasUsableDivScaleConditionOutput(), hasUsableDSOffset(), hasVALUMaskWriteHazard(), hasVALUPartialForwardingHazard(), hasVMEMReadSGPRVALUDefHazard(), hasVOP3DPP(), initializeSubtargetDependencies(), isGFX11Plus(), ldsRequiresM0Init(), partialVCCWritesUpdateVCCZ(), privateMemoryResourceIsRangeChecked(), requiresWaitOnWorkgroupReleaseFence(), llvm::AMDGPUAsmPrinter::runOnMachineFunction(), setRegModeNeedsVNOPs(), shouldClusterStores(), supportsBPermute(), supportsGetDoorbellID(), supportsMinMaxDenormModes(), supportsWave32(), supportsWaveWideBPermute(), supportsWGP(), vmemWriteNeedsExpWaitcnt(), and zeroesHigh16BitsOfDest().

◆ getInlineAsmLowering()

const InlineAsmLowering * llvm::GCNSubtarget::getInlineAsmLowering ( ) const
inline override

Definition at line 121 of file GCNSubtarget.h.

◆ getInstCacheLineSize()

unsigned llvm::GCNSubtarget::getInstCacheLineSize ( ) const
inline

Instruction cache line size in bytes (64 for pre-GFX11, 128 for GFX11+).

Definition at line 177 of file GCNSubtarget.h.

References InstCacheLineSize.

Referenced by llvm::SITargetLowering::SITargetLowering().

◆ getInstrInfo()

◆ getInstrItineraryData()

const InstrItineraryData * llvm::GCNSubtarget::getInstrItineraryData ( ) const
inline override

Definition at line 141 of file GCNSubtarget.h.

References InstrItins.

Referenced by adjustSchedDependency().

◆ getInstructionSelector()

InstructionSelector * llvm::GCNSubtarget::getInstructionSelector ( ) const
inline override

Definition at line 125 of file GCNSubtarget.h.

◆ getKnownHighZeroBitsForFrameIndex()

unsigned llvm::GCNSubtarget::getKnownHighZeroBitsForFrameIndex ( ) const
inline

Return the number of high bits known to be zero for a frame index.

Definition at line 170 of file GCNSubtarget.h.

References llvm::countl_zero(), getMaxWaveScratchSize(), and llvm::AMDGPUSubtarget::getWavefrontSizeLog2().

Referenced by llvm::SITargetLowering::LowerFormalArguments().

◆ getLDSBankCount()

int llvm::GCNSubtarget::getLDSBankCount ( ) const
inline

Definition at line 174 of file GCNSubtarget.h.

References LDSBankCount.

◆ getLegalizerInfo()

const LegalizerInfo * llvm::GCNSubtarget::getLegalizerInfo ( ) const
inline override

Definition at line 129 of file GCNSubtarget.h.

◆ getMaxFlatWorkGroupSize()

unsigned llvm::GCNSubtarget::getMaxFlatWorkGroupSize ( ) const
inline override virtual
Returns
Maximum flat work group size supported by the subtarget.

Implements llvm::AMDGPUSubtarget.

Definition at line 911 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getMaxFlatWorkGroupSize().

◆ getMaxLocalMemSizeWithWaveCount()

unsigned llvm::GCNSubtarget::getMaxLocalMemSizeWithWaveCount ( unsigned WaveCount,
const Function &  ) const

Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.

◆ getMaxNumAGPRs()

unsigned llvm::GCNSubtarget::getMaxNumAGPRs ( const Function & F) const
inline

Definition at line 863 of file GCNSubtarget.h.

References F, and getMaxNumVGPRs().

◆ getMaxNumPreloadedSGPRs()

unsigned GCNSubtarget::getMaxNumPreloadedSGPRs ( ) const
Returns
Maximum number of preloaded SGPRs for the subtarget.

Definition at line 523 of file GCNSubtarget.cpp.

Referenced by getMaxNumSGPRs().

◆ getMaxNumSGPRs() [1/3]

unsigned GCNSubtarget::getMaxNumSGPRs ( const Function & F) const
Returns
Maximum number of SGPRs that meets number of waves per execution unit requirement for function F, or number of SGPRs explicitly requested using "amdgpu-num-sgpr" attribute attached to function F.
Value that meets number of waves per execution unit requirement if explicitly requested value cannot be converted to integer, violates subtarget's specifications, or does not meet number of waves per execution unit requirement.

Definition at line 548 of file GCNSubtarget.cpp.

References F, getBaseMaxNumSGPRs(), getMaxNumPreloadedSGPRs(), getReservedNumSGPRs(), and llvm::AMDGPUSubtarget::getWavesPerEU().

◆ getMaxNumSGPRs() [2/3]

unsigned GCNSubtarget::getMaxNumSGPRs ( const MachineFunction & MF) const
Returns
Maximum number of SGPRs that meets number of waves per execution unit requirement for function MF, or number of SGPRs explicitly requested using "amdgpu-num-sgpr" attribute attached to function MF.
Value that meets number of waves per execution unit requirement if explicitly requested value cannot be converted to integer, violates subtarget's specifications, or does not meet number of waves per execution unit requirement.

Definition at line 516 of file GCNSubtarget.cpp.

References F, getBaseMaxNumSGPRs(), llvm::MachineFunction::getFunction(), llvm::MachineFunction::getInfo(), llvm::SIMachineFunctionInfo::getNumPreloadedSGPRs(), getReservedNumSGPRs(), and llvm::SIMachineFunctionInfo::getWavesPerEU().

◆ getMaxNumSGPRs() [3/3]

unsigned llvm::GCNSubtarget::getMaxNumSGPRs ( unsigned WavesPerEU,
bool Addressable ) const
inline
Returns
Maximum number of SGPRs that meets the given number of waves per execution unit requirement supported by the subtarget.

Definition at line 760 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getMaxNumSGPRs().

Referenced by getBaseMaxNumSGPRs().

◆ getMaxNumUserSGPRs()

unsigned llvm::GCNSubtarget::getMaxNumUserSGPRs ( ) const
inline

Definition at line 457 of file GCNSubtarget.h.

References llvm::AMDGPU::getMaxNumUserSGPRs().

◆ getMaxNumVectorRegs()

std::pair< unsigned, unsigned > GCNSubtarget::getMaxNumVectorRegs ( const Function & F) const

Return a pair of maximum numbers of VGPRs and AGPRs that meet the number of waves per execution unit required for the function F.

Definition at line 586 of file GCNSubtarget.cpp.

References llvm::alignTo(), assert(), F, getAddressableNumArchVGPRs(), llvm::AMDGPU::getIntegerPairAttribute(), and getMaxNumVGPRs().

◆ getMaxNumVGPRs() [1/3]

unsigned GCNSubtarget::getMaxNumVGPRs ( const Function & F) const
Returns
Maximum number of VGPRs that meets number of waves per execution unit requirement for function F, or number of VGPRs explicitly requested using "amdgpu-num-vgpr" attribute attached to function F.
Value that meets number of waves per execution unit requirement if explicitly requested value cannot be converted to integer, violates subtarget's specifications, or does not meet number of waves per execution unit requirement.

Definition at line 568 of file GCNSubtarget.cpp.

References F, getBaseMaxNumVGPRs(), llvm::AMDGPU::getDynamicVGPRBlockSize(), getDynamicVGPRBlockSize(), getMaxNumVGPRs(), getMinNumVGPRs(), llvm::AMDGPUSubtarget::getWavesPerEU(), and isDynamicVGPREnabled().

◆ getMaxNumVGPRs() [2/3]

unsigned GCNSubtarget::getMaxNumVGPRs ( const MachineFunction & MF) const
Returns
Maximum number of VGPRs that meets number of waves per execution unit requirement for function MF, or number of VGPRs explicitly requested using "amdgpu-num-vgpr" attribute attached to function MF.
Value that meets number of waves per execution unit requirement if explicitly requested value cannot be converted to integer, violates subtarget's specifications, or does not meet number of waves per execution unit requirement.

Definition at line 581 of file GCNSubtarget.cpp.

References llvm::MachineFunction::getFunction(), and getMaxNumVGPRs().

◆ getMaxNumVGPRs() [3/3]

unsigned llvm::GCNSubtarget::getMaxNumVGPRs ( unsigned WavesPerEU,
unsigned DynamicVGPRBlockSize ) const
inline
Returns
the maximum number of VGPRs that can be used and still achieve at least the specified number of waves WavesPerEU.

Definition at line 841 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getMaxNumVGPRs().

Referenced by getMaxNumAGPRs(), getMaxNumVectorRegs(), getMaxNumVGPRs(), and getMaxNumVGPRs().

◆ getMaxPrivateElementSize()

unsigned llvm::GCNSubtarget::getMaxPrivateElementSize ( bool ForBufferRSrc = false) const
inline

◆ getMaxWaveScratchSize()

unsigned llvm::GCNSubtarget::getMaxWaveScratchSize ( ) const
inline

◆ getMaxWavesPerEU()

unsigned llvm::AMDGPUSubtarget::getMaxWavesPerEU ( ) const
inline
Returns
Maximum number of waves per execution unit supported by the subtarget without any kind of limitation.

Definition at line 287 of file AMDGPUSubtarget.h.

Referenced by getOccupancyWithNumSGPRs().

◆ getMaxWorkGroupsPerCU()

unsigned llvm::GCNSubtarget::getMaxWorkGroupsPerCU ( unsigned FlatWorkGroupSize) const
inlineoverridevirtual
Returns
Maximum number of work groups per compute unit supported by the subtarget and limited by given FlatWorkGroupSize.

Implements llvm::AMDGPUSubtarget.

Definition at line 901 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getMaxWorkGroupsPerCU().

◆ getMinFlatWorkGroupSize()

unsigned llvm::GCNSubtarget::getMinFlatWorkGroupSize ( ) const
inlineoverridevirtual
Returns
Minimum flat work group size supported by the subtarget.

Implements llvm::AMDGPUSubtarget.

Definition at line 906 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getMinFlatWorkGroupSize().

◆ getMinNumSGPRs()

unsigned llvm::GCNSubtarget::getMinNumSGPRs ( unsigned WavesPerEU) const
inline
Returns
Minimum number of SGPRs that meets the given number of waves per execution unit requirement supported by the subtarget.

Definition at line 754 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getMinNumSGPRs().

Referenced by getBaseMaxNumSGPRs().

◆ getMinNumVGPRs()

unsigned llvm::GCNSubtarget::getMinNumVGPRs ( unsigned WavesPerEU,
unsigned DynamicVGPRBlockSize ) const
inline
Returns
the minimum number of VGPRs that will prevent achieving more than the specified number of waves WavesPerEU.

Definition at line 833 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getMinNumVGPRs().

Referenced by getMaxNumVGPRs().

◆ getMinWavesPerEU()

unsigned llvm::GCNSubtarget::getMinWavesPerEU ( ) const
inlineoverridevirtual
Returns
Minimum number of waves per execution unit supported by the subtarget.

Implements llvm::AMDGPUSubtarget.

Definition at line 924 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getMinWavesPerEU().

◆ getNSAMaxSize()

unsigned llvm::GCNSubtarget::getNSAMaxSize ( bool HasSampler = false) const
inline

Definition at line 493 of file GCNSubtarget.h.

References llvm::AMDGPU::getNSAMaxSize().

◆ getNSAThreshold()

◆ getOccupancyWithNumSGPRs()

unsigned GCNSubtarget::getOccupancyWithNumSGPRs ( unsigned SGPRs) const

Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.

Definition at line 409 of file GCNSubtarget.cpp.

References getGeneration(), getMaxWavesPerEU(), and llvm::AMDGPU::IsaInfo::getOccupancyWithNumSGPRs().

Referenced by computeOccupancy().

◆ getOccupancyWithNumVGPRs()

unsigned GCNSubtarget::getOccupancyWithNumVGPRs ( unsigned VGPRs,
unsigned DynamicVGPRBlockSize ) const

Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.

Definition at line 415 of file GCNSubtarget.cpp.

References llvm::AMDGPU::IsaInfo::getNumWavesPerEUWithNumVGPRs().

Referenced by computeOccupancy().

◆ getRegBankInfo()

const AMDGPURegisterBankInfo * llvm::GCNSubtarget::getRegBankInfo ( ) const
inlineoverride

Definition at line 133 of file GCNSubtarget.h.

◆ getRegisterInfo()

◆ getReservedNumSGPRs() [1/2]

unsigned GCNSubtarget::getReservedNumSGPRs ( const Function & F) const
Returns
Reserved number of SGPRs for given function F.

Definition at line 443 of file GCNSubtarget.cpp.

References F, and getBaseReservedNumSGPRs().

◆ getReservedNumSGPRs() [2/2]

unsigned GCNSubtarget::getReservedNumSGPRs ( const MachineFunction & MF) const
Returns
Reserved number of SGPRs for given machine function MF.

Definition at line 438 of file GCNSubtarget.cpp.

References getBaseReservedNumSGPRs(), llvm::MachineFunction::getInfo(), llvm::SIMachineFunctionInfo::getUserSGPRInfo(), and llvm::GCNUserSGPRUsageInfo::hasFlatScratchInit().

Referenced by getMaxNumSGPRs(), and getMaxNumSGPRs().

◆ getScalarizeGlobalBehavior()

bool llvm::GCNSubtarget::getScalarizeGlobalBehavior ( ) const
inline

Definition at line 441 of file GCNSubtarget.h.

References ScalarizeGlobal.

◆ getSelectionDAGInfo()

const SelectionDAGTargetInfo * GCNSubtarget::getSelectionDAGInfo ( ) const
override

Definition at line 204 of file GCNSubtarget.cpp.

◆ getSetRegWaitStates()

unsigned llvm::GCNSubtarget::getSetRegWaitStates ( ) const
inline

Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.

Definition at line 273 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::SEA_ISLANDS.

◆ getSGPRAllocGranule()

unsigned llvm::GCNSubtarget::getSGPRAllocGranule ( ) const
inline
Returns
SGPR allocation granularity supported by the subtarget.

Definition at line 733 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getSGPRAllocGranule().

◆ getSGPREncodingGranule()

unsigned llvm::GCNSubtarget::getSGPREncodingGranule ( ) const
inline
Returns
SGPR encoding granularity supported by the subtarget.

Definition at line 738 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getSGPREncodingGranule().

◆ getSNopBits()

unsigned llvm::GCNSubtarget::getSNopBits ( ) const
inline
Returns
the number of significant bits in the immediate field of the S_NOP instruction.

Definition at line 986 of file GCNSubtarget.h.

References getGeneration(), llvm::AMDGPUSubtarget::GFX12, and llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS.

◆ getStackAlignment()

Align llvm::GCNSubtarget::getStackAlignment ( ) const
inline

Definition at line 432 of file GCNSubtarget.h.

Referenced by GCNSubtarget().

◆ getTargetID()

const AMDGPU::IsaInfo::AMDGPUTargetID & llvm::GCNSubtarget::getTargetID ( ) const
inline

Definition at line 137 of file GCNSubtarget.h.

References TargetID.

Referenced by llvm::AMDGPUAsmPrinter::runOnMachineFunction().

◆ getTargetLowering()

const SITargetLowering * llvm::GCNSubtarget::getTargetLowering ( ) const
inlineoverride

Definition at line 109 of file GCNSubtarget.h.

Referenced by GCNSubtarget(), and getTM().

◆ getTotalNumSGPRs()

unsigned llvm::GCNSubtarget::getTotalNumSGPRs ( ) const
inline
Returns
Total number of SGPRs supported by the subtarget.

Definition at line 743 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getTotalNumSGPRs().

◆ getTotalNumVGPRs()

unsigned llvm::GCNSubtarget::getTotalNumVGPRs ( ) const
inline
Returns
Total number of VGPRs supported by the subtarget.

Definition at line 816 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getTotalNumVGPRs().

◆ getTrapHandlerAbi()

TrapHandlerAbi llvm::GCNSubtarget::getTrapHandlerAbi ( ) const
inline

Definition at line 229 of file GCNSubtarget.h.

References AMDHSA, llvm::AMDGPUSubtarget::isAmdHsaOS(), and NONE.

◆ getVGPRAllocGranule()

unsigned llvm::GCNSubtarget::getVGPRAllocGranule ( unsigned DynamicVGPRBlockSize) const
inline
Returns
VGPR allocation granularity supported by the subtarget.

Definition at line 806 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getVGPRAllocGranule().

◆ getVGPREncodingGranule()

unsigned llvm::GCNSubtarget::getVGPREncodingGranule ( ) const
inline
Returns
VGPR encoding granularity supported by the subtarget.

Definition at line 811 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getVGPREncodingGranule().

◆ getWavesPerEUForWorkGroup()

unsigned llvm::GCNSubtarget::getWavesPerEUForWorkGroup ( unsigned FlatWorkGroupSize) const
inlineoverridevirtual
Returns
Number of waves per execution unit required to support the given FlatWorkGroupSize.

Implements llvm::AMDGPUSubtarget.

Definition at line 918 of file GCNSubtarget.h.

References llvm::AMDGPU::IsaInfo::getWavesPerEUForWorkGroup().

◆ has12DWordStoreHazard()

bool llvm::GCNSubtarget::has12DWordStoreHazard ( ) const
inline

Definition at line 513 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS.

◆ hasAddPC64Inst()

bool llvm::GCNSubtarget::hasAddPC64Inst ( ) const
inline

Definition at line 638 of file GCNSubtarget.h.

◆ hasAddr64()

bool llvm::GCNSubtarget::hasAddr64 ( ) const
inline

◆ hasAtomicCSub()

bool llvm::GCNSubtarget::hasAtomicCSub ( ) const
inline

Definition at line 349 of file GCNSubtarget.h.

◆ hasAtomicFaddInsts()

bool llvm::GCNSubtarget::hasAtomicFaddInsts ( ) const
inline

Definition at line 407 of file GCNSubtarget.h.

◆ hasCompressedExport()

bool llvm::GCNSubtarget::hasCompressedExport ( ) const
inline

Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (enable) bits.

Definition at line 605 of file GCNSubtarget.h.

◆ hasCondSubInsts()

bool llvm::GCNSubtarget::hasCondSubInsts ( ) const
inline

Definition at line 728 of file GCNSubtarget.h.

◆ hasCvtScaleForwardingHazard()

bool llvm::GCNSubtarget::hasCvtScaleForwardingHazard ( ) const
inline

Definition at line 575 of file GCNSubtarget.h.

◆ hasD16Images()

bool llvm::GCNSubtarget::hasD16Images ( ) const
inline

Definition at line 380 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS.

◆ hasD16LoadStore()

bool llvm::GCNSubtarget::hasD16LoadStore ( ) const
inline

Definition at line 374 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

Referenced by d16PreservesUnusedBits().

◆ hasDelayAlu()

bool llvm::GCNSubtarget::hasDelayAlu ( ) const
inline

Return true if the target has the S_DELAY_ALU instruction.

Definition at line 614 of file GCNSubtarget.h.

◆ hasDenormModeInst()

bool llvm::GCNSubtarget::hasDenormModeInst ( ) const
inline
Returns
true if the target supports S_DENORM_MODE.

Definition at line 287 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX10.

◆ hasDOTOpSelHazard()

bool llvm::GCNSubtarget::hasDOTOpSelHazard ( ) const
inline

Definition at line 554 of file GCNSubtarget.h.

◆ hasDPPRowShare()

bool llvm::GCNSubtarget::hasDPPRowShare ( ) const
inline

Definition at line 478 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX10.

◆ hasDS96AndDS128()

bool llvm::GCNSubtarget::hasDS96AndDS128 ( ) const
inline
Returns
true if the target supports ds_read/write_b96/128.

Definition at line 296 of file GCNSubtarget.h.

◆ hasDsAtomicAsyncBarrierArriveB64PipeBug()

bool llvm::GCNSubtarget::hasDsAtomicAsyncBarrierArriveB64PipeBug ( ) const
inline

Definition at line 961 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasDstSelForwardingHazard()

bool llvm::GCNSubtarget::hasDstSelForwardingHazard ( ) const
inline

Definition at line 551 of file GCNSubtarget.h.

◆ hasDwordx3LoadStores()

bool llvm::GCNSubtarget::hasDwordx3LoadStores ( ) const
inline

Definition at line 518 of file GCNSubtarget.h.

◆ hasExpertSchedulingMode()

bool llvm::GCNSubtarget::hasExpertSchedulingMode ( ) const
inline
Returns
true if the target supports expert scheduling mode 2 which relies on the compiler to insert waits to avoid hazards between VMEM and VALU instructions in some instances.

Definition at line 643 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasExportInsts()

bool llvm::GCNSubtarget::hasExportInsts ( ) const
inline

Definition at line 355 of file GCNSubtarget.h.

◆ hasExtendedWaitCounts()

bool llvm::GCNSubtarget::hasExtendedWaitCounts ( ) const
inline
Returns
true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.

Definition at line 623 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasFlat()

bool llvm::GCNSubtarget::hasFlat ( ) const
inline

◆ hasFlatLgkmVMemCountInOrder()

bool llvm::GCNSubtarget::hasFlatLgkmVMemCountInOrder ( ) const
inline

Definition at line 372 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ hasFlatScratchEnabled()

bool llvm::GCNSubtarget::hasFlatScratchEnabled ( ) const
inline

Definition at line 342 of file GCNSubtarget.h.

Referenced by getMaxPrivateElementSize().

◆ hasFlatScratchHiInB64InstHazard()

bool llvm::GCNSubtarget::hasFlatScratchHiInB64InstHazard ( ) const
inline

Definition at line 973 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasFlatScratchSTMode()

bool llvm::GCNSubtarget::hasFlatScratchSTMode ( ) const
inline

Definition at line 336 of file GCNSubtarget.h.

◆ hasFlatScratchSVSMode()

bool llvm::GCNSubtarget::hasFlatScratchSVSMode ( ) const
inline

Definition at line 340 of file GCNSubtarget.h.

◆ hasFlatScratchSVSSwizzleBug()

bool llvm::GCNSubtarget::hasFlatScratchSVSSwizzleBug ( ) const
inline

Definition at line 611 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX11.

◆ hasFlatScrRegister()

bool llvm::GCNSubtarget::hasFlatScrRegister ( ) const
inline

Definition at line 331 of file GCNSubtarget.h.

◆ hasFmaakFmamkF32Insts()

bool llvm::GCNSubtarget::hasFmaakFmamkF32Insts ( ) const
inline

Definition at line 485 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX10.

◆ hasFmaakFmamkF64Insts()

bool llvm::GCNSubtarget::hasFmaakFmamkF64Insts ( ) const
inline

Definition at line 489 of file GCNSubtarget.h.

◆ hasFormattedMUBUFInsts()

bool llvm::GCNSubtarget::hasFormattedMUBUFInsts ( ) const
inline

Definition at line 353 of file GCNSubtarget.h.

◆ hasFPAtomicToDenormModeHazard()

bool llvm::GCNSubtarget::hasFPAtomicToDenormModeHazard ( ) const
inline

Definition at line 561 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX10.

◆ hasFractBug()

bool llvm::GCNSubtarget::hasFractBug ( ) const
inline

Definition at line 213 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS.

◆ hasGetPCZeroExtension()

bool llvm::GCNSubtarget::hasGetPCZeroExtension ( ) const
inline

Definition at line 718 of file GCNSubtarget.h.

◆ hasGlobalAddTidInsts()

bool llvm::GCNSubtarget::hasGlobalAddTidInsts ( ) const
inline

Definition at line 347 of file GCNSubtarget.h.

◆ hasGWSAutoReplay()

bool llvm::GCNSubtarget::hasGWSAutoReplay ( ) const
inline

Definition at line 391 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ hasGWSSemaReleaseAll()

bool llvm::GCNSubtarget::hasGWSSemaReleaseAll ( ) const
inline
Returns
true if the target has the ds_gws_sema_release_all instruction.

Definition at line 394 of file GCNSubtarget.h.

◆ hasHalfRate64Ops()

bool llvm::GCNSubtarget::hasHalfRate64Ops ( const TargetSubtargetInfo & STI)
static

◆ hasHardClauses()

bool llvm::GCNSubtarget::hasHardClauses ( ) const
inline

Definition at line 559 of file GCNSubtarget.h.

References MaxHardClauseLength.

◆ hasHWFP64()

bool llvm::GCNSubtarget::hasHWFP64 ( ) const
inline

Definition at line 197 of file GCNSubtarget.h.

◆ hasInstPrefetch()

bool llvm::GCNSubtarget::hasInstPrefetch ( ) const
inline

◆ hasIntMinMax64()

bool llvm::GCNSubtarget::hasIntMinMax64 ( ) const
inline

Definition at line 707 of file GCNSubtarget.h.

◆ hasINVWBL2WaitCntRequirement()

bool llvm::GCNSubtarget::hasINVWBL2WaitCntRequirement ( ) const
inline

Definition at line 695 of file GCNSubtarget.h.

◆ hasLdsAtomicAddF64()

bool llvm::GCNSubtarget::hasLdsAtomicAddF64 ( ) const
inline

Definition at line 364 of file GCNSubtarget.h.

◆ hasLdsDirect()

bool llvm::GCNSubtarget::hasLdsDirect ( ) const
inline

Definition at line 567 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX11.

◆ hasLDSFPAtomicAddF32()

bool llvm::GCNSubtarget::hasLDSFPAtomicAddF32 ( ) const
inline

Definition at line 467 of file GCNSubtarget.h.

◆ hasLDSFPAtomicAddF64()

bool llvm::GCNSubtarget::hasLDSFPAtomicAddF64 ( ) const
inline

Definition at line 468 of file GCNSubtarget.h.

◆ hasLDSLoadB96_B128()

bool llvm::GCNSubtarget::hasLDSLoadB96_B128 ( ) const
inline

Returns true if the target supports global_load_lds_dwordx3/global_load_lds_dwordx4 or buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit.

Definition at line 619 of file GCNSubtarget.h.

◆ hasLDSMisalignedBugInWGPMode()

bool llvm::GCNSubtarget::hasLDSMisalignedBugInWGPMode ( ) const
inline

Definition at line 537 of file GCNSubtarget.h.

◆ hasLdsWaitVMSRC()

bool llvm::GCNSubtarget::hasLdsWaitVMSRC ( ) const
inline

Definition at line 569 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasLegacyGeometry()

bool llvm::GCNSubtarget::hasLegacyGeometry ( ) const
inline

Definition at line 683 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX11.

◆ hasLoopHeadInstSplitSensitivity()

bool llvm::GCNSubtarget::hasLoopHeadInstSplitSensitivity ( ) const
inline

Definition at line 581 of file GCNSubtarget.h.

◆ hasMad64_32()

bool llvm::GCNSubtarget::hasMad64_32 ( ) const
inline

Definition at line 405 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::SEA_ISLANDS.

◆ hasMadF16()

bool GCNSubtarget::hasMadF16 ( ) const

Definition at line 399 of file GCNSubtarget.cpp.

Referenced by llvm::SITargetLowering::SITargetLowering().

◆ hasMadU64U32NoCarry()

bool llvm::GCNSubtarget::hasMadU64U32NoCarry ( ) const
inline

Definition at line 704 of file GCNSubtarget.h.

◆ hasMed3_16()

bool llvm::GCNSubtarget::hasMed3_16 ( ) const
inline

Definition at line 215 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ hasMergedShaders()

bool llvm::GCNSubtarget::hasMergedShaders ( ) const
inline
Returns
true if the machine has merged shaders in which s0-s7 are reserved by the hardware and user SGPRs start at s8

Definition at line 680 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ hasMin3Max3_16()

bool llvm::GCNSubtarget::hasMin3Max3_16 ( ) const
inline

Definition at line 217 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

Referenced by supportsMin3Max3().

◆ hasMovB64()

bool llvm::GCNSubtarget::hasMovB64 ( ) const
inline

Definition at line 499 of file GCNSubtarget.h.

◆ hasMTBUFInsts()

bool llvm::GCNSubtarget::hasMTBUFInsts ( ) const
inline

Definition at line 351 of file GCNSubtarget.h.

◆ hasMultiDwordFlatScratchAddressing()

bool llvm::GCNSubtarget::hasMultiDwordFlatScratchAddressing ( ) const
inline

Definition at line 368 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ hasNoF16PseudoScalarTransInlineConstants()

bool llvm::GCNSubtarget::hasNoF16PseudoScalarTransInlineConstants ( ) const
inline
Returns
true if inline constants are not supported for F16 pseudo scalar transcendentals.

Definition at line 627 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasNonNSAEncoding()

bool llvm::GCNSubtarget::hasNonNSAEncoding ( ) const
inline

Definition at line 491 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasNullExportTarget()

bool llvm::GCNSubtarget::hasNullExportTarget ( ) const
inline

Return true if the target's EXP instruction supports the NULL export target.

Definition at line 609 of file GCNSubtarget.h.

◆ hasOnlyRevVALUShifts()

bool llvm::GCNSubtarget::hasOnlyRevVALUShifts ( ) const
inline

Definition at line 209 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS.

◆ hasPermLane64()

bool llvm::GCNSubtarget::hasPermLane64 ( ) const
inline
Returns
true if the subtarget has the v_permlane64_b32 instruction.

Definition at line 476 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX11.

◆ hasPermLaneX16()

bool llvm::GCNSubtarget::hasPermLaneX16 ( ) const
inline
Returns
true if the subtarget has the v_permlanex16_b32 instruction.

Definition at line 473 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX10.

◆ hasPKF32InstsReplicatingLower32BitsOfScalarInput()

bool llvm::GCNSubtarget::hasPKF32InstsReplicatingLower32BitsOfScalarInput ( ) const
inline
Returns
true if the target has packed f32 instructions that only read 32 bits from a scalar operand (SGPR or literal) and replicate the bits to both channels.

Definition at line 634 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasPkMinMax3Insts()

bool llvm::GCNSubtarget::hasPkMinMax3Insts ( ) const
inline

Definition at line 710 of file GCNSubtarget.h.

◆ hasPkMovB32()

bool llvm::GCNSubtarget::hasPkMovB32 ( ) const
inline

Definition at line 483 of file GCNSubtarget.h.

◆ hasPrefetch()

bool llvm::GCNSubtarget::hasPrefetch ( ) const
inline

Definition at line 419 of file GCNSubtarget.h.

◆ hasReadM0LdsDirectHazard()

bool llvm::GCNSubtarget::hasReadM0LdsDirectHazard ( ) const
inline

Definition at line 533 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ hasReadM0LdsDmaHazard()

bool llvm::GCNSubtarget::hasReadM0LdsDmaHazard ( ) const
inline

Definition at line 529 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ hasReadM0MovRelInterpHazard()

bool llvm::GCNSubtarget::hasReadM0MovRelInterpHazard ( ) const
inline

Definition at line 520 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ hasReadM0SendMsgHazard()

bool llvm::GCNSubtarget::hasReadM0SendMsgHazard ( ) const
inline

◆ hasReadVCCZBug()

bool llvm::GCNSubtarget::hasReadVCCZBug ( ) const
inline

Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.

Definition at line 253 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::SEA_ISLANDS.

◆ hasRFEHazards()

bool llvm::GCNSubtarget::hasRFEHazards ( ) const
inline

Definition at line 270 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS.

◆ hasRrWGMode()

bool llvm::GCNSubtarget::hasRrWGMode ( ) const
inline

Definition at line 689 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasScalarAddSub64()

bool llvm::GCNSubtarget::hasScalarAddSub64 ( ) const
inline

Definition at line 396 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasScalarCompareEq64()

bool llvm::GCNSubtarget::hasScalarCompareEq64 ( ) const
inline

Definition at line 463 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS.

◆ hasScalarMulHiInsts()

bool llvm::GCNSubtarget::hasScalarMulHiInsts ( ) const
inline

Definition at line 225 of file GCNSubtarget.h.

◆ hasScalarPackInsts()

bool llvm::GCNSubtarget::hasScalarPackInsts ( ) const
inline

Definition at line 223 of file GCNSubtarget.h.

◆ hasScalarSMulU64()

bool llvm::GCNSubtarget::hasScalarSMulU64 ( ) const
inline

Definition at line 398 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasScalarSubwordLoads()

bool llvm::GCNSubtarget::hasScalarSubwordLoads ( ) const
inline

Definition at line 227 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasScaleOffset()

bool llvm::GCNSubtarget::hasScaleOffset ( ) const
inline

Definition at line 502 of file GCNSubtarget.h.

◆ hasSCmpK()

bool llvm::GCNSubtarget::hasSCmpK ( ) const
inline

Definition at line 422 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasScratchBaseForwardingHazard()

bool llvm::GCNSubtarget::hasScratchBaseForwardingHazard ( ) const
inline

Definition at line 967 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasSGetShaderCyclesInst()

bool llvm::GCNSubtarget::hasSGetShaderCyclesInst ( ) const
inline

Definition at line 713 of file GCNSubtarget.h.

◆ hasShift64HighRegBug()

bool llvm::GCNSubtarget::hasShift64HighRegBug ( ) const
inline

Definition at line 543 of file GCNSubtarget.h.

◆ hasSignedGVSOffset()

bool llvm::GCNSubtarget::hasSignedGVSOffset ( ) const
inline

Definition at line 505 of file GCNSubtarget.h.

◆ hasSignedScratchOffsets()

bool llvm::GCNSubtarget::hasSignedScratchOffsets ( ) const
inline
Returns
true if VADDR and SADDR fields in VSCRATCH can use negative values.

Definition at line 693 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasSMRDReadVALUDefHazard()

bool llvm::GCNSubtarget::hasSMRDReadVALUDefHazard ( ) const
inline

A read of an SGPR by an SMRD instruction requires 4 wait states when the SGPR was written by a VALU instruction.

Definition at line 260 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS.

◆ hasSPackHL()

bool llvm::GCNSubtarget::hasSPackHL ( ) const
inline

Return true if the target has the S_PACK_HL_B32_B16 instruction.

Definition at line 601 of file GCNSubtarget.h.

◆ hasSplitBarriers()

bool llvm::GCNSubtarget::hasSplitBarriers ( ) const
inline

Definition at line 686 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ hasSubClampInsts()

bool llvm::GCNSubtarget::hasSubClampInsts ( ) const
inline

Definition at line 730 of file GCNSubtarget.h.

◆ hasSwap()

bool llvm::GCNSubtarget::hasSwap ( ) const
inline

Definition at line 221 of file GCNSubtarget.h.

◆ hasTransForwardingHazard()

bool llvm::GCNSubtarget::hasTransForwardingHazard ( ) const
inline

Definition at line 547 of file GCNSubtarget.h.

◆ hasUnalignedBufferAccessEnabled()

bool llvm::GCNSubtarget::hasUnalignedBufferAccessEnabled ( ) const
inline

Definition at line 311 of file GCNSubtarget.h.

◆ hasUnalignedDSAccessEnabled()

bool llvm::GCNSubtarget::hasUnalignedDSAccessEnabled ( ) const
inline

Definition at line 315 of file GCNSubtarget.h.

◆ hasUnalignedScratchAccessEnabled()

bool llvm::GCNSubtarget::hasUnalignedScratchAccessEnabled ( ) const
inline

Definition at line 319 of file GCNSubtarget.h.

◆ hasUsableDivScaleConditionOutput()

bool llvm::GCNSubtarget::hasUsableDivScaleConditionOutput ( ) const
inline

Condition output from div_scale is usable.

Definition at line 247 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS.

◆ hasUsableDSOffset()

bool llvm::GCNSubtarget::hasUsableDSOffset ( ) const
inline

True if the offset field of DS instructions works as expected.

On SI, the offset uses a 16-bit adder and does not always wrap properly.

Definition at line 240 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::SEA_ISLANDS.

◆ hasUserSGPRInit16BugInWave32()

bool llvm::GCNSubtarget::hasUserSGPRInit16BugInWave32 ( ) const
inline

Definition at line 509 of file GCNSubtarget.h.

References isWave32().

◆ hasVALUMaskWriteHazard()

bool llvm::GCNSubtarget::hasVALUMaskWriteHazard ( ) const
inline

Definition at line 587 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX11.

◆ hasVALUPartialForwardingHazard()

bool llvm::GCNSubtarget::hasVALUPartialForwardingHazard ( ) const
inline

Definition at line 571 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX11.

◆ hasVALUReadSGPRHazard()

bool llvm::GCNSubtarget::hasVALUReadSGPRHazard ( ) const
inline

Definition at line 589 of file GCNSubtarget.h.

◆ hasVDecCoExecHazard()

bool llvm::GCNSubtarget::hasVDecCoExecHazard ( ) const
inline

Definition at line 557 of file GCNSubtarget.h.

◆ hasVectorMulU64()

bool llvm::GCNSubtarget::hasVectorMulU64 ( ) const
inline

Definition at line 700 of file GCNSubtarget.h.

◆ hasVINTERPEncoding()

bool llvm::GCNSubtarget::hasVINTERPEncoding ( ) const
inline

Definition at line 359 of file GCNSubtarget.h.

◆ hasVMEMReadSGPRVALUDefHazard()

bool llvm::GCNSubtarget::hasVMEMReadSGPRVALUDefHazard ( ) const
inline

A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU instruction.

Definition at line 266 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS.

◆ hasVOP3DPP()

bool llvm::GCNSubtarget::hasVOP3DPP ( ) const
inline

Definition at line 565 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX11.

◆ hasVOPD3()

bool llvm::GCNSubtarget::hasVOPD3 ( ) const
inline

Definition at line 697 of file GCNSubtarget.h.

◆ haveRoundOpsF64()

bool llvm::GCNSubtarget::haveRoundOpsF64 ( ) const
inline

Have v_trunc_f64, v_ceil_f64, v_rndne_f64.

Definition at line 299 of file GCNSubtarget.h.

◆ initializeSubtargetDependencies()

◆ isCuModeEnabled()

bool llvm::GCNSubtarget::isCuModeEnabled ( ) const
inline

Definition at line 327 of file GCNSubtarget.h.

◆ isDynamicVGPREnabled()

bool llvm::GCNSubtarget::isDynamicVGPREnabled ( ) const
inline

Definition at line 948 of file GCNSubtarget.h.

References DynamicVGPR.

Referenced by computeOccupancy(), and getMaxNumVGPRs().

◆ isGFX11Plus()

bool llvm::GCNSubtarget::isGFX11Plus ( ) const
inline

Definition at line 149 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX11.

◆ isMesaGfxShader()

bool llvm::GCNSubtarget::isMesaGfxShader ( const Function & F) const
inline

Definition at line 401 of file GCNSubtarget.h.

References F, llvm::AMDGPUSubtarget::isMesa3DOS(), and llvm::AMDGPU::isShader().

◆ isPreciseMemoryEnabled()

bool llvm::GCNSubtarget::isPreciseMemoryEnabled ( ) const
inline

Definition at line 329 of file GCNSubtarget.h.

◆ isTgSplitEnabled()

bool llvm::GCNSubtarget::isTgSplitEnabled ( ) const
inline

Definition at line 325 of file GCNSubtarget.h.

Referenced by requiresWaitOnWorkgroupReleaseFence().

◆ isWave32()

bool llvm::GCNSubtarget::isWave32 ( ) const
inline

◆ isWave64()

bool llvm::GCNSubtarget::isWave64 ( ) const
inline

Definition at line 885 of file GCNSubtarget.h.

References llvm::AMDGPUSubtarget::getWavefrontSize().

Referenced by lowerWaveShuffle().

◆ isWaveSizeKnown()

bool llvm::GCNSubtarget::isWaveSizeKnown ( ) const
inline

Returns true if the wavefront size of this subtarget is reliably known.

This is false only for a default target-cpu that does not have an explicit +wavefrontsize target feature.

Definition at line 890 of file GCNSubtarget.h.

◆ isXNACKEnabled()

bool llvm::GCNSubtarget::isXNACKEnabled ( ) const
inline

Definition at line 323 of file GCNSubtarget.h.

References TargetID.

Referenced by getBaseReservedNumSGPRs(), and needsConstrainedOpcode().

◆ ldsRequiresM0Init()

bool llvm::GCNSubtarget::ldsRequiresM0Init ( ) const
inline

Return true if most LDS instructions have an m0 use that requires m0 to be initialized.

Definition at line 384 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ loadStoreOptEnabled()

bool llvm::GCNSubtarget::loadStoreOptEnabled ( ) const
inline

Definition at line 507 of file GCNSubtarget.h.

References EnableLoadStoreOpt.

◆ maxHardClauseLength()

unsigned llvm::GCNSubtarget::maxHardClauseLength ( ) const
inline
Returns
The maximum number of instructions that can be enclosed in an S_CLAUSE on the given subtarget, or 0 for targets that do not support that instruction.

Definition at line 648 of file GCNSubtarget.h.

References MaxHardClauseLength.

◆ mirFileLoaded()

void GCNSubtarget::mirFileLoaded ( MachineFunction & MF) const
override

Definition at line 388 of file GCNSubtarget.cpp.

References isWave32(), MBB, and MI.

◆ needsAlignedVGPRs()

bool llvm::GCNSubtarget::needsAlignedVGPRs ( ) const
inline

Return whether operations acting on VGPR tuples require even alignment.

Definition at line 598 of file GCNSubtarget.h.

◆ needsKernArgPreloadProlog()

bool llvm::GCNSubtarget::needsKernArgPreloadProlog ( ) const
inline

Definition at line 724 of file GCNSubtarget.h.

◆ overridePostRASchedPolicy()

void GCNSubtarget::overridePostRASchedPolicy ( MachineSchedPolicy & Policy,
const SchedRegion & Region ) const
override

◆ overrideSchedPolicy()

◆ ParseSubtargetFeatures()

void llvm::GCNSubtarget::ParseSubtargetFeatures ( StringRef CPU,
StringRef TuneCPU,
StringRef FS )

◆ partialVCCWritesUpdateVCCZ()

bool llvm::GCNSubtarget::partialVCCWritesUpdateVCCZ ( ) const
inline

Writes to VCC_LO/VCC_HI update the VCCZ flag.

Definition at line 256 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX10.

◆ privateMemoryResourceIsRangeChecked()

bool llvm::GCNSubtarget::privateMemoryResourceIsRangeChecked ( ) const
inline
Returns
If MUBUF instructions always perform range checking, even for buffer resources used for private memory access.

Definition at line 303 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ requiresCodeObjectV6()

bool llvm::GCNSubtarget::requiresCodeObjectV6 ( ) const
inline

Definition at line 583 of file GCNSubtarget.h.

◆ requiresDisjointEarlyClobberAndUndef()

bool llvm::GCNSubtarget::requiresDisjointEarlyClobberAndUndef ( ) const
inline override

Definition at line 953 of file GCNSubtarget.h.

◆ requiresNopBeforeDeallocVGPRs()

bool llvm::GCNSubtarget::requiresNopBeforeDeallocVGPRs ( ) const
inline

Definition at line 942 of file GCNSubtarget.h.

◆ requiresWaitIdleBeforeGetReg()

bool llvm::GCNSubtarget::requiresWaitIdleBeforeGetReg ( ) const
inline

Definition at line 946 of file GCNSubtarget.h.

◆ requiresWaitOnWorkgroupReleaseFence()

bool llvm::GCNSubtarget::requiresWaitOnWorkgroupReleaseFence ( ) const
inline

◆ requiresWaitXCntForSingleAccessInstructions()

bool llvm::GCNSubtarget::requiresWaitXCntForSingleAccessInstructions ( ) const
inline
Returns
true if the subtarget requires a wait for xcnt before VMEM accesses that must never be repeated in the event of a page fault/retry. Atomic stores/rmw and all volatile accesses fall under this criterion.

Definition at line 980 of file GCNSubtarget.h.

◆ setRegModeNeedsVNOPs()

bool llvm::GCNSubtarget::setRegModeNeedsVNOPs ( ) const
inline

Definition at line 593 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX12.

◆ setScalarizeGlobalBehavior()

void llvm::GCNSubtarget::setScalarizeGlobalBehavior ( bool b)
inline

Definition at line 440 of file GCNSubtarget.h.

References ScalarizeGlobal.

◆ shouldClusterStores()

bool llvm::GCNSubtarget::shouldClusterStores ( ) const
inline

Definition at line 934 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX11.

◆ supportsBPermute()

bool llvm::GCNSubtarget::supportsBPermute ( ) const
inline

Definition at line 994 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS.

◆ supportsGetDoorbellID()

bool llvm::GCNSubtarget::supportsGetDoorbellID ( ) const
inline

Definition at line 233 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ supportsMinMaxDenormModes()

bool llvm::GCNSubtarget::supportsMinMaxDenormModes ( ) const
inline

Definition at line 282 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX9.

◆ supportsWave32()

bool llvm::GCNSubtarget::supportsWave32 ( ) const
inline

Definition at line 879 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::GFX10.

◆ supportsWave64()

bool llvm::GCNSubtarget::supportsWave64 ( ) const
inline

Definition at line 881 of file GCNSubtarget.h.

◆ supportsWaveWideBPermute()

bool llvm::GCNSubtarget::supportsWaveWideBPermute ( ) const
inline

◆ supportsWGP()

bool llvm::GCNSubtarget::supportsWGP ( ) const
inline

◆ unsafeDSOffsetFoldingEnabled()

bool llvm::GCNSubtarget::unsafeDSOffsetFoldingEnabled ( ) const
inline

Definition at line 242 of file GCNSubtarget.h.

◆ useAA()

bool GCNSubtarget::useAA ( ) const
override

Definition at line 407 of file GCNSubtarget.cpp.

References UseAA.

◆ useDS128()

bool llvm::GCNSubtarget::useDS128 ( ) const
inline
Returns
If target supports ds_read/write_b128 and user enables generation of ds_read/write_b128.

Definition at line 293 of file GCNSubtarget.h.

◆ usePRTStrictNull()

bool llvm::GCNSubtarget::usePRTStrictNull ( ) const
inline
Returns
If target requires PRT Strict NULL support (zero result registers for sparse texture support).

Definition at line 309 of file GCNSubtarget.h.

◆ useRealTrue16Insts()

bool llvm::GCNSubtarget::useRealTrue16Insts ( ) const
inline

Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-generated.

Fake True16 instructions are identical to non-fake ones except that they take 32-bit registers as operands and always use their low halves.

Definition at line 1010 of file GCNSubtarget.h.

◆ useVGPRBlockOpsForCSR()

bool llvm::GCNSubtarget::useVGPRBlockOpsForCSR ( ) const
inline

Definition at line 585 of file GCNSubtarget.h.

◆ useVGPRIndexMode()

bool GCNSubtarget::useVGPRIndexMode ( ) const

Definition at line 403 of file GCNSubtarget.cpp.

References EnableVGPRIndexMode.

◆ vmemWriteNeedsExpWaitcnt()

bool llvm::GCNSubtarget::vmemWriteNeedsExpWaitcnt ( ) const
inline

Definition at line 411 of file GCNSubtarget.h.

References getGeneration(), and llvm::AMDGPUSubtarget::SEA_ISLANDS.

◆ zeroesHigh16BitsOfDest()

bool GCNSubtarget::zeroesHigh16BitsOfDest ( unsigned Opcode) const

Returns whether this instruction, which produces a 16-bit result in a 32-bit register, implicitly zeroes the high 16 bits rather than preserving their original value.

This list was mostly derived from experimentation.

Definition at line 236 of file GCNSubtarget.cpp.

References getGeneration(), llvm::AMDGPUSubtarget::GFX9, and llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS.

Member Data Documentation

◆ DynamicVGPR

bool llvm::GCNSubtarget::DynamicVGPR = false
protected

Definition at line 73 of file GCNSubtarget.h.

Referenced by isDynamicVGPREnabled().

◆ DynamicVGPRBlockSize32

bool llvm::GCNSubtarget::DynamicVGPRBlockSize32 = false
protected

Definition at line 74 of file GCNSubtarget.h.

Referenced by getDynamicVGPRBlockSize().

◆ Gen

unsigned llvm::GCNSubtarget::Gen = INVALID
protected

Definition at line 64 of file GCNSubtarget.h.

Referenced by getGeneration(), and initializeSubtargetDependencies().

◆ InstCacheLineSize

unsigned llvm::GCNSubtarget::InstCacheLineSize = 0
protected

Definition at line 70 of file GCNSubtarget.h.

Referenced by getInstCacheLineSize(), and initializeSubtargetDependencies().

◆ InstrItins

InstrItineraryData llvm::GCNSubtarget::InstrItins
protected

Definition at line 65 of file GCNSubtarget.h.

Referenced by GCNSubtarget(), and getInstrItineraryData().

◆ LDSBankCount

int llvm::GCNSubtarget::LDSBankCount = 0
protected

Definition at line 66 of file GCNSubtarget.h.

Referenced by getLDSBankCount(), and initializeSubtargetDependencies().

◆ MaxHardClauseLength

unsigned llvm::GCNSubtarget::MaxHardClauseLength = 0
protected

The maximum number of instructions that may be placed within an S_CLAUSE, which is one greater than the maximum argument to S_CLAUSE.

A value of 0 indicates a lack of S_CLAUSE support.

Definition at line 80 of file GCNSubtarget.h.

Referenced by hasHardClauses(), and maxHardClauseLength().

◆ MaxPrivateElementSize

unsigned llvm::GCNSubtarget::MaxPrivateElementSize = 0
protected

Definition at line 67 of file GCNSubtarget.h.

Referenced by getMaxPrivateElementSize(), and initializeSubtargetDependencies().

◆ ScalarizeGlobal

bool llvm::GCNSubtarget::ScalarizeGlobal = false
protected

Definition at line 75 of file GCNSubtarget.h.

Referenced by getScalarizeGlobalBehavior(), and setScalarizeGlobalBehavior().

◆ TargetID

AMDGPU::IsaInfo::AMDGPUTargetID llvm::GCNSubtarget::TargetID
protected

The documentation for this class was generated from the following files: