doxygen/GCNSubtarget_8h_source.html

//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//==-----------------------------------------------------------------------===//

//

/// \file

/// AMD GCN specific subclass of TargetSubtarget.

//

//===----------------------------------------------------------------------===//


#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H

#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H


#include "AMDGPUCallLowering.h"

#include "AMDGPURegisterBankInfo.h"

#include "AMDGPUSubtarget.h"

#include "SIFrameLowering.h"

#include "SIISelLowering.h"

#include "SIInstrInfo.h"

#include "Utils/AMDGPUBaseInfo.h"

#include "llvm/CodeGen/SelectionDAGTargetInfo.h"

#include "llvm/Support/ErrorHandling.h"


#define GET_SUBTARGETINFO_HEADER

#include "AMDGPUGenSubtargetInfo.inc"


namespace llvm {


class GCNTargetMachine;


class GCNSubtarget final : public AMDGPUGenSubtargetInfo,

                           public AMDGPUSubtarget {

public:

  using AMDGPUSubtarget::getMaxWavesPerEU;


  // The following two enums are documented at:

  //   - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi

  enum class TrapHandlerAbi {
    NONE = 0,   // Encoded as 0x00.
    AMDHSA = 1, // Encoded as 0x01.
  };


  enum class TrapID {
    LLVMAMDHSATrap = 2,      // Encoded as 0x02.
    LLVMAMDHSADebugTrap = 3, // Encoded as 0x03.
  };


private:

  /// GlobalISel related APIs.

  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;

  std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;

  std::unique_ptr<InstructionSelector> InstSelector;

  std::unique_ptr<LegalizerInfo> Legalizer;

  std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;


protected:

  // Basic subtarget description.

  Triple TargetTriple;

  AMDGPU::IsaInfo::AMDGPUTargetID TargetID;

  unsigned Gen = INVALID;

  InstrItineraryData InstrItins;

  int LDSBankCount = 0;

  unsigned MaxPrivateElementSize = 0;


  // Possibly statically set by tablegen, but may want to be overridden.

  bool FastDenormalF32 = false;

  bool HalfRate64Ops = false;

  bool FullRate64Ops = false;


  // Dynamically set bits that enable features.

  bool FlatForGlobal = false;

  bool AutoWaitcntBeforeBarrier = false;

  bool BackOffBarrier = false;

  bool UnalignedScratchAccess = false;

  bool UnalignedAccessMode = false;

  bool HasApertureRegs = false;

  bool SupportsXNACK = false;

  bool KernargPreload = false;


  // This should not be used directly. 'TargetID' tracks the dynamic settings

  // for XNACK.

  bool EnableXNACK = false;


  bool EnableTgSplit = false;

  bool EnableCuMode = false;

  bool TrapHandler = false;

  bool EnablePreciseMemory = false;


  // Used as options.

  bool EnableLoadStoreOpt = false;

  bool EnableUnsafeDSOffsetFolding = false;

  bool EnableSIScheduler = false;

  bool EnableDS128 = false;

  bool EnablePRTStrictNull = false;

  bool DumpCode = false;


  // Subtarget properties statically set by tablegen.

  bool FP64 = false;

  bool FMA = false;

  bool MIMG_R128 = false;

  bool CIInsts = false;

  bool GFX8Insts = false;

  bool GFX9Insts = false;

  bool GFX90AInsts = false;

  bool GFX940Insts = false;

  bool GFX10Insts = false;

  bool GFX11Insts = false;

  bool GFX12Insts = false;

  bool GFX10_3Insts = false;

  bool GFX7GFX8GFX9Insts = false;

  bool SGPRInitBug = false;

  bool UserSGPRInit16Bug = false;

  bool NegativeScratchOffsetBug = false;

  bool NegativeUnalignedScratchOffsetBug = false;

  bool HasSMemRealTime = false;

  bool HasIntClamp = false;

  bool HasFmaMixInsts = false;

  bool HasMovrel = false;

  bool HasVGPRIndexMode = false;

  bool HasScalarDwordx3Loads = false;

  bool HasScalarStores = false;

  bool HasScalarAtomics = false;

  bool HasSDWAOmod = false;

  bool HasSDWAScalar = false;

  bool HasSDWASdst = false;

  bool HasSDWAMac = false;

  bool HasSDWAOutModsVOPC = false;

  bool HasDPP = false;

  bool HasDPP8 = false;

  bool HasDPALU_DPP = false;

  bool HasDPPSrc1SGPR = false;

  bool HasPackedFP32Ops = false;

  bool HasImageInsts = false;

  bool HasExtendedImageInsts = false;

  bool HasR128A16 = false;

  bool HasA16 = false;

  bool HasG16 = false;

  bool HasNSAEncoding = false;

  bool HasPartialNSAEncoding = false;

  bool GFX10_AEncoding = false;

  bool GFX10_BEncoding = false;

  bool HasDLInsts = false;

  bool HasFmacF64Inst = false;

  bool HasDot1Insts = false;

  bool HasDot2Insts = false;

  bool HasDot3Insts = false;

  bool HasDot4Insts = false;

  bool HasDot5Insts = false;

  bool HasDot6Insts = false;

  bool HasDot7Insts = false;

  bool HasDot8Insts = false;

  bool HasDot9Insts = false;

  bool HasDot10Insts = false;

  bool HasDot11Insts = false;

  bool HasMAIInsts = false;

  bool HasFP8Insts = false;

  bool HasFP8ConversionInsts = false;

  bool HasPkFmacF16Inst = false;

  bool HasAtomicFMinFMaxF32GlobalInsts = false;

  bool HasAtomicFMinFMaxF64GlobalInsts = false;

  bool HasAtomicFMinFMaxF32FlatInsts = false;

  bool HasAtomicFMinFMaxF64FlatInsts = false;

  bool HasAtomicDsPkAdd16Insts = false;

  bool HasAtomicFlatPkAdd16Insts = false;

  bool HasAtomicFaddRtnInsts = false;

  bool HasAtomicFaddNoRtnInsts = false;

  bool HasMemoryAtomicFaddF32DenormalSupport = false;

  bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false;

  bool HasAtomicBufferGlobalPkAddF16Insts = false;

  bool HasAtomicCSubNoRtnInsts = false;

  bool HasAtomicGlobalPkAddBF16Inst = false;

  bool HasAtomicBufferPkAddBF16Inst = false;

  bool HasFlatAtomicFaddF32Inst = false;

  bool HasFlatBufferGlobalAtomicFaddF64Inst = false;

  bool HasDefaultComponentZero = false;

  bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false;

  bool HasDefaultComponentBroadcast = false;

  /// The maximum number of instructions that may be placed within an S_CLAUSE,

  /// which is one greater than the maximum argument to S_CLAUSE. A value of 0

  /// indicates a lack of S_CLAUSE support.

  unsigned MaxHardClauseLength = 0;

  bool SupportsSRAMECC = false;


  // This should not be used directly. 'TargetID' tracks the dynamic settings

  // for SRAMECC.

  bool EnableSRAMECC = false;


  bool HasNoSdstCMPX = false;

  bool HasVscnt = false;

  bool HasGetWaveIdInst = false;

  bool HasSMemTimeInst = false;

  bool HasShaderCyclesRegister = false;

  bool HasShaderCyclesHiLoRegisters = false;

  bool HasVOP3Literal = false;

  bool HasNoDataDepHazard = false;

  bool FlatAddressSpace = false;

  bool FlatInstOffsets = false;

  bool FlatGlobalInsts = false;

  bool FlatScratchInsts = false;

  bool ScalarFlatScratchInsts = false;

  bool HasArchitectedFlatScratch = false;

  bool EnableFlatScratch = false;

  bool HasArchitectedSGPRs = false;

  bool HasGDS = false;

  bool HasGWS = false;

  bool AddNoCarryInsts = false;

  bool HasUnpackedD16VMem = false;

  bool LDSMisalignedBug = false;

  bool HasMFMAInlineLiteralBug = false;

  bool UnalignedBufferAccess = false;

  bool UnalignedDSAccess = false;

  bool HasPackedTID = false;

  bool ScalarizeGlobal = false;

  bool HasSALUFloatInsts = false;

  bool HasVGPRSingleUseHintInsts = false;

  bool HasPseudoScalarTrans = false;

  bool HasRestrictedSOffset = false;


  bool HasVcmpxPermlaneHazard = false;

  bool HasVMEMtoScalarWriteHazard = false;

  bool HasSMEMtoVectorWriteHazard = false;

  bool HasInstFwdPrefetchBug = false;

  bool HasVcmpxExecWARHazard = false;

  bool HasLdsBranchVmemWARHazard = false;

  bool HasNSAtoVMEMBug = false;

  bool HasNSAClauseBug = false;

  bool HasOffset3fBug = false;

  bool HasFlatSegmentOffsetBug = false;

  bool HasImageStoreD16Bug = false;

  bool HasImageGather4D16Bug = false;

  bool HasMSAALoadDstSelBug = false;

  bool HasPrivEnabledTrap2NopBug = false;

  bool Has1_5xVGPRs = false;

  bool HasMADIntraFwdBug = false;

  bool HasVOPDInsts = false;

  bool HasVALUTransUseHazard = false;

  bool HasForceStoreSC0SC1 = false;

  bool HasRequiredExportPriority = false;


  bool RequiresCOV6 = false;


  // Dummy feature to use for assembler in tablegen.

  bool FeatureDisable = false;


  SelectionDAGTargetInfo TSInfo;

private:

  SIInstrInfo InstrInfo;

  SITargetLowering TLInfo;

  SIFrameLowering FrameLowering;


public:

  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,

               const GCNTargetMachine &TM);

  ~GCNSubtarget() override;


  GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,

                                                   StringRef GPU, StringRef FS);


  /// Diagnose inconsistent subtarget features before attempting to codegen

  /// function \p F.

  void checkSubtargetFeatures(const Function &F) const;


  /// \returns the address of the SIInstrInfo member owned by this subtarget.
  const SIInstrInfo *getInstrInfo() const override { return &InstrInfo; }


  /// \returns the address of the SIFrameLowering member owned by this
  /// subtarget.
  const SIFrameLowering *getFrameLowering() const override {
    const SIFrameLowering *FL = &FrameLowering;
    return FL;
  }


  /// \returns the address of the SITargetLowering member owned by this
  /// subtarget.
  const SITargetLowering *getTargetLowering() const override {
    const SITargetLowering *TL = &TLInfo;
    return TL;
  }


  /// \returns the address of the register info object exposed by InstrInfo.
  const SIRegisterInfo *getRegisterInfo() const override {
    const SIRegisterInfo &RI = InstrInfo.getRegisterInfo();
    return &RI;
  }


  /// \returns the pointer currently managed by CallLoweringInfo.
  const CallLowering *getCallLowering() const override {
    const CallLowering *CL = CallLoweringInfo.get();
    return CL;
  }


  /// \returns the pointer currently managed by InlineAsmLoweringInfo.
  const InlineAsmLowering *getInlineAsmLowering() const override {
    const InlineAsmLowering *IAL = InlineAsmLoweringInfo.get();
    return IAL;
  }


  /// \returns the pointer currently managed by InstSelector.
  InstructionSelector *getInstructionSelector() const override {
    InstructionSelector *IS = InstSelector.get();
    return IS;
  }


  /// \returns the pointer currently managed by Legalizer.
  const LegalizerInfo *getLegalizerInfo() const override {
    const LegalizerInfo *LI = Legalizer.get();
    return LI;
  }


  /// \returns the pointer currently managed by RegBankInfo.
  const AMDGPURegisterBankInfo *getRegBankInfo() const override {
    const AMDGPURegisterBankInfo *RBI = RegBankInfo.get();
    return RBI;
  }


  /// \returns a const reference to the TargetID member.
  const AMDGPU::IsaInfo::AMDGPUTargetID &getTargetID() const { return TargetID; }


  // Nothing implemented, just prevent crashes on use.

  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
    // Hands out the address of the TSInfo member.
    const SelectionDAGTargetInfo *Info = &TSInfo;
    return Info;
  }


  /// \returns the address of the InstrItins member.
  const InstrItineraryData *getInstrItineraryData() const override {
    const InstrItineraryData *Itins = &InstrItins;
    return Itins;
  }


  void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);


  /// \returns the stored Gen value converted to the Generation enumeration.
  Generation getGeneration() const { return static_cast<Generation>(Gen); }


  unsigned getMaxWaveScratchSize() const {

    // See COMPUTE_TMPRING_SIZE.WAVESIZE.

    if (getGeneration() >= GFX12) {

      // 18-bit field in units of 64-dword.

      return (64 * 4) * ((1 << 18) - 1);

    }

    if (getGeneration() == GFX11) {

      // 15-bit field in units of 64-dword.

      return (64 * 4) * ((1 << 15) - 1);

    }

    // 13-bit field in units of 256-dword.

    return (256 * 4) * ((1 << 13) - 1);

  }


  /// Return the number of high bits known to be zero for a frame index.

  unsigned getKnownHighZeroBitsForFrameIndex() const {
    // Leading zero bits of the maximum per-wave scratch size, plus the log2 of
    // the wavefront size.
    const unsigned MaxScratch = getMaxWaveScratchSize();
    const unsigned LeadingZeros = llvm::countl_zero(MaxScratch);
    return LeadingZeros + getWavefrontSizeLog2();
  }


  /// \returns the stored LDSBankCount value.
  int getLDSBankCount() const { return LDSBankCount; }


  /// \returns MaxPrivateElementSize when \p ForBufferRSrc is set or flat
  /// scratch is not enabled; otherwise 16.
  unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
    if (ForBufferRSrc || !enableFlatScratch())
      return MaxPrivateElementSize;
    return 16;
  }


  unsigned getConstantBusLimit(unsigned Opcode) const;


  /// Returns true if an instruction that produces a 16-bit result in a 32-bit
  /// register implicitly zeroes the high 16 bits of the destination, rather
  /// than preserving their original value.

  bool zeroesHigh16BitsOfDest(unsigned Opcode) const;


  bool supportsWGP() const { return getGeneration() >= GFX10; }


  /// \returns the HasIntClamp subtarget flag.
  bool hasIntClamp() const { return HasIntClamp; }


  /// \returns the FP64 subtarget flag.
  bool hasFP64() const { return FP64; }


  /// \returns the MIMG_R128 subtarget flag.
  bool hasMIMG_R128() const { return MIMG_R128; }


  /// \returns the FP64 subtarget flag (the same flag hasFP64() reports).
  bool hasHWFP64() const { return FP64; }


  /// \returns the HalfRate64Ops subtarget flag.
  bool hasHalfRate64Ops() const { return HalfRate64Ops; }


  /// \returns the FullRate64Ops subtarget flag.
  bool hasFullRate64Ops() const { return FullRate64Ops; }


  bool hasAddr64() const {

    return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);

  }


  bool hasFlat() const {

    return (getGeneration() > AMDGPUSubtarget::SOUTHERN_ISLANDS);

  }


  // Return true if the target only has the reverse operand versions of VALU

  // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).

  bool hasOnlyRevVALUShifts() const {

    return getGeneration() >= VOLCANIC_ISLANDS;

  }


  bool hasFractBug() const {

    return getGeneration() == SOUTHERN_ISLANDS;

  }


  /// \returns true unconditionally.
  bool hasBFE() const { return true; }


  /// \returns true unconditionally.
  bool hasBFI() const { return true; }


  /// \returns whatever hasBFE() reports.
  bool hasBFM() const { return hasBFE(); }


  /// \returns true unconditionally; \p Size is not consulted.
  bool hasBCNT(unsigned Size) const { return true; }


  /// \returns true unconditionally.
  bool hasFFBL() const { return true; }


  /// \returns true unconditionally.
  bool hasFFBH() const { return true; }


  bool hasMed3_16() const {

    return getGeneration() >= AMDGPUSubtarget::GFX9;

  }


  bool hasMin3Max3_16() const {

    return getGeneration() >= AMDGPUSubtarget::GFX9;

  }


  /// \returns the HasFmaMixInsts subtarget flag.
  bool hasFmaMixInsts() const { return HasFmaMixInsts; }


  /// \returns true unconditionally.
  bool hasCARRY() const { return true; }


  /// \returns the FMA subtarget flag.
  bool hasFMA() const { return FMA; }


  /// \returns the GFX9Insts subtarget flag.
  bool hasSwap() const { return GFX9Insts; }


  /// \returns the GFX9Insts subtarget flag.
  bool hasScalarPackInsts() const { return GFX9Insts; }


  /// \returns the GFX9Insts subtarget flag.
  bool hasScalarMulHiInsts() const { return GFX9Insts; }


  bool hasScalarSubwordLoads() const { return getGeneration() >= GFX12; }


  /// \returns TrapHandlerAbi::AMDHSA when isAmdHsaOS() holds, and
  /// TrapHandlerAbi::NONE otherwise.
  TrapHandlerAbi getTrapHandlerAbi() const {
    if (isAmdHsaOS())
      return TrapHandlerAbi::AMDHSA;
    return TrapHandlerAbi::NONE;
  }


  bool supportsGetDoorbellID() const {

    // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.

    return getGeneration() >= GFX9;

  }


  /// True if the offset field of DS instructions works as expected. On SI, the

  /// offset uses a 16-bit adder and does not always wrap properly.

  bool hasUsableDSOffset() const {

    return getGeneration() >= SEA_ISLANDS;

  }


  /// \returns the EnableUnsafeDSOffsetFolding option flag.
  bool unsafeDSOffsetFoldingEnabled() const {
    const bool Enabled = EnableUnsafeDSOffsetFolding;
    return Enabled;
  }


  /// Condition output from div_scale is usable.

  bool hasUsableDivScaleConditionOutput() const {

    return getGeneration() != SOUTHERN_ISLANDS;

  }


  /// Extra wait hazard is needed in some cases before

  /// s_cbranch_vccnz/s_cbranch_vccz.

  bool hasReadVCCZBug() const {

    return getGeneration() <= SEA_ISLANDS;

  }


  /// Writes to VCC_LO/VCC_HI update the VCCZ flag.

  bool partialVCCWritesUpdateVCCZ() const {

    return getGeneration() >= GFX10;

  }


  /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR

  /// was written by a VALU instruction.

  bool hasSMRDReadVALUDefHazard() const {

    return getGeneration() == SOUTHERN_ISLANDS;

  }


  /// A read of an SGPR by a VMEM instruction requires 5 wait states when the

  /// SGPR was written by a VALU Instruction.

  bool hasVMEMReadSGPRVALUDefHazard() const {

    return getGeneration() >= VOLCANIC_ISLANDS;

  }


  bool hasRFEHazards() const {

    return getGeneration() >= VOLCANIC_ISLANDS;

  }


  /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.

  unsigned getSetRegWaitStates() const {
    // One wait state up to and including SEA_ISLANDS, two afterwards.
    if (getGeneration() <= SEA_ISLANDS)
      return 1;
    return 2;
  }


  /// \returns the DumpCode option flag.
  bool dumpCode() const { return DumpCode; }


  /// Return the amount of LDS that can be used that will not restrict the

  /// occupancy lower than WaveCount.

  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,

                                           const Function &) const;


  bool supportsMinMaxDenormModes() const {

    return getGeneration() >= AMDGPUSubtarget::GFX9;

  }


  /// \returns If target supports S_DENORM_MODE.

  bool hasDenormModeInst() const {

    return getGeneration() >= AMDGPUSubtarget::GFX10;

  }


  /// \returns the FlatForGlobal feature flag.
  bool useFlatForGlobal() const { return FlatForGlobal; }


  /// \returns If target supports ds_read/write_b128 and user enables generation

  /// of ds_read/write_b128.

  bool useDS128() const {
    // Needs both the CIInsts flag and the EnableDS128 option.
    if (!CIInsts)
      return false;
    return EnableDS128;
  }


  /// \return If target supports ds_read/write_b96/128.

  bool hasDS96AndDS128() const {

    return CIInsts;

  }


  /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64

  bool haveRoundOpsF64() const {

    return CIInsts;

  }


  /// \returns If MUBUF instructions always perform range checking, even for

  /// buffer resources used for private memory access.

  bool privateMemoryResourceIsRangeChecked() const {

    return getGeneration() < AMDGPUSubtarget::GFX9;

  }


  /// \returns If target requires PRT Strict NULL support (zero result registers

  /// for sparse texture support).

  bool usePRTStrictNull() const {
    // Mirrors the EnablePRTStrictNull option flag.
    const bool Enabled = EnablePRTStrictNull;
    return Enabled;
  }


  /// \returns the AutoWaitcntBeforeBarrier feature flag.
  bool hasAutoWaitcntBeforeBarrier() const { return AutoWaitcntBeforeBarrier; }


  /// \returns true if the target supports backing off of s_barrier instructions

  /// when an exception is raised.

  bool supportsBackOffBarrier() const {
    // Mirrors the BackOffBarrier feature flag.
    const bool Supported = BackOffBarrier;
    return Supported;
  }


  /// \returns the UnalignedBufferAccess subtarget flag.
  bool hasUnalignedBufferAccess() const { return UnalignedBufferAccess; }


  /// \returns true when both UnalignedBufferAccess and UnalignedAccessMode are
  /// set.
  bool hasUnalignedBufferAccessEnabled() const {
    if (!UnalignedBufferAccess)
      return false;
    return UnalignedAccessMode;
  }


  /// \returns the UnalignedDSAccess subtarget flag.
  bool hasUnalignedDSAccess() const { return UnalignedDSAccess; }


  /// \returns true when both UnalignedDSAccess and UnalignedAccessMode are set.
  bool hasUnalignedDSAccessEnabled() const {
    if (!UnalignedDSAccess)
      return false;
    return UnalignedAccessMode;
  }


  /// \returns the UnalignedScratchAccess feature flag.
  bool hasUnalignedScratchAccess() const { return UnalignedScratchAccess; }


  /// \returns the UnalignedAccessMode feature flag.
  bool hasUnalignedAccessMode() const { return UnalignedAccessMode; }


  /// \returns the HasApertureRegs feature flag.
  bool hasApertureRegs() const { return HasApertureRegs; }


  /// \returns the TrapHandler feature flag.
  bool isTrapHandlerEnabled() const { return TrapHandler; }


  /// \returns the result of TargetID.isXnackOnOrAny(); the XNACK setting is
  /// read from TargetID rather than from the EnableXNACK member.
  bool isXNACKEnabled() const { return TargetID.isXnackOnOrAny(); }


  /// \returns the EnableTgSplit feature flag.
  bool isTgSplitEnabled() const { return EnableTgSplit; }


  /// \returns the EnableCuMode feature flag.
  bool isCuModeEnabled() const { return EnableCuMode; }


  /// \returns the EnablePreciseMemory feature flag.
  bool isPreciseMemoryEnabled() const {
    return EnablePreciseMemory;
  }


  /// \returns the FlatAddressSpace subtarget flag.
  bool hasFlatAddressSpace() const { return FlatAddressSpace; }


  /// \returns whatever hasFlatAddressSpace() reports.
  bool hasFlatScrRegister() const { return hasFlatAddressSpace(); }


  /// \returns the FlatInstOffsets subtarget flag.
  bool hasFlatInstOffsets() const { return FlatInstOffsets; }


  /// \returns the FlatGlobalInsts subtarget flag.
  bool hasFlatGlobalInsts() const { return FlatGlobalInsts; }


  /// \returns the FlatScratchInsts subtarget flag.
  bool hasFlatScratchInsts() const { return FlatScratchInsts; }


  // Check if target supports ST addressing mode with FLAT scratch instructions.

  // The ST addressing mode means no registers are used, either VGPR or SGPR,

  // but only immediate offset is swizzled and added to the FLAT scratch base.

  bool hasFlatScratchSTMode() const {
    // Requires FLAT scratch instructions in the first place...
    if (!hasFlatScratchInsts())
      return false;
    // ...plus either the GFX10.3 or the GFX940 instruction set.
    return hasGFX10_3Insts() || hasGFX940Insts();
  }


  /// \returns true when either GFX940Insts or GFX11Insts is set.
  bool hasFlatScratchSVSMode() const {
    if (GFX940Insts)
      return true;
    return GFX11Insts;
  }


  /// \returns the ScalarFlatScratchInsts subtarget flag.
  bool hasScalarFlatScratchInsts() const { return ScalarFlatScratchInsts; }


  /// \returns true when flat scratch is architected, or when the
  /// EnableFlatScratch flag is set and FLAT scratch instructions exist.
  bool enableFlatScratch() const {
    if (flatScratchIsArchitected())
      return true;
    return EnableFlatScratch && hasFlatScratchInsts();
  }


  /// \returns the GFX10_BEncoding subtarget flag.
  bool hasGlobalAddTidInsts() const { return GFX10_BEncoding; }


  /// \returns the GFX10_BEncoding subtarget flag.
  bool hasAtomicCSub() const { return GFX10_BEncoding; }


  /// \returns the negation of hasGFX940Insts().
  bool hasExportInsts() const { return !hasGFX940Insts(); }


  /// \returns the GFX11Insts subtarget flag.
  bool hasVINTERPEncoding() const { return GFX11Insts; }


  // DS_ADD_F64/DS_ADD_RTN_F64

  bool hasLdsAtomicAddF64() const {
    // Follows hasGFX90AInsts().
    return hasGFX90AInsts();
  }


  bool hasMultiDwordFlatScratchAddressing() const {

    return getGeneration() >= GFX9;

  }


  /// \returns the HasFlatSegmentOffsetBug subtarget flag.
  bool hasFlatSegmentOffsetBug() const { return HasFlatSegmentOffsetBug; }


  bool hasFlatLgkmVMemCountInOrder() const {

    return getGeneration() > GFX9;

  }


  bool hasD16LoadStore() const {

    return getGeneration() >= GFX9;

  }


  /// \returns true when hasD16LoadStore() holds and
  /// TargetID.isSramEccOnOrAny() does not.
  bool d16PreservesUnusedBits() const {
    if (!hasD16LoadStore())
      return false;
    return !TargetID.isSramEccOnOrAny();
  }


  bool hasD16Images() const {

    return getGeneration() >= VOLCANIC_ISLANDS;

  }


  /// Return if most LDS instructions have an m0 use that requires m0 to be

  /// initialized.

  bool ldsRequiresM0Init() const {

    return getGeneration() < GFX9;

  }


  // True if the hardware rewinds and replays GWS operations if a wave is

  // preempted.

  //

  // If this is false, a GWS operation requires testing if a nack set the

  // MEM_VIOL bit, and repeating if so.

  bool hasGWSAutoReplay() const {

    return getGeneration() >= GFX9;

  }


  /// \returns if target has ds_gws_sema_release_all instruction.

  bool hasGWSSemaReleaseAll() const {

    return CIInsts;

  }


  /// \returns true if the target has integer add/sub instructions that do not

  /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,

  /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier

  /// for saturation.

  bool hasAddNoCarry() const {
    // Mirrors the AddNoCarryInsts subtarget flag.
    const bool HasInsts = AddNoCarryInsts;
    return HasInsts;
  }


  bool hasScalarAddSub64() const { return getGeneration() >= GFX12; }


  bool hasScalarSMulU64() const { return getGeneration() >= GFX12; }


  /// \returns the HasUnpackedD16VMem subtarget flag.
  bool hasUnpackedD16VMem() const { return HasUnpackedD16VMem; }


  // Covers VS/PS/CS graphics shaders

  bool isMesaGfxShader(const Function &F) const {
    // Must be the Mesa3D OS and \p F must use a shader calling convention.
    if (!isMesa3DOS())
      return false;
    return AMDGPU::isShader(F.getCallingConv());
  }


  bool hasMad64_32() const {

    return getGeneration() >= SEA_ISLANDS;

  }


  /// \returns the HasSDWAOmod subtarget flag.
  bool hasSDWAOmod() const { return HasSDWAOmod; }


  /// \returns the HasSDWAScalar subtarget flag.
  bool hasSDWAScalar() const { return HasSDWAScalar; }


  /// \returns the HasSDWASdst subtarget flag.
  bool hasSDWASdst() const { return HasSDWASdst; }


  /// \returns the HasSDWAMac subtarget flag.
  bool hasSDWAMac() const { return HasSDWAMac; }


  /// \returns the HasSDWAOutModsVOPC subtarget flag.
  bool hasSDWAOutModsVOPC() const { return HasSDWAOutModsVOPC; }


  /// \returns the HasDLInsts subtarget flag.
  bool hasDLInsts() const { return HasDLInsts; }


  /// \returns the HasFmacF64Inst subtarget flag.
  bool hasFmacF64Inst() const {
    return HasFmacF64Inst;
  }


  /// \returns the HasDot1Insts subtarget flag.
  bool hasDot1Insts() const { return HasDot1Insts; }


  /// \returns the HasDot2Insts subtarget flag.
  bool hasDot2Insts() const { return HasDot2Insts; }


  /// \returns the HasDot3Insts subtarget flag.
  bool hasDot3Insts() const { return HasDot3Insts; }


  /// \returns the HasDot4Insts subtarget flag.
  bool hasDot4Insts() const { return HasDot4Insts; }


  /// \returns the HasDot5Insts subtarget flag.
  bool hasDot5Insts() const { return HasDot5Insts; }


  /// \returns the HasDot6Insts subtarget flag.
  bool hasDot6Insts() const { return HasDot6Insts; }


  /// \returns the HasDot7Insts subtarget flag.
  bool hasDot7Insts() const { return HasDot7Insts; }


  /// \returns the HasDot8Insts subtarget flag.
  bool hasDot8Insts() const { return HasDot8Insts; }


  /// \returns the HasDot9Insts subtarget flag.
  bool hasDot9Insts() const { return HasDot9Insts; }


  /// \returns the HasDot10Insts subtarget flag.
  bool hasDot10Insts() const { return HasDot10Insts; }


  /// \returns the HasDot11Insts subtarget flag.
  bool hasDot11Insts() const { return HasDot11Insts; }


  /// \returns the HasMAIInsts subtarget flag.
  bool hasMAIInsts() const { return HasMAIInsts; }


  /// \returns the HasFP8Insts subtarget flag.
  bool hasFP8Insts() const { return HasFP8Insts; }


  /// \returns the HasFP8ConversionInsts subtarget flag.
  bool hasFP8ConversionInsts() const {
    return HasFP8ConversionInsts;
  }


  /// \returns the HasPkFmacF16Inst subtarget flag.
  bool hasPkFmacF16Inst() const { return HasPkFmacF16Inst; }


  /// \returns the HasAtomicFMinFMaxF32GlobalInsts subtarget flag.
  bool hasAtomicFMinFMaxF32GlobalInsts() const {
    const bool HasInsts = HasAtomicFMinFMaxF32GlobalInsts;
    return HasInsts;
  }


  /// \returns the HasAtomicFMinFMaxF64GlobalInsts subtarget flag.
  bool hasAtomicFMinFMaxF64GlobalInsts() const {
    const bool HasInsts = HasAtomicFMinFMaxF64GlobalInsts;
    return HasInsts;
  }


  /// \returns the HasAtomicFMinFMaxF32FlatInsts subtarget flag.
  bool hasAtomicFMinFMaxF32FlatInsts() const {
    const bool HasInsts = HasAtomicFMinFMaxF32FlatInsts;
    return HasInsts;
  }


  /// \returns the HasAtomicFMinFMaxF64FlatInsts subtarget flag.
  bool hasAtomicFMinFMaxF64FlatInsts() const {
    const bool HasInsts = HasAtomicFMinFMaxF64FlatInsts;
    return HasInsts;
  }


  /// \returns the HasAtomicDsPkAdd16Insts subtarget flag.
  bool hasAtomicDsPkAdd16Insts() const {
    return HasAtomicDsPkAdd16Insts;
  }


  /// \returns the HasAtomicFlatPkAdd16Insts subtarget flag.
  bool hasAtomicFlatPkAdd16Insts() const {
    return HasAtomicFlatPkAdd16Insts;
  }


  /// \returns true when either HasAtomicFaddRtnInsts or
  /// HasAtomicFaddNoRtnInsts is set.
  bool hasAtomicFaddInsts() const {
    if (HasAtomicFaddRtnInsts)
      return true;
    return HasAtomicFaddNoRtnInsts;
  }


  /// \returns the HasAtomicFaddRtnInsts subtarget flag.
  bool hasAtomicFaddRtnInsts() const {
    return HasAtomicFaddRtnInsts;
  }


  /// \returns the HasAtomicFaddNoRtnInsts subtarget flag.
  bool hasAtomicFaddNoRtnInsts() const {
    return HasAtomicFaddNoRtnInsts;
  }


  /// \returns the HasAtomicBufferGlobalPkAddF16NoRtnInsts subtarget flag.
  bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const {
    const bool HasInsts = HasAtomicBufferGlobalPkAddF16NoRtnInsts;
    return HasInsts;
  }


  /// \returns the HasAtomicBufferGlobalPkAddF16Insts subtarget flag.
  bool hasAtomicBufferGlobalPkAddF16Insts() const {
    const bool HasInsts = HasAtomicBufferGlobalPkAddF16Insts;
    return HasInsts;
  }


  /// \returns the HasAtomicGlobalPkAddBF16Inst subtarget flag.
  bool hasAtomicGlobalPkAddBF16Inst() const {
    const bool HasInst = HasAtomicGlobalPkAddBF16Inst;
    return HasInst;
  }


  /// \returns the HasAtomicBufferPkAddBF16Inst subtarget flag.
  bool hasAtomicBufferPkAddBF16Inst() const {
    const bool HasInst = HasAtomicBufferPkAddBF16Inst;
    return HasInst;
  }


  /// \returns the HasFlatAtomicFaddF32Inst subtarget flag.
  bool hasFlatAtomicFaddF32Inst() const {
    return HasFlatAtomicFaddF32Inst;
  }


  /// \return true if the target has flat, global, and buffer atomic fadd for

  /// double.

  bool hasFlatBufferGlobalAtomicFaddF64Inst() const {
    // Mirrors the HasFlatBufferGlobalAtomicFaddF64Inst subtarget flag.
    const bool HasInst = HasFlatBufferGlobalAtomicFaddF64Inst;
    return HasInst;
  }


  /// \return true if the target's flat, global, and buffer atomic fadd for

  /// float supports denormal handling.

  bool hasMemoryAtomicFaddF32DenormalSupport() const {
    // Mirrors the HasMemoryAtomicFaddF32DenormalSupport subtarget flag.
    const bool Supported = HasMemoryAtomicFaddF32DenormalSupport;
    return Supported;
  }


  /// \return true if atomic operations targeting fine-grained memory work

  /// correctly at device scope, in allocations in host or peer PCIe device

  /// memory.

  bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const {
    // Mirrors the HasAgentScopeFineGrainedRemoteMemoryAtomics subtarget flag.
    const bool Supported = HasAgentScopeFineGrainedRemoteMemoryAtomics;
    return Supported;
  }


  /// \returns the HasDefaultComponentZero subtarget flag.
  bool hasDefaultComponentZero() const {
    return HasDefaultComponentZero;
  }


  /// \returns the HasDefaultComponentBroadcast subtarget flag.
  bool hasDefaultComponentBroadcast() const {
    const bool HasBroadcast = HasDefaultComponentBroadcast;
    return HasBroadcast;
  }


  /// \returns the HasNoSdstCMPX subtarget flag.
  bool hasNoSdstCMPX() const { return HasNoSdstCMPX; }


  /// \returns the HasVscnt subtarget flag.
  bool hasVscnt() const { return HasVscnt; }


  /// \returns the HasGetWaveIdInst subtarget flag.
  bool hasGetWaveIdInst() const { return HasGetWaveIdInst; }


  /// \returns the HasSMemTimeInst subtarget flag.
  bool hasSMemTimeInst() const { return HasSMemTimeInst; }


  /// \returns the HasShaderCyclesRegister subtarget flag.
  bool hasShaderCyclesRegister() const { return HasShaderCyclesRegister; }


  /// \returns the HasShaderCyclesHiLoRegisters subtarget flag.
  bool hasShaderCyclesHiLoRegisters() const {
    const bool HasRegs = HasShaderCyclesHiLoRegisters;
    return HasRegs;
  }


  /// \returns the HasVOP3Literal subtarget flag.
  bool hasVOP3Literal() const { return HasVOP3Literal; }


  /// \returns the HasNoDataDepHazard subtarget flag.
  bool hasNoDataDepHazard() const { return HasNoDataDepHazard; }


  bool vmemWriteNeedsExpWaitcnt() const {

    return getGeneration() < SEA_ISLANDS;

  }


  bool hasInstPrefetch() const {

    return getGeneration() == GFX10 || getGeneration() == GFX11;

  }


  /// \returns the GFX12Insts subtarget flag.
  bool hasPrefetch() const {
    return GFX12Insts;
  }


  // Has s_cmpk_* instructions.

  bool hasSCmpK() const { return getGeneration() < GFX12; }


  // Scratch is allocated in 256 dword per wave blocks for the entire

  // wavefront. When viewed from the perspective of an arbitrary workitem, this

  // is 4-byte aligned.

  //

  // Only 4-byte alignment is really needed to access anything. Transformations

  // on the pointer value itself may rely on the alignment / known low bits of

  // the pointer. Set this to something above the minimum to avoid needing

  // dynamic realignment in common cases.

  Align getStackAlignment() const {
    // Fixed 16-byte alignment, independent of the subtarget configuration.
    return Align(16);
  }


  /// \returns true unconditionally.
  bool enableMachineScheduler() const override { return true; }


  bool useAA() const override;


  /// \returns true unconditionally.
  bool enableSubRegLiveness() const override { return true; }


  /// Stores \p b into the ScalarizeGlobal flag.
  void setScalarizeGlobalBehavior(bool b) {
    ScalarizeGlobal = b;
  }

  /// \returns the current value of the ScalarizeGlobal flag.
  bool getScalarizeGlobalBehavior() const {
    return ScalarizeGlobal;
  }


  // static wrappers

  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);


  // XXX - Why is this here if it isn't in the default pass set?

  bool enableEarlyIfConversion() const override { return true; }


  void overrideSchedPolicy(MachineSchedPolicy &Policy,

                           unsigned NumRegionInstrs) const override;


  void mirFileLoaded(MachineFunction &MF) const override;


  /// \returns the result of AMDGPU::getMaxNumUserSGPRs() for this subtarget.
  unsigned getMaxNumUserSGPRs() const {
    const unsigned MaxUserSGPRs = AMDGPU::getMaxNumUserSGPRs(*this);
    return MaxUserSGPRs;
  }


  /// \returns the HasSMemRealTime subtarget flag.
  bool hasSMemRealTime() const { return HasSMemRealTime; }


  /// \returns the HasMovrel subtarget flag.
  bool hasMovrel() const { return HasMovrel; }


  /// \returns the HasVGPRIndexMode subtarget flag.
  bool hasVGPRIndexMode() const { return HasVGPRIndexMode; }


  bool useVGPRIndexMode() const;


  bool hasScalarCompareEq64() const {

    return getGeneration() >= VOLCANIC_ISLANDS;

  }


  /// \returns the HasScalarDwordx3Loads subtarget flag.
  bool hasScalarDwordx3Loads() const {
    return HasScalarDwordx3Loads;
  }


  /// \returns the HasScalarStores subtarget flag.
  bool hasScalarStores() const { return HasScalarStores; }


  /// \returns the HasScalarAtomics subtarget flag.
  bool hasScalarAtomics() const { return HasScalarAtomics; }


  /// \returns the GFX8Insts subtarget flag.
  bool hasLDSFPAtomicAddF32() const {
    return GFX8Insts;
  }

  /// \returns the GFX90AInsts subtarget flag.
  bool hasLDSFPAtomicAddF64() const {
    return GFX90AInsts;
  }


  /// \returns true if the subtarget has the v_permlanex16_b32 instruction.

  bool hasPermLaneX16() const { return getGeneration() >= GFX10; }


  /// \returns true if the subtarget has the v_permlane64_b32 instruction.

  bool hasPermLane64() const { return getGeneration() >= GFX11; }


  /// \returns the HasDPP subtarget flag.
  bool hasDPP() const { return HasDPP; }


  /// \returns true when HasDPP is set and getGeneration() is below GFX10.
  bool hasDPPBroadcasts() const {
    if (!HasDPP)
      return false;
    return getGeneration() < GFX10;
  }


  /// \returns true when HasDPP is set and getGeneration() is below GFX10.
  bool hasDPPWavefrontShifts() const {
    if (!HasDPP)
      return false;
    return getGeneration() < GFX10;
  }


  /// \returns the HasDPP8 subtarget flag.
  bool hasDPP8() const { return HasDPP8; }


  /// \returns the HasDPALU_DPP subtarget flag.
  bool hasDPALU_DPP() const { return HasDPALU_DPP; }


  /// \returns the HasDPPSrc1SGPR subtarget flag.
  bool hasDPPSrc1SGPR() const {
    return HasDPPSrc1SGPR;
  }


  /// \returns the HasPackedFP32Ops subtarget flag.
  bool hasPackedFP32Ops() const { return HasPackedFP32Ops; }


  // Has V_PK_MOV_B32 opcode

  bool hasPkMovB32() const {
    // Tied to the GFX90AInsts flag.
    const bool HasGFX90A = GFX90AInsts;
    return HasGFX90A;
  }


  /// \returns true when getGeneration() is GFX10 or greater, or when
  /// hasGFX940Insts() holds.
  bool hasFmaakFmamkF32Insts() const {
    if (getGeneration() >= GFX10)
      return true;
    return hasGFX940Insts();
  }


  /// \returns the HasImageInsts subtarget flag.
  bool hasImageInsts() const { return HasImageInsts; }


  /// \returns the HasExtendedImageInsts subtarget flag.
  bool hasExtendedImageInsts() const { return HasExtendedImageInsts; }


  /// \returns the HasR128A16 subtarget flag.
  bool hasR128A16() const { return HasR128A16; }


  /// \returns the HasA16 subtarget flag.
  bool hasA16() const {
    return HasA16;
  }


  /// \returns the HasG16 subtarget flag.
  bool hasG16() const {
    return HasG16;
  }


  /// \returns the HasOffset3fBug subtarget flag.
  bool hasOffset3fBug() const { return HasOffset3fBug; }


  /// \returns the HasImageStoreD16Bug subtarget flag.
  bool hasImageStoreD16Bug() const {
    return HasImageStoreD16Bug;
  }


  /// \returns the HasImageGather4D16Bug subtarget flag.
  bool hasImageGather4D16Bug() const {
    return HasImageGather4D16Bug;
  }


  /// \returns the HasMADIntraFwdBug subtarget flag.
  bool hasMADIntraFwdBug() const {
    return HasMADIntraFwdBug;
  }


  /// \returns the HasMSAALoadDstSelBug subtarget flag.
  bool hasMSAALoadDstSelBug() const {
    return HasMSAALoadDstSelBug;
  }


  /// \returns the HasPrivEnabledTrap2NopBug subtarget flag.
  bool hasPrivEnabledTrap2NopBug() const {
    return HasPrivEnabledTrap2NopBug;
  }


  /// \returns the HasNSAEncoding subtarget flag.
  bool hasNSAEncoding() const {
    return HasNSAEncoding;
  }


  bool hasNonNSAEncoding() const { return getGeneration() < GFX12; }


  /// \returns the HasPartialNSAEncoding subtarget flag.
  bool hasPartialNSAEncoding() const {
    return HasPartialNSAEncoding;
  }


  /// \returns the result of AMDGPU::getNSAMaxSize() for this subtarget,
  /// forwarding \p HasSampler unchanged.
  unsigned getNSAMaxSize(bool HasSampler = false) const {
    const unsigned MaxSize = AMDGPU::getNSAMaxSize(*this, HasSampler);
    return MaxSize;
  }


  /// \returns the GFX10_AEncoding subtarget flag.
  bool hasGFX10_AEncoding() const { return GFX10_AEncoding; }


  /// \returns the GFX10_BEncoding subtarget flag.
  bool hasGFX10_BEncoding() const { return GFX10_BEncoding; }


  /// \returns the GFX10_3Insts subtarget flag.
  bool hasGFX10_3Insts() const { return GFX10_3Insts; }


  bool hasMadF16() const;


  /// \returns the GFX940Insts subtarget flag.
  bool hasMovB64() const {
    return GFX940Insts;
  }


  /// \returns the GFX940Insts subtarget flag.
  bool hasLshlAddB64() const {
    return GFX940Insts;
  }


  /// \returns the EnableSIScheduler option flag.
  bool enableSIScheduler() const { return EnableSIScheduler; }


  /// \returns the EnableLoadStoreOpt option flag.
  bool loadStoreOptEnabled() const { return EnableLoadStoreOpt; }


  /// \returns the SGPRInitBug subtarget flag.
  bool hasSGPRInitBug() const { return SGPRInitBug; }


  /// \returns true when UserSGPRInit16Bug is set and isWave32() holds.
  bool hasUserSGPRInit16Bug() const {
    if (!UserSGPRInit16Bug)
      return false;
    return isWave32();
  }


  /// \returns the NegativeScratchOffsetBug subtarget flag.
  bool hasNegativeScratchOffsetBug() const {
    return NegativeScratchOffsetBug;
  }


  /// \returns the NegativeUnalignedScratchOffsetBug subtarget flag.
  bool hasNegativeUnalignedScratchOffsetBug() const {
    const bool HasBug = NegativeUnalignedScratchOffsetBug;
    return HasBug;
  }


  /// \returns the HasMFMAInlineLiteralBug subtarget flag.
  bool hasMFMAInlineLiteralBug() const { return HasMFMAInlineLiteralBug; }


  bool has12DWordStoreHazard() const {

    return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;

  }


  // \returns true if the subtarget supports DWORDX3 load/store instructions.

  bool hasDwordx3LoadStores() const {

    return CIInsts;

  }


  bool hasReadM0MovRelInterpHazard() const {

    return getGeneration() == AMDGPUSubtarget::GFX9;

  }


  bool hasReadM0SendMsgHazard() const {

    return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&

           getGeneration() <= AMDGPUSubtarget::GFX9;

  }


  bool hasReadM0LdsDmaHazard() const {

    return getGeneration() == AMDGPUSubtarget::GFX9;

  }


  bool hasReadM0LdsDirectHazard() const {

    return getGeneration() == AMDGPUSubtarget::GFX9;

  }


  /// \returns the HasVcmpxPermlaneHazard subtarget flag.
  bool hasVcmpxPermlaneHazard() const { return HasVcmpxPermlaneHazard; }


  /// \returns the HasVMEMtoScalarWriteHazard subtarget flag.
  bool hasVMEMtoScalarWriteHazard() const { return HasVMEMtoScalarWriteHazard; }


  /// \returns the HasSMEMtoVectorWriteHazard subtarget flag.
  bool hasSMEMtoVectorWriteHazard() const { return HasSMEMtoVectorWriteHazard; }


  /// \returns true when LDSMisalignedBug is set and EnableCuMode is not.
  bool hasLDSMisalignedBug() const {
    if (!LDSMisalignedBug)
      return false;
    return !EnableCuMode;
  }


  /// \returns the HasInstFwdPrefetchBug subtarget flag.
  bool hasInstFwdPrefetchBug() const { return HasInstFwdPrefetchBug; }


  /// \returns the HasVcmpxExecWARHazard subtarget flag.
  bool hasVcmpxExecWARHazard() const { return HasVcmpxExecWARHazard; }


  /// \returns the HasLdsBranchVmemWARHazard subtarget flag.
  bool hasLdsBranchVmemWARHazard() const { return HasLdsBranchVmemWARHazard; }


  // Shift amount of a 64 bit shift cannot be the highest allocated register

  // if also at the end of the allocation block.

  bool hasShift64HighRegBug() const {

    return GFX90AInsts && !GFX940Insts;

  }


  /// True if a transcendental instruction feeding a non-transcendental VALU
  /// instruction has a one-cycle hazard (reported from GFX940Insts).
  bool hasTransForwardingHazard() const {
    return GFX940Insts;
  }


  /// True if a VALU instruction that partially writes dst with a shift of
  /// result bits has a one-cycle hazard when feeding another VALU instruction
  /// (reported from GFX940Insts).
  bool hasDstSelForwardingHazard() const {
    return GFX940Insts;
  }


  /// True if op_sel cannot be used with v_dot instructions.
  /// \returns true when either GFX940Insts or GFX11Insts is set.
  bool hasDOTOpSelHazard() const {
    if (GFX940Insts)
      return true;
    return GFX11Insts;
  }


  /// True if there are no HW interlocks for VALU writing and then reading
  /// SGPRs (reported from GFX940Insts).
  bool hasVDecCoExecHazard() const { return GFX940Insts; }


  /// \returns the value of the HasNSAtoVMEMBug member.
  bool hasNSAtoVMEMBug() const { return HasNSAtoVMEMBug; }


  /// \returns the value of the HasNSAClauseBug member.
  bool hasNSAClauseBug() const {
    return HasNSAClauseBug;
  }


  /// \returns true when MaxHardClauseLength is greater than zero.
  bool hasHardClauses() const {
    return MaxHardClauseLength > 0;
  }


  /// \returns the value of the GFX90AInsts member.
  bool hasGFX90AInsts() const {
    return GFX90AInsts;
  }


  bool hasFPAtomicToDenormModeHazard() const {

    return getGeneration() == GFX10;

  }


  bool hasVOP3DPP() const { return getGeneration() >= GFX11; }


  bool hasLdsDirect() const { return getGeneration() >= GFX11; }


  bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }


  bool hasVALUPartialForwardingHazard() const {

    return getGeneration() == GFX11;

  }


  /// \returns the value of the HasVALUTransUseHazard member.
  bool hasVALUTransUseHazard() const {
    return HasVALUTransUseHazard;
  }


  /// \returns the value of the HasForceStoreSC0SC1 member.
  bool hasForceStoreSC0SC1() const {
    return HasForceStoreSC0SC1;
  }


  /// \returns the value of the RequiresCOV6 member.
  bool requiresCodeObjectV6() const {
    return RequiresCOV6;
  }


  bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }


  /// \returns true if operations acting on VGPR tuples require even alignment
  /// (reported from GFX90AInsts).
  bool needsAlignedVGPRs() const {
    return GFX90AInsts;
  }


  /// \returns true if the target has the S_PACK_HL_B32_B16 instruction
  /// (reported from GFX11Insts).
  bool hasSPackHL() const {
    return GFX11Insts;
  }


  /// \returns true if the target's EXP instruction has the COMPR flag, which
  /// affects the meaning of the EN (enable) bits. This is the negation of
  /// GFX11Insts.
  bool hasCompressedExport() const {
    return !GFX11Insts;
  }


  /// \returns true if the target's EXP instruction supports the NULL export
  /// target. This is the negation of GFX11Insts.
  bool hasNullExportTarget() const {
    return !GFX11Insts;
  }


  /// \returns the value of the Has1_5xVGPRs member.
  bool has1_5xVGPRs() const {
    return Has1_5xVGPRs;
  }


  /// \returns the value of the HasVOPDInsts member.
  bool hasVOPDInsts() const {
    return HasVOPDInsts;
  }


  bool hasFlatScratchSVSSwizzleBug() const { return getGeneration() == GFX11; }


  /// \returns true if the target has the S_DELAY_ALU instruction (reported
  /// from GFX11Insts).
  bool hasDelayAlu() const {
    return GFX11Insts;
  }


  /// \returns the value of the HasPackedTID member.
  bool hasPackedTID() const {
    return HasPackedTID;
  }


  /// GFX940 is a derivative of GFX90A: hasGFX940Insts() being true implies
  /// that hasGFX90AInsts() is also true.
  /// \returns the value of the GFX940Insts member.
  bool hasGFX940Insts() const {
    return GFX940Insts;
  }


  /// \returns the value of the HasSALUFloatInsts member.
  bool hasSALUFloatInsts() const {
    return HasSALUFloatInsts;
  }


  /// \returns the value of the HasVGPRSingleUseHintInsts member.
  bool hasVGPRSingleUseHintInsts() const {
    return HasVGPRSingleUseHintInsts;
  }


  /// \returns the value of the HasPseudoScalarTrans member.
  bool hasPseudoScalarTrans() const {
    return HasPseudoScalarTrans;
  }


  /// \returns the value of the HasRestrictedSOffset member.
  bool hasRestrictedSOffset() const {
    return HasRestrictedSOffset;
  }


  /// \returns the value of the HasRequiredExportPriority member.
  bool hasRequiredExportPriority() const {
    return HasRequiredExportPriority;
  }


  /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt

  /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.

  bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }


  /// \returns The maximum number of instructions that can be enclosed in an
  /// S_CLAUSE on the given subtarget, or 0 for targets that do not support
  /// that instruction. The value is read from MaxHardClauseLength.
  unsigned maxHardClauseLength() const {
    return MaxHardClauseLength;
  }


  /// \returns the maximum number of waves per SIMD for kernels that use

  /// \p SGPRs SGPRs. Only declared here; the definition is out of line.

  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;


  /// \returns the maximum number of waves per SIMD for kernels that use

  /// \p VGPRs VGPRs. Only declared here; the definition is out of line.

  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;


  /// \returns the occupancy for function \p F. The LDS size and the register

  /// counts are used if provided; \p LDSSize, \p NumSGPRs and \p NumVGPRs each

  /// default to 0. Note, occupancy can be affected by the scratch allocation

  /// as well, but we do not have enough information to compute it.

  unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,

                            unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;


  /// \returns true if the flat_scratch register should be initialized with the

  /// pointer to the wave's scratch memory rather than a size and offset.

  bool flatScratchIsPointer() const {

    return getGeneration() >= AMDGPUSubtarget::GFX9;

  }


  /// \returns true if the flat_scratch register is initialized by the HW, in
  /// which case it is readonly (reported from HasArchitectedFlatScratch).
  bool flatScratchIsArchitected() const {
    return HasArchitectedFlatScratch;
  }


  /// \returns true if the architected SGPRs are enabled (reported from
  /// HasArchitectedSGPRs).
  bool hasArchitectedSGPRs() const {
    return HasArchitectedSGPRs;
  }


  /// \returns true if Global Data Share is supported (reported from HasGDS).
  bool hasGDS() const {
    return HasGDS;
  }


  /// \returns true if Global Wave Sync is supported (reported from HasGWS).
  bool hasGWS() const {
    return HasGWS;
  }


  /// \returns true if the machine has merged shaders in which s0-s7 are
  /// reserved by the hardware and user SGPRs start at s8, i.e. when
  /// getGeneration() >= GFX9.
  bool hasMergedShaders() const { return getGeneration() >= GFX9; }


  // \returns true if the target supports the pre-NGG legacy geometry path.

  bool hasLegacyGeometry() const { return getGeneration() < GFX11; }


  /// \returns true if preloading kernel arguments is supported (reported from
  /// KernargPreload).
  bool hasKernargPreload() const {
    return KernargPreload;
  }


  // \returns true if the target has split barriers feature

  bool hasSplitBarriers() const { return getGeneration() >= GFX12; }


  /// \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable.
  /// Currently unconditionally true.
  bool hasCvtFP8VOP1Bug() const {
    return true;
  }


  /// \returns true if CSUB (a.k.a. SUB_CLAMP on GFX12) atomics support a
  /// no-return form (reported from HasAtomicCSubNoRtnInsts).
  bool hasAtomicCSubNoRtnInsts() const {
    return HasAtomicCSubNoRtnInsts;
  }


  // \returns true if the target has DX10_CLAMP kernel descriptor mode bit

  bool hasDX10ClampMode() const { return getGeneration() < GFX12; }


  // \returns true if the target has IEEE kernel descriptor mode bit

  bool hasIEEEMode() const { return getGeneration() < GFX12; }


  // \returns true if the target has IEEE fminimum/fmaximum instructions

  bool hasIEEEMinMax() const { return getGeneration() >= GFX12; }


  /// \returns true if the target has IEEE fminimum3/fmaximum3 instructions.
  /// Always agrees with hasIEEEMinMax().
  bool hasIEEEMinMax3() const {
    return hasIEEEMinMax();
  }


  // \returns true if the target has WG_RR_MODE kernel descriptor mode bit

  bool hasRrWGMode() const { return getGeneration() >= GFX12; }


  /// \returns true if VADDR and SADDR fields in VSCRATCH can use negative

  /// values.

  bool hasSignedScratchOffsets() const { return getGeneration() >= GFX12; }


  /// \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead
  /// of sign-extending (reported from GFX12Insts).
  bool hasGetPCZeroExtension() const {
    return GFX12Insts;
  }


  /// \returns SGPR allocation granularity supported by the subtarget, as
  /// computed by AMDGPU::IsaInfo::getSGPRAllocGranule.
  unsigned getSGPRAllocGranule() const {
    const unsigned Granule = AMDGPU::IsaInfo::getSGPRAllocGranule(this);
    return Granule;
  }


  /// \returns SGPR encoding granularity supported by the subtarget, as
  /// computed by AMDGPU::IsaInfo::getSGPREncodingGranule.
  unsigned getSGPREncodingGranule() const {
    const unsigned Granule = AMDGPU::IsaInfo::getSGPREncodingGranule(this);
    return Granule;
  }


  /// \returns Total number of SGPRs supported by the subtarget, as computed by
  /// AMDGPU::IsaInfo::getTotalNumSGPRs.
  unsigned getTotalNumSGPRs() const {
    const unsigned Total = AMDGPU::IsaInfo::getTotalNumSGPRs(this);
    return Total;
  }


  /// \returns Addressable number of SGPRs supported by the subtarget, as
  /// computed by AMDGPU::IsaInfo::getAddressableNumSGPRs.
  unsigned getAddressableNumSGPRs() const {
    const unsigned Addressable = AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
    return Addressable;
  }


  /// \returns Minimum number of SGPRs that meets the given number of waves per
  /// execution unit requirement supported by the subtarget. Forwards
  /// \p WavesPerEU to AMDGPU::IsaInfo::getMinNumSGPRs.
  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
    const unsigned MinSGPRs =
        AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
    return MinSGPRs;
  }


  /// \returns Maximum number of SGPRs that meets the given number of waves per
  /// execution unit requirement supported by the subtarget. Forwards
  /// \p WavesPerEU and \p Addressable to AMDGPU::IsaInfo::getMaxNumSGPRs.
  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
    const unsigned MaxSGPRs =
        AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
    return MaxSGPRs;
  }


  /// \returns Reserved number of SGPRs. This is the common utility function

  /// called by the MachineFunction and Function variants of

  /// getReservedNumSGPRs. Only declared here; defined out of line.

  unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;

  /// \returns Reserved number of SGPRs for the given machine function \p MF.

  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;


  /// \returns Reserved number of SGPRs for the given IR function \p F.

  unsigned getReservedNumSGPRs(const Function &F) const;


  /// \returns the maximum number of SGPRs. This is the common utility

  /// function called by the MachineFunction and Function variants of

  /// getMaxNumSGPRs. Only declared here; defined out of line.

  unsigned getBaseMaxNumSGPRs(const Function &F,

                              std::pair<unsigned, unsigned> WavesPerEU,

                              unsigned PreloadedSGPRs,

                              unsigned ReservedNumSGPRs) const;


  /// \returns Maximum number of SGPRs that meets number of waves per execution

  /// unit requirement for function \p MF, or number of SGPRs explicitly

  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.

  ///

  /// Falls back to the value that meets the number of waves per execution unit

  /// requirement if the explicitly requested value cannot be converted to

  /// integer, violates subtarget's specifications, or does not meet number of

  /// waves per execution unit requirement.

  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;


  /// \returns Maximum number of SGPRs that meets number of waves per execution

  /// unit requirement for function \p F, or number of SGPRs explicitly

  /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.

  ///

  /// Falls back to the value that meets the number of waves per execution unit

  /// requirement if the explicitly requested value cannot be converted to

  /// integer, violates subtarget's specifications, or does not meet number of

  /// waves per execution unit requirement.

  unsigned getMaxNumSGPRs(const Function &F) const;


  /// \returns VGPR allocation granularity supported by the subtarget, as
  /// computed by AMDGPU::IsaInfo::getVGPRAllocGranule.
  unsigned getVGPRAllocGranule() const {
    const unsigned Granule = AMDGPU::IsaInfo::getVGPRAllocGranule(this);
    return Granule;
  }


  /// \returns VGPR encoding granularity supported by the subtarget, as
  /// computed by AMDGPU::IsaInfo::getVGPREncodingGranule.
  unsigned getVGPREncodingGranule() const {
    const unsigned Granule = AMDGPU::IsaInfo::getVGPREncodingGranule(this);
    return Granule;
  }


  /// \returns Total number of VGPRs supported by the subtarget, as computed by
  /// AMDGPU::IsaInfo::getTotalNumVGPRs.
  unsigned getTotalNumVGPRs() const {
    const unsigned Total = AMDGPU::IsaInfo::getTotalNumVGPRs(this);
    return Total;
  }


  /// \returns Addressable number of architectural VGPRs supported by the
  /// subtarget, as computed by AMDGPU::IsaInfo::getAddressableNumArchVGPRs.
  unsigned getAddressableNumArchVGPRs() const {
    const unsigned Addressable =
        AMDGPU::IsaInfo::getAddressableNumArchVGPRs(this);
    return Addressable;
  }


  /// \returns Addressable number of VGPRs supported by the subtarget, as
  /// computed by AMDGPU::IsaInfo::getAddressableNumVGPRs.
  unsigned getAddressableNumVGPRs() const {
    const unsigned Addressable = AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
    return Addressable;
  }


  /// \returns the minimum number of VGPRs that will prevent achieving more than
  /// the specified number of waves \p WavesPerEU. Forwards to
  /// AMDGPU::IsaInfo::getMinNumVGPRs.
  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
    const unsigned MinVGPRs =
        AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
    return MinVGPRs;
  }


  /// \returns the maximum number of VGPRs that can be used while still
  /// achieving at least the specified number of waves \p WavesPerEU. Forwards
  /// to AMDGPU::IsaInfo::getMaxNumVGPRs.
  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
    const unsigned MaxVGPRs =
        AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
    return MaxVGPRs;
  }


  /// \returns the maximum number of VGPRs. This is the common utility function

  /// called by the MachineFunction and Function variants of getMaxNumVGPRs.

  unsigned getBaseMaxNumVGPRs(const Function &F,

                              std::pair<unsigned, unsigned> WavesPerEU) const;

  /// \returns Maximum number of VGPRs that meets number of waves per execution

  /// unit requirement for function \p F, or number of VGPRs explicitly

  /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.

  ///

  /// Falls back to the value that meets the number of waves per execution unit

  /// requirement if the explicitly requested value cannot be converted to

  /// integer, violates subtarget's specifications, or does not meet number of

  /// waves per execution unit requirement.

  unsigned getMaxNumVGPRs(const Function &F) const;


  /// \returns the same value as getMaxNumVGPRs(\p F); the AGPR limit is taken
  /// to be the VGPR limit for the function.
  unsigned getMaxNumAGPRs(const Function &F) const {
    const unsigned MaxVGPRs = getMaxNumVGPRs(F);
    return MaxVGPRs;
  }


  /// \returns Maximum number of VGPRs that meets number of waves per execution

  /// unit requirement for function \p MF, or number of VGPRs explicitly

  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.

  ///

  /// Falls back to the value that meets the number of waves per execution unit

  /// requirement if the explicitly requested value cannot be converted to

  /// integer, violates subtarget's specifications, or does not meet number of

  /// waves per execution unit requirement.

  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;


  /// Overrides a base-class hook; only declared here, defined out of line.

  /// NOTE(review): presumably appends this subtarget's post-RA scheduling DAG

  /// mutations to \p Mutations -- confirm against the definition.

  void getPostRAMutations(

      std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)

      const override;


  /// \returns a ScheduleDAGMutation held by std::unique_ptr, built from

  /// \p TII. Only declared here; defined out of line.

  /// NOTE(review): the mutation's purpose is not visible in this header --

  /// see the definition.

  std::unique_ptr<ScheduleDAGMutation>

  createFillMFMAShadowMutation(const TargetInstrInfo *TII) const;


  /// \returns true when getWavefrontSize() is 32.
  bool isWave32() const { return getWavefrontSize() == 32; }


  /// \returns true when getWavefrontSize() is 64.
  bool isWave64() const { return getWavefrontSize() == 64; }


  /// \returns the register class reported by getBoolRC() on this subtarget's
  /// register info (getRegisterInfo()).
  const TargetRegisterClass *getBoolRC() const {
    const auto *RegInfo = getRegisterInfo();
    return RegInfo->getBoolRC();
  }


  /// \returns Maximum number of work groups per compute unit supported by the
  /// subtarget and limited by given \p FlatWorkGroupSize. Forwards to
  /// AMDGPU::IsaInfo::getMaxWorkGroupsPerCU.
  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
    const unsigned MaxGroups =
        AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
    return MaxGroups;
  }


  /// \returns Minimum flat work group size supported by the subtarget, as
  /// computed by AMDGPU::IsaInfo::getMinFlatWorkGroupSize.
  unsigned getMinFlatWorkGroupSize() const override {
    const unsigned MinSize = AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
    return MinSize;
  }


  /// \returns Maximum flat work group size supported by the subtarget, as
  /// computed by AMDGPU::IsaInfo::getMaxFlatWorkGroupSize.
  unsigned getMaxFlatWorkGroupSize() const override {
    const unsigned MaxSize = AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
    return MaxSize;
  }


  /// \returns Number of waves per execution unit required to support the given
  /// \p FlatWorkGroupSize. Forwards to
  /// AMDGPU::IsaInfo::getWavesPerEUForWorkGroup.
  unsigned
  getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
    const unsigned Waves =
        AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
    return Waves;
  }


  /// \returns Minimum number of waves per execution unit supported by the
  /// subtarget, as computed by AMDGPU::IsaInfo::getMinWavesPerEU.
  unsigned getMinWavesPerEU() const override {
    const unsigned MinWaves = AMDGPU::IsaInfo::getMinWavesPerEU(this);
    return MinWaves;
  }


  /// Overrides a base-class hook; only declared here, defined out of line.

  /// NOTE(review): presumably updates \p Dep for the dependency between

  /// operand \p DefOpIdx of \p Def and operand \p UseOpIdx of \p Use --

  /// confirm against the definition.

  void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,

                             SDep &Dep,

                             const TargetSchedModel *SchedModel) const override;


  // \returns true if it's beneficial on this subtarget for the scheduler to

  // cluster stores as well as loads.

  bool shouldClusterStores() const { return getGeneration() >= GFX11; }


  /// \returns the number of address arguments from which to enable MIMG NSA

  /// on supported architectures, for machine function \p MF.

  unsigned getNSAThreshold(const MachineFunction &MF) const;


  /// \returns true if the subtarget has a hazard requiring an "s_nop 0"
  /// instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
  bool requiresNopBeforeDeallocVGPRs() const {
    // Unconditional for now: every target that supports the dealloc VGPRs
    // message also requires the nop.
    return true;
  }

};


/// Holds one boolean flag per UserSGPRID field, plus the
/// NumKernargPreloadSGPRs and NumUsedUserSGPRs counters. The constructor,
/// getNumFreeUserSGPRs() and allocKernargPreloadSGPRs() are only declared in
/// this header; their definitions live elsewhere.
class GCNUserSGPRUsageInfo {
public:
  /// Identifiers of the user SGPR fields.
  enum UserSGPRID : unsigned {
    ImplicitBufferPtrID = 0,
    PrivateSegmentBufferID = 1,
    DispatchPtrID = 2,
    QueuePtrID = 3,
    KernargSegmentPtrID = 4,
    DispatchIdID = 5,
    FlatScratchInitID = 6,
    PrivateSegmentSizeID = 7
  };

  GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST);

  /// \returns the size, in number of SGPRs, of the preload user SGPR field
  /// \p ID: 4 for the private segment buffer, 1 for the private segment size
  /// and 2 for every other field.
  static unsigned getNumUserSGPRForField(UserSGPRID ID) {
    switch (ID) {
    case PrivateSegmentBufferID:
      return 4;
    case PrivateSegmentSizeID:
      return 1;
    case ImplicitBufferPtrID:
    case DispatchPtrID:
    case QueuePtrID:
    case KernargSegmentPtrID:
    case DispatchIdID:
    case FlatScratchInitID:
      return 2;
    }
    llvm_unreachable("Unknown UserSGPRID.");
  }

  // Flag accessors; each one reports the member of the matching name.
  bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; }
  bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; }
  bool hasDispatchPtr() const { return DispatchPtr; }
  bool hasQueuePtr() const { return QueuePtr; }
  bool hasKernargSegmentPtr() const { return KernargSegmentPtr; }
  bool hasDispatchID() const { return DispatchID; }
  bool hasFlatScratchInit() const { return FlatScratchInit; }
  bool hasPrivateSegmentSize() const { return PrivateSegmentSize; }

  // Counter accessors.
  unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; }
  unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; }

  unsigned getNumFreeUserSGPRs();

  void allocKernargPreloadSGPRs(unsigned NumSGPRs);

private:
  const GCNSubtarget &ST;

  // Private memory buffer
  // Compute directly in sgpr[0:1]
  // Other shaders indirect 64-bits at sgpr[0:1]
  bool ImplicitBufferPtr = false;

  bool PrivateSegmentBuffer = false;

  bool DispatchPtr = false;

  bool QueuePtr = false;

  bool KernargSegmentPtr = false;

  bool DispatchID = false;

  bool FlatScratchInit = false;

  bool PrivateSegmentSize = false;

  unsigned NumKernargPreloadSGPRs = 0;

  unsigned NumUsedUserSGPRs = 0;
};


} // end namespace llvm


#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H

AMDGPUBaseInfo.h

AMDGPUCallLowering.h
This file describes how to lower LLVM calls to machine code calls.

AMDGPURegisterBankInfo.h
This file declares the targeting of the RegisterBankInfo class for AMDGPU.

AMDGPUSubtarget.h
Base class for AMDGPU specific classes of TargetSubtarget.

Size
uint64_t Size
Definition: ELFObjHandler.cpp:81

TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125

F
#define F(x, y, z)
Definition: MD5.cpp:55

TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:48

SIFrameLowering.h

SIISelLowering.h
SI DAG Lowering interface definition.

SIInstrInfo.h
Interface definition for SIInstrInfo.

SelectionDAGTargetInfo.h

AMDGPUGenSubtargetInfo

llvm::AMDGPURegisterBankInfo
Definition: AMDGPURegisterBankInfo.h:42

llvm::AMDGPUSubtarget
Definition: AMDGPUSubtarget.h:29

llvm::AMDGPUSubtarget::isMesa3DOS
bool isMesa3DOS() const
Definition: AMDGPUSubtarget.h:136

llvm::AMDGPUSubtarget::Generation
Generation
Definition: AMDGPUSubtarget.h:31

llvm::AMDGPUSubtarget::GFX10
@ GFX10
Definition: AMDGPUSubtarget.h:41

llvm::AMDGPUSubtarget::GFX9
@ GFX9
Definition: AMDGPUSubtarget.h:40

llvm::AMDGPUSubtarget::GFX12
@ GFX12
Definition: AMDGPUSubtarget.h:43

llvm::AMDGPUSubtarget::INVALID
@ INVALID
Definition: AMDGPUSubtarget.h:32

llvm::AMDGPUSubtarget::SEA_ISLANDS
@ SEA_ISLANDS
Definition: AMDGPUSubtarget.h:38

llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS
@ SOUTHERN_ISLANDS
Definition: AMDGPUSubtarget.h:37

llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS
@ VOLCANIC_ISLANDS
Definition: AMDGPUSubtarget.h:39

llvm::AMDGPUSubtarget::GFX11
@ GFX11
Definition: AMDGPUSubtarget.h:42

llvm::AMDGPUSubtarget::getWavefrontSizeLog2
unsigned getWavefrontSizeLog2() const
Definition: AMDGPUSubtarget.h:225

llvm::AMDGPUSubtarget::getMaxWavesPerEU
unsigned getMaxWavesPerEU() const
Definition: AMDGPUSubtarget.h:285

llvm::AMDGPUSubtarget::isAmdHsaOS
bool isAmdHsaOS() const
Definition: AMDGPUSubtarget.h:128

llvm::AMDGPUSubtarget::getWavefrontSize
unsigned getWavefrontSize() const
Definition: AMDGPUSubtarget.h:221

llvm::AMDGPU::IsaInfo::AMDGPUTargetID
Definition: AMDGPUBaseInfo.h:123

llvm::AMDGPU::IsaInfo::AMDGPUTargetID::isSramEccOnOrAny
bool isSramEccOnOrAny() const
Definition: AMDGPUBaseInfo.h:168

llvm::AMDGPU::IsaInfo::AMDGPUTargetID::isXnackOnOrAny
bool isXnackOnOrAny() const
Definition: AMDGPUBaseInfo.h:139

llvm::CallLowering
Definition: CallLowering.h:44

llvm::Function
Definition: Function.h:64

llvm::GCNSubtarget
Definition: GCNSubtarget.h:35

llvm::GCNSubtarget::hasPrefetch
bool hasPrefetch() const
Definition: GCNSubtarget.h:940

llvm::GCNSubtarget::hasMemoryAtomicFaddF32DenormalSupport
bool hasMemoryAtomicFaddF32DenormalSupport() const
Definition: GCNSubtarget.h:883

llvm::GCNSubtarget::GFX10Insts
bool GFX10Insts
Definition: GCNSubtarget.h:109

llvm::GCNSubtarget::hasFlat
bool hasFlat() const
Definition: GCNSubtarget.h:385

llvm::GCNSubtarget::hasD16Images
bool hasD16Images() const
Definition: GCNSubtarget.h:696

llvm::GCNSubtarget::InstrItins
InstrItineraryData InstrItins
Definition: GCNSubtarget.h:64

llvm::GCNSubtarget::useVGPRIndexMode
bool useVGPRIndexMode() const
Definition: AMDGPUSubtarget.cpp:685

llvm::GCNSubtarget::HasDot6Insts
bool HasDot6Insts
Definition: GCNSubtarget.h:152

llvm::GCNSubtarget::hasAtomicDsPkAdd16Insts
bool hasAtomicDsPkAdd16Insts() const
Definition: GCNSubtarget.h:845

llvm::GCNSubtarget::hasSDWAOmod
bool hasSDWAOmod() const
Definition: GCNSubtarget.h:745

llvm::GCNSubtarget::HasLdsBranchVmemWARHazard
bool HasLdsBranchVmemWARHazard
Definition: GCNSubtarget.h:227

llvm::GCNSubtarget::HasDot11Insts
bool HasDot11Insts
Definition: GCNSubtarget.h:157

llvm::GCNSubtarget::partialVCCWritesUpdateVCCZ
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
Definition: GCNSubtarget.h:488

llvm::GCNSubtarget::Has1_5xVGPRs
bool Has1_5xVGPRs
Definition: GCNSubtarget.h:236

llvm::GCNSubtarget::hasSwap
bool hasSwap() const
Definition: GCNSubtarget.h:443

llvm::GCNSubtarget::hasPkFmacF16Inst
bool hasPkFmacF16Inst() const
Definition: GCNSubtarget.h:825

llvm::GCNSubtarget::HasAtomicFMinFMaxF64FlatInsts
bool HasAtomicFMinFMaxF64FlatInsts
Definition: GCNSubtarget.h:165

llvm::GCNSubtarget::hasDot2Insts
bool hasDot2Insts() const
Definition: GCNSubtarget.h:775

llvm::GCNSubtarget::hasD16LoadStore
bool hasD16LoadStore() const
Definition: GCNSubtarget.h:688

llvm::GCNSubtarget::HasExtendedImageInsts
bool HasExtendedImageInsts
Definition: GCNSubtarget.h:137

llvm::GCNSubtarget::hasMergedShaders
bool hasMergedShaders() const
Definition: GCNSubtarget.h:1333

llvm::GCNSubtarget::hasA16
bool hasA16() const
Definition: GCNSubtarget.h:1069

llvm::GCNSubtarget::hasSDWAScalar
bool hasSDWAScalar() const
Definition: GCNSubtarget.h:749

llvm::GCNSubtarget::hasRrWGMode
bool hasRrWGMode() const
Definition: GCNSubtarget.h:1366

llvm::GCNSubtarget::supportsBackOffBarrier
bool supportsBackOffBarrier() const
Definition: GCNSubtarget.h:569

llvm::GCNSubtarget::hasScalarCompareEq64
bool hasScalarCompareEq64() const
Definition: GCNSubtarget.h:999

llvm::GCNSubtarget::EnableXNACK
bool EnableXNACK
Definition: GCNSubtarget.h:85

llvm::GCNSubtarget::has1_5xVGPRs
bool has1_5xVGPRs() const
Definition: GCNSubtarget.h:1263

llvm::GCNSubtarget::getLDSBankCount
int getLDSBankCount() const
Definition: GCNSubtarget.h:340

llvm::GCNSubtarget::HasG16
bool HasG16
Definition: GCNSubtarget.h:140

llvm::GCNSubtarget::hasOnlyRevVALUShifts
bool hasOnlyRevVALUShifts() const
Definition: GCNSubtarget.h:391

llvm::GCNSubtarget::hasImageStoreD16Bug
bool hasImageStoreD16Bug() const
Definition: GCNSubtarget.h:1077

llvm::GCNSubtarget::hasNonNSAEncoding
bool hasNonNSAEncoding() const
Definition: GCNSubtarget.h:1089

llvm::GCNSubtarget::hasUsableDivScaleConditionOutput
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Definition: GCNSubtarget.h:477

llvm::GCNSubtarget::mirFileLoaded
void mirFileLoaded(MachineFunction &MF) const override
Definition: AMDGPUSubtarget.cpp:670

llvm::GCNSubtarget::hasUsableDSOffset
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:468

llvm::GCNSubtarget::loadStoreOptEnabled
bool loadStoreOptEnabled() const
Definition: GCNSubtarget.h:1119

llvm::GCNSubtarget::enableSubRegLiveness
bool enableSubRegLiveness() const override
Definition: GCNSubtarget.h:961

llvm::GCNSubtarget::hasDPPWavefrontShifts
bool hasDPPWavefrontShifts() const
Definition: GCNSubtarget.h:1030

llvm::GCNSubtarget::getSGPRAllocGranule
unsigned getSGPRAllocGranule() const
Definition: GCNSubtarget.h:1377

llvm::GCNSubtarget::hasAtomicFMinFMaxF64FlatInsts
bool hasAtomicFMinFMaxF64FlatInsts() const
Definition: GCNSubtarget.h:841

llvm::GCNSubtarget::hasLdsAtomicAddF64
bool hasLdsAtomicAddF64() const
Definition: GCNSubtarget.h:674

llvm::GCNSubtarget::HasSALUFloatInsts
bool HasSALUFloatInsts
Definition: GCNSubtarget.h:217

llvm::GCNSubtarget::hasFlatLgkmVMemCountInOrder
bool hasFlatLgkmVMemCountInOrder() const
Definition: GCNSubtarget.h:684

llvm::GCNSubtarget::GFX940Insts
bool GFX940Insts
Definition: GCNSubtarget.h:108

llvm::GCNSubtarget::flatScratchIsPointer
bool flatScratchIsPointer() const
Definition: GCNSubtarget.h:1314

llvm::GCNSubtarget::hasSDWAMac
bool hasSDWAMac() const
Definition: GCNSubtarget.h:757

llvm::GCNSubtarget::hasFP8ConversionInsts
bool hasFP8ConversionInsts() const
Definition: GCNSubtarget.h:823

llvm::GCNSubtarget::hasShift64HighRegBug
bool hasShift64HighRegBug() const
Definition: GCNSubtarget.h:1197

llvm::GCNSubtarget::hasDot7Insts
bool hasDot7Insts() const
Definition: GCNSubtarget.h:795

llvm::GCNSubtarget::HasScalarDwordx3Loads
bool HasScalarDwordx3Loads
Definition: GCNSubtarget.h:123

llvm::GCNSubtarget::hasApertureRegs
bool hasApertureRegs() const
Definition: GCNSubtarget.h:597

llvm::GCNSubtarget::MaxPrivateElementSize
unsigned MaxPrivateElementSize
Definition: GCNSubtarget.h:66

llvm::GCNSubtarget::LDSMisalignedBug
bool LDSMisalignedBug
Definition: GCNSubtarget.h:211

llvm::GCNSubtarget::unsafeDSOffsetFoldingEnabled
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:472

llvm::GCNSubtarget::hasFPAtomicToDenormModeHazard
bool hasFPAtomicToDenormModeHazard() const
Definition: GCNSubtarget.h:1227

llvm::GCNSubtarget::getAddressableNumArchVGPRs
unsigned getAddressableNumArchVGPRs() const
Definition: GCNSubtarget.h:1463

llvm::GCNSubtarget::DumpCode
bool DumpCode
Definition: GCNSubtarget.h:98

llvm::GCNSubtarget::hasFlatInstOffsets
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:627

llvm::GCNSubtarget::vmemWriteNeedsExpWaitcnt
bool vmemWriteNeedsExpWaitcnt() const
Definition: GCNSubtarget.h:932

llvm::GCNSubtarget::hasAtomicFMinFMaxF32FlatInsts
bool hasAtomicFMinFMaxF32FlatInsts() const
Definition: GCNSubtarget.h:837

llvm::GCNSubtarget::shouldClusterStores
bool shouldClusterStores() const
Definition: GCNSubtarget.h:1566

llvm::GCNSubtarget::getMinNumSGPRs
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1398

llvm::GCNSubtarget::getSGPREncodingGranule
unsigned getSGPREncodingGranule() const
Definition: GCNSubtarget.h:1382

llvm::GCNSubtarget::HasSDWAScalar
bool HasSDWAScalar
Definition: GCNSubtarget.h:127

llvm::GCNSubtarget::ParseSubtargetFeatures
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)

llvm::GCNSubtarget::hasLdsBranchVmemWARHazard
bool hasLdsBranchVmemWARHazard() const
Definition: GCNSubtarget.h:1191

llvm::GCNSubtarget::hasDefaultComponentZero
bool hasDefaultComponentZero() const
Definition: GCNSubtarget.h:894

llvm::GCNSubtarget::hasGetWaveIdInst
bool hasGetWaveIdInst() const
Definition: GCNSubtarget.h:908

llvm::GCNSubtarget::hasCompressedExport
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
Definition: GCNSubtarget.h:1257

llvm::GCNSubtarget::hasGFX90AInsts
bool hasGFX90AInsts() const
Definition: GCNSubtarget.h:1225

llvm::GCNSubtarget::hasDstSelForwardingHazard
bool hasDstSelForwardingHazard() const
Definition: GCNSubtarget.h:1207

llvm::GCNSubtarget::UserSGPRInit16Bug
bool UserSGPRInit16Bug
Definition: GCNSubtarget.h:115

llvm::GCNSubtarget::setScalarizeGlobalBehavior
void setScalarizeGlobalBehavior(bool b)
Definition: GCNSubtarget.h:965

llvm::GCNSubtarget::computeOccupancy
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
Definition: AMDGPUSubtarget.cpp:730

llvm::GCNSubtarget::UnalignedDSAccess
bool UnalignedDSAccess
Definition: GCNSubtarget.h:214

llvm::GCNSubtarget::HasDPP8
bool HasDPP8
Definition: GCNSubtarget.h:132

llvm::GCNSubtarget::GFX11Insts
bool GFX11Insts
Definition: GCNSubtarget.h:110

llvm::GCNSubtarget::AddNoCarryInsts
bool AddNoCarryInsts
Definition: GCNSubtarget.h:209

llvm::GCNSubtarget::HasScalarStores
bool HasScalarStores
Definition: GCNSubtarget.h:124

llvm::GCNSubtarget::HasDPALU_DPP
bool HasDPALU_DPP
Definition: GCNSubtarget.h:133

llvm::GCNSubtarget::hasDLInsts
bool hasDLInsts() const
Definition: GCNSubtarget.h:765

llvm::GCNSubtarget::hasExtendedImageInsts
bool hasExtendedImageInsts() const
Definition: GCNSubtarget.h:1061

llvm::GCNSubtarget::GFX10_BEncoding
bool GFX10_BEncoding
Definition: GCNSubtarget.h:144

llvm::GCNSubtarget::hasBCNT
bool hasBCNT(unsigned Size) const
Definition: GCNSubtarget.h:411

llvm::GCNSubtarget::HasMADIntraFwdBug
bool HasMADIntraFwdBug
Definition: GCNSubtarget.h:237

llvm::GCNSubtarget::hasMAIInsts
bool hasMAIInsts() const
Definition: GCNSubtarget.h:815

llvm::GCNSubtarget::HasFlatAtomicFaddF32Inst
bool HasFlatAtomicFaddF32Inst
Definition: GCNSubtarget.h:176

llvm::GCNSubtarget::supportsAgentScopeFineGrainedRemoteMemoryAtomics
bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const
Definition: GCNSubtarget.h:890

llvm::GCNSubtarget::hasFlatScratchInsts
bool hasFlatScratchInsts() const
Definition: GCNSubtarget.h:635

llvm::GCNSubtarget::getBaseMaxNumVGPRs
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU) const
Definition: AMDGPUSubtarget.cpp:827

llvm::GCNSubtarget::HasFlatSegmentOffsetBug
bool HasFlatSegmentOffsetBug
Definition: GCNSubtarget.h:231

llvm::GCNSubtarget::hasMultiDwordFlatScratchAddressing
bool hasMultiDwordFlatScratchAddressing() const
Definition: GCNSubtarget.h:676

llvm::GCNSubtarget::hasArchitectedSGPRs
bool hasArchitectedSGPRs() const
Definition: GCNSubtarget.h:1323

llvm::GCNSubtarget::hasHWFP64
bool hasHWFP64() const
Definition: GCNSubtarget.h:369

llvm::GCNSubtarget::hasDenormModeInst
bool hasDenormModeInst() const
Definition: GCNSubtarget.h:527

llvm::GCNSubtarget::hasPrivEnabledTrap2NopBug
bool hasPrivEnabledTrap2NopBug() const
Definition: GCNSubtarget.h:1085

llvm::GCNSubtarget::HasPkFmacF16Inst
bool HasPkFmacF16Inst
Definition: GCNSubtarget.h:161

llvm::GCNSubtarget::hasMFMAInlineLiteralBug
bool hasMFMAInlineLiteralBug() const
Definition: GCNSubtarget.h:1137

llvm::GCNSubtarget::NegativeScratchOffsetBug
bool NegativeScratchOffsetBug
Definition: GCNSubtarget.h:116

llvm::GCNSubtarget::getTotalNumVGPRs
unsigned getTotalNumVGPRs() const
Definition: GCNSubtarget.h:1457

llvm::GCNSubtarget::getMinWavesPerEU
unsigned getMinWavesPerEU() const override
Definition: GCNSubtarget.h:1556

llvm::GCNSubtarget::hasSMemTimeInst
bool hasSMemTimeInst() const
Definition: GCNSubtarget.h:912

llvm::GCNSubtarget::HasVGPRIndexMode
bool HasVGPRIndexMode
Definition: GCNSubtarget.h:122

llvm::GCNSubtarget::HasMAIInsts
bool HasMAIInsts
Definition: GCNSubtarget.h:158

llvm::GCNSubtarget::hasUnalignedDSAccessEnabled
bool hasUnalignedDSAccessEnabled() const
Definition: GCNSubtarget.h:585

llvm::GCNSubtarget::hasNegativeScratchOffsetBug
bool hasNegativeScratchOffsetBug() const
Definition: GCNSubtarget.h:1131

llvm::GCNSubtarget::getInstrInfo
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:266

llvm::GCNSubtarget::getMaxWorkGroupsPerCU
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
Definition: GCNSubtarget.h:1533

llvm::GCNSubtarget::AutoWaitcntBeforeBarrier
bool AutoWaitcntBeforeBarrier
Definition: GCNSubtarget.h:75

llvm::GCNSubtarget::HasVscnt
bool HasVscnt
Definition: GCNSubtarget.h:192

llvm::GCNSubtarget::hasDot1Insts
bool hasDot1Insts() const
Definition: GCNSubtarget.h:771

llvm::GCNSubtarget::hasDot3Insts
bool hasDot3Insts() const
Definition: GCNSubtarget.h:779

llvm::GCNSubtarget::getConstantBusLimit
unsigned getConstantBusLimit(unsigned Opcode) const
Definition: AMDGPUSubtarget.cpp:215

llvm::GCNSubtarget::hasMADIntraFwdBug
bool hasMADIntraFwdBug() const
Definition: GCNSubtarget.h:1081

llvm::GCNSubtarget::hasVALUMaskWriteHazard
bool hasVALUMaskWriteHazard() const
Definition: GCNSubtarget.h:1247

llvm::GCNSubtarget::TrapHandlerAbi
TrapHandlerAbi
Definition: GCNSubtarget.h:41

llvm::GCNSubtarget::TrapHandlerAbi::NONE
@ NONE

llvm::GCNSubtarget::TrapHandlerAbi::AMDHSA
@ AMDHSA

llvm::GCNSubtarget::getInlineAsmLowering
const InlineAsmLowering * getInlineAsmLowering() const override
Definition: GCNSubtarget.h:286

llvm::GCNSubtarget::HasDot3Insts
bool HasDot3Insts
Definition: GCNSubtarget.h:149

llvm::GCNSubtarget::HasVGPRSingleUseHintInsts
bool HasVGPRSingleUseHintInsts
Definition: GCNSubtarget.h:218

llvm::GCNSubtarget::hasAutoWaitcntBeforeBarrier
bool hasAutoWaitcntBeforeBarrier() const
Definition: GCNSubtarget.h:563

llvm::GCNSubtarget::hasNSAClauseBug
bool hasNSAClauseBug() const
Definition: GCNSubtarget.h:1221

llvm::GCNSubtarget::EnableSRAMECC
bool EnableSRAMECC
Definition: GCNSubtarget.h:189

llvm::GCNSubtarget::HasSDWAOmod
bool HasSDWAOmod
Definition: GCNSubtarget.h:126

llvm::GCNSubtarget::hasAtomicFaddRtnInsts
bool hasAtomicFaddRtnInsts() const
Definition: GCNSubtarget.h:853

llvm::GCNSubtarget::getTotalNumSGPRs
unsigned getTotalNumSGPRs() const
Definition: GCNSubtarget.h:1387

llvm::GCNSubtarget::getInstrItineraryData
const InstrItineraryData * getInstrItineraryData() const override
Definition: GCNSubtarget.h:311

llvm::GCNSubtarget::HasDPP
bool HasDPP
Definition: GCNSubtarget.h:131

llvm::GCNSubtarget::adjustSchedDependency
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep, const TargetSchedModel *SchedModel) const override
Definition: AMDGPUSubtarget.cpp:867

llvm::GCNSubtarget::HasShaderCyclesHiLoRegisters
bool HasShaderCyclesHiLoRegisters
Definition: GCNSubtarget.h:196

llvm::GCNSubtarget::EnableLoadStoreOpt
bool EnableLoadStoreOpt
Definition: GCNSubtarget.h:93

llvm::GCNSubtarget::getMaxLocalMemSizeWithWaveCount
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.

llvm::GCNSubtarget::hasPkMovB32
bool hasPkMovB32() const
Definition: GCNSubtarget.h:1049

llvm::GCNSubtarget::HasForceStoreSC0SC1
bool HasForceStoreSC0SC1
Definition: GCNSubtarget.h:240

llvm::GCNSubtarget::needsAlignedVGPRs
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
Definition: GCNSubtarget.h:1250

llvm::GCNSubtarget::hasGFX10_3Insts
bool hasGFX10_3Insts() const
Definition: GCNSubtarget.h:1105

llvm::GCNSubtarget::getStackAlignment
Align getStackAlignment() const
Definition: GCNSubtarget.h:953

llvm::GCNSubtarget::privateMemoryResourceIsRangeChecked
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:553

llvm::GCNSubtarget::hasScalarSubwordLoads
bool hasScalarSubwordLoads() const
Definition: GCNSubtarget.h:455

llvm::GCNSubtarget::hasDot11Insts
bool hasDot11Insts() const
Definition: GCNSubtarget.h:811

llvm::GCNSubtarget::enableFlatScratch
bool enableFlatScratch() const
Definition: GCNSubtarget.h:652

llvm::GCNSubtarget::KernargPreload
bool KernargPreload
Definition: GCNSubtarget.h:81

llvm::GCNSubtarget::hasMadF16
bool hasMadF16() const
Definition: AMDGPUSubtarget.cpp:681

llvm::GCNSubtarget::HasGWS
bool HasGWS
Definition: GCNSubtarget.h:208

llvm::GCNSubtarget::EnablePreciseMemory
bool EnablePreciseMemory
Definition: GCNSubtarget.h:90

llvm::GCNSubtarget::hasUnalignedBufferAccess
bool hasUnalignedBufferAccess() const
Definition: GCNSubtarget.h:573

llvm::GCNSubtarget::GFX10_3Insts
bool GFX10_3Insts
Definition: GCNSubtarget.h:112

llvm::GCNSubtarget::HasGetWaveIdInst
bool HasGetWaveIdInst
Definition: GCNSubtarget.h:193

llvm::GCNSubtarget::hasR128A16
bool hasR128A16() const
Definition: GCNSubtarget.h:1065

llvm::GCNSubtarget::hasOffset3fBug
bool hasOffset3fBug() const
Definition: GCNSubtarget.h:1073

llvm::GCNSubtarget::hasDwordx3LoadStores
bool hasDwordx3LoadStores() const
Definition: GCNSubtarget.h:1146

llvm::GCNSubtarget::hasSignedScratchOffsets
bool hasSignedScratchOffsets() const
Definition: GCNSubtarget.h:1370

llvm::GCNSubtarget::GFX9Insts
bool GFX9Insts
Definition: GCNSubtarget.h:106

llvm::GCNSubtarget::HasPrivEnabledTrap2NopBug
bool HasPrivEnabledTrap2NopBug
Definition: GCNSubtarget.h:235

llvm::GCNSubtarget::hasGlobalAddTidInsts
bool hasGlobalAddTidInsts() const
Definition: GCNSubtarget.h:657

llvm::GCNSubtarget::hasSGPRInitBug
bool hasSGPRInitBug() const
Definition: GCNSubtarget.h:1123

llvm::GCNSubtarget::hasFlatScrRegister
bool hasFlatScrRegister() const
Definition: GCNSubtarget.h:623

llvm::GCNSubtarget::hasGetPCZeroExtension
bool hasGetPCZeroExtension() const
Definition: GCNSubtarget.h:1374

llvm::GCNSubtarget::hasPermLane64
bool hasPermLane64() const
Definition: GCNSubtarget.h:1020

llvm::GCNSubtarget::requiresNopBeforeDeallocVGPRs
bool requiresNopBeforeDeallocVGPRs() const
Definition: GCNSubtarget.h:1574

llvm::GCNSubtarget::supportsGetDoorbellID
bool supportsGetDoorbellID() const
Definition: GCNSubtarget.h:461

llvm::GCNSubtarget::HasNSAClauseBug
bool HasNSAClauseBug
Definition: GCNSubtarget.h:229

llvm::GCNSubtarget::hasVcmpxExecWARHazard
bool hasVcmpxExecWARHazard() const
Definition: GCNSubtarget.h:1187

llvm::GCNSubtarget::isTgSplitEnabled
bool isTgSplitEnabled() const
Definition: GCNSubtarget.h:609

llvm::GCNSubtarget::hasFlatAtomicFaddF32Inst
bool hasFlatAtomicFaddF32Inst() const
Definition: GCNSubtarget.h:873

llvm::GCNSubtarget::hasKernargPreload
bool hasKernargPreload() const
Definition: GCNSubtarget.h:1341

llvm::GCNSubtarget::FlatGlobalInsts
bool FlatGlobalInsts
Definition: GCNSubtarget.h:201

llvm::GCNSubtarget::hasFP8Insts
bool hasFP8Insts() const
Definition: GCNSubtarget.h:819

llvm::GCNSubtarget::getMaxNumAGPRs
unsigned getMaxNumAGPRs(const Function &F) const
Definition: GCNSubtarget.h:1498

llvm::GCNSubtarget::getVGPRAllocGranule
unsigned getVGPRAllocGranule() const
Definition: GCNSubtarget.h:1447

llvm::GCNSubtarget::hasReadM0MovRelInterpHazard
bool hasReadM0MovRelInterpHazard() const
Definition: GCNSubtarget.h:1150

llvm::GCNSubtarget::getRegisterInfo
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:278

llvm::GCNSubtarget::getMaxNumVGPRs
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1480

llvm::GCNSubtarget::ScalarizeGlobal
bool ScalarizeGlobal
Definition: GCNSubtarget.h:216

llvm::GCNSubtarget::hasRequiredExportPriority
bool hasRequiredExportPriority() const
Definition: GCNSubtarget.h:1286

llvm::GCNSubtarget::hasDOTOpSelHazard
bool hasDOTOpSelHazard() const
Definition: GCNSubtarget.h:1210

llvm::GCNSubtarget::hasLdsWaitVMSRC
bool hasLdsWaitVMSRC() const
Definition: GCNSubtarget.h:1235

llvm::GCNSubtarget::hasMSAALoadDstSelBug
bool hasMSAALoadDstSelBug() const
Definition: GCNSubtarget.h:1083

llvm::GCNSubtarget::getBoolRC
const TargetRegisterClass * getBoolRC() const
Definition: GCNSubtarget.h:1527

llvm::GCNSubtarget::hasFmaakFmamkF32Insts
bool hasFmaakFmamkF32Insts() const
Definition: GCNSubtarget.h:1053

llvm::GCNSubtarget::hasVscnt
bool hasVscnt() const
Definition: GCNSubtarget.h:904

llvm::GCNSubtarget::hasMad64_32
bool hasMad64_32() const
Definition: GCNSubtarget.h:741

llvm::GCNSubtarget::getInstructionSelector
InstructionSelector * getInstructionSelector() const override
Definition: GCNSubtarget.h:290

llvm::GCNSubtarget::getVGPREncodingGranule
unsigned getVGPREncodingGranule() const
Definition: GCNSubtarget.h:1452

llvm::GCNSubtarget::NegativeUnalignedScratchOffsetBug
bool NegativeUnalignedScratchOffsetBug
Definition: GCNSubtarget.h:117

llvm::GCNSubtarget::hasHardClauses
bool hasHardClauses() const
Definition: GCNSubtarget.h:1223

llvm::GCNSubtarget::useDS128
bool useDS128() const
Definition: GCNSubtarget.h:537

llvm::GCNSubtarget::hasExtendedWaitCounts
bool hasExtendedWaitCounts() const
Definition: GCNSubtarget.h:1290

llvm::GCNSubtarget::HasImageInsts
bool HasImageInsts
Definition: GCNSubtarget.h:136

llvm::GCNSubtarget::hasLDSMisalignedBug
bool hasLDSMisalignedBug() const
Definition: GCNSubtarget.h:1179

llvm::GCNSubtarget::HasPartialNSAEncoding
bool HasPartialNSAEncoding
Definition: GCNSubtarget.h:142

llvm::GCNSubtarget::d16PreservesUnusedBits
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:692

llvm::GCNSubtarget::hasFmacF64Inst
bool hasFmacF64Inst() const
Definition: GCNSubtarget.h:769

llvm::GCNSubtarget::EnablePRTStrictNull
bool EnablePRTStrictNull
Definition: GCNSubtarget.h:97

llvm::GCNSubtarget::hasInstPrefetch
bool hasInstPrefetch() const
Definition: GCNSubtarget.h:936

llvm::GCNSubtarget::maxHardClauseLength
unsigned maxHardClauseLength() const
Definition: GCNSubtarget.h:1295

llvm::GCNSubtarget::isMesaGfxShader
bool isMesaGfxShader(const Function &F) const
Definition: GCNSubtarget.h:737

llvm::GCNSubtarget::hasVcmpxPermlaneHazard
bool hasVcmpxPermlaneHazard() const
Definition: GCNSubtarget.h:1167

llvm::GCNSubtarget::HasDLInsts
bool HasDLInsts
Definition: GCNSubtarget.h:145

llvm::GCNSubtarget::hasUserSGPRInit16Bug
bool hasUserSGPRInit16Bug() const
Definition: GCNSubtarget.h:1127

llvm::GCNSubtarget::hasExportInsts
bool hasExportInsts() const
Definition: GCNSubtarget.h:665

llvm::GCNSubtarget::hasDPP
bool hasDPP() const
Definition: GCNSubtarget.h:1022

llvm::GCNSubtarget::hasVINTERPEncoding
bool hasVINTERPEncoding() const
Definition: GCNSubtarget.h:669

llvm::GCNSubtarget::getRegBankInfo
const AMDGPURegisterBankInfo * getRegBankInfo() const override
Definition: GCNSubtarget.h:298

llvm::GCNSubtarget::hasLegacyGeometry
bool hasLegacyGeometry() const
Definition: GCNSubtarget.h:1338

llvm::GCNSubtarget::LDSBankCount
int LDSBankCount
Definition: GCNSubtarget.h:65

llvm::GCNSubtarget::getMinNumVGPRs
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1474

llvm::GCNSubtarget::FullRate64Ops
bool FullRate64Ops
Definition: GCNSubtarget.h:71

llvm::GCNSubtarget::getTrapHandlerAbi
TrapHandlerAbi getTrapHandlerAbi() const
Definition: GCNSubtarget.h:457

llvm::GCNSubtarget::isCuModeEnabled
bool isCuModeEnabled() const
Definition: GCNSubtarget.h:613

llvm::GCNSubtarget::hasScalarAtomics
bool hasScalarAtomics() const
Definition: GCNSubtarget.h:1009

llvm::GCNSubtarget::getFrameLowering
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:270

llvm::GCNSubtarget::HasR128A16
bool HasR128A16
Definition: GCNSubtarget.h:138

llvm::GCNSubtarget::hasUnalignedScratchAccess
bool hasUnalignedScratchAccess() const
Definition: GCNSubtarget.h:589

llvm::GCNSubtarget::getSelectionDAGInfo
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
Definition: GCNSubtarget.h:307

llvm::GCNSubtarget::zeroesHigh16BitsOfDest
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
Definition: AMDGPUSubtarget.cpp:243

llvm::GCNSubtarget::hasSDWAOutModsVOPC
bool hasSDWAOutModsVOPC() const
Definition: GCNSubtarget.h:761

llvm::GCNSubtarget::hasAtomicFMinFMaxF32GlobalInsts
bool hasAtomicFMinFMaxF32GlobalInsts() const
Definition: GCNSubtarget.h:829

llvm::GCNSubtarget::HasFmacF64Inst
bool HasFmacF64Inst
Definition: GCNSubtarget.h:146

llvm::GCNSubtarget::getBaseMaxNumSGPRs
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
Definition: AMDGPUSubtarget.cpp:743

llvm::GCNSubtarget::HasNoDataDepHazard
bool HasNoDataDepHazard
Definition: GCNSubtarget.h:198

llvm::GCNSubtarget::HasVOPDInsts
bool HasVOPDInsts
Definition: GCNSubtarget.h:238

llvm::GCNSubtarget::HasDot7Insts
bool HasDot7Insts
Definition: GCNSubtarget.h:153

llvm::GCNSubtarget::getTargetID
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition: GCNSubtarget.h:302

llvm::GCNSubtarget::hasAtomicCSubNoRtnInsts
bool hasAtomicCSubNoRtnInsts() const
Definition: GCNSubtarget.h:1351

llvm::GCNSubtarget::HasSDWAOutModsVOPC
bool HasSDWAOutModsVOPC
Definition: GCNSubtarget.h:130

llvm::GCNSubtarget::EnableCuMode
bool EnableCuMode
Definition: GCNSubtarget.h:88

llvm::GCNSubtarget::hasScalarFlatScratchInsts
bool hasScalarFlatScratchInsts() const
Definition: GCNSubtarget.h:648

llvm::GCNSubtarget::initializeSubtargetDependencies
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
Definition: AMDGPUSubtarget.cpp:66

llvm::GCNSubtarget::Gen
unsigned Gen
Definition: GCNSubtarget.h:63

llvm::GCNSubtarget::has12DWordStoreHazard
bool has12DWordStoreHazard() const
Definition: GCNSubtarget.h:1141

llvm::GCNSubtarget::hasVALUPartialForwardingHazard
bool hasVALUPartialForwardingHazard() const
Definition: GCNSubtarget.h:1237

llvm::GCNSubtarget::dumpCode
bool dumpCode() const
Definition: GCNSubtarget.h:513

llvm::GCNSubtarget::hasNoDataDepHazard
bool hasNoDataDepHazard() const
Definition: GCNSubtarget.h:928

llvm::GCNSubtarget::HasMovrel
bool HasMovrel
Definition: GCNSubtarget.h:121

llvm::GCNSubtarget::hasUnalignedDSAccess
bool hasUnalignedDSAccess() const
Definition: GCNSubtarget.h:581

llvm::GCNSubtarget::hasRestrictedSOffset
bool hasRestrictedSOffset() const
Definition: GCNSubtarget.h:1284

llvm::GCNSubtarget::hasMin3Max3_16
bool hasMin3Max3_16() const
Definition: GCNSubtarget.h:427

llvm::GCNSubtarget::hasIntClamp
bool hasIntClamp() const
Definition: GCNSubtarget.h:357

llvm::GCNSubtarget::hasGFX10_AEncoding
bool hasGFX10_AEncoding() const
Definition: GCNSubtarget.h:1097

llvm::GCNSubtarget::hasFlatSegmentOffsetBug
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:680

llvm::GCNSubtarget::GFX10_AEncoding
bool GFX10_AEncoding
Definition: GCNSubtarget.h:143

llvm::GCNSubtarget::MIMG_R128
bool MIMG_R128
Definition: GCNSubtarget.h:103

llvm::GCNSubtarget::getSetRegWaitStates
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
Definition: GCNSubtarget.h:509

llvm::GCNSubtarget::getTargetLowering
const SITargetLowering * getTargetLowering() const override
Definition: GCNSubtarget.h:274

llvm::GCNSubtarget::HasVcmpxPermlaneHazard
bool HasVcmpxPermlaneHazard
Definition: GCNSubtarget.h:222

llvm::GCNSubtarget::hasPackedFP32Ops
bool hasPackedFP32Ops() const
Definition: GCNSubtarget.h:1044

llvm::GCNSubtarget::SupportsXNACK
bool SupportsXNACK
Definition: GCNSubtarget.h:80

llvm::GCNSubtarget::hasTransForwardingHazard
bool hasTransForwardingHazard() const
Definition: GCNSubtarget.h:1203

llvm::GCNSubtarget::hasDot6Insts
bool hasDot6Insts() const
Definition: GCNSubtarget.h:791

llvm::GCNSubtarget::HasFmaMixInsts
bool HasFmaMixInsts
Definition: GCNSubtarget.h:120

llvm::GCNSubtarget::BackOffBarrier
bool BackOffBarrier
Definition: GCNSubtarget.h:76

llvm::GCNSubtarget::TrapID
TrapID
Definition: GCNSubtarget.h:46

llvm::GCNSubtarget::TrapID::LLVMAMDHSADebugTrap
@ LLVMAMDHSADebugTrap

llvm::GCNSubtarget::TrapID::LLVMAMDHSATrap
@ LLVMAMDHSATrap

llvm::GCNSubtarget::hasGFX940Insts
bool hasGFX940Insts() const
Definition: GCNSubtarget.h:1276

llvm::GCNSubtarget::hasLshlAddB64
bool hasLshlAddB64() const
Definition: GCNSubtarget.h:1113

llvm::GCNSubtarget::HasScalarAtomics
bool HasScalarAtomics
Definition: GCNSubtarget.h:125

llvm::GCNSubtarget::hasFullRate64Ops
bool hasFullRate64Ops() const
Definition: GCNSubtarget.h:377

llvm::GCNSubtarget::EnableSIScheduler
bool EnableSIScheduler
Definition: GCNSubtarget.h:95

llvm::GCNSubtarget::hasScalarStores
bool hasScalarStores() const
Definition: GCNSubtarget.h:1005

llvm::GCNSubtarget::isTrapHandlerEnabled
bool isTrapHandlerEnabled() const
Definition: GCNSubtarget.h:601

llvm::GCNSubtarget::enableMachineScheduler
bool enableMachineScheduler() const override
Definition: GCNSubtarget.h:955

llvm::GCNSubtarget::hasLDSFPAtomicAddF64
bool hasLDSFPAtomicAddF64() const
Definition: GCNSubtarget.h:1014

llvm::GCNSubtarget::HasAtomicFlatPkAdd16Insts
bool HasAtomicFlatPkAdd16Insts
Definition: GCNSubtarget.h:167

llvm::GCNSubtarget::hasFlatGlobalInsts
bool hasFlatGlobalInsts() const
Definition: GCNSubtarget.h:631

llvm::GCNSubtarget::hasDX10ClampMode
bool hasDX10ClampMode() const
Definition: GCNSubtarget.h:1354

llvm::GCNSubtarget::HasOffset3fBug
bool HasOffset3fBug
Definition: GCNSubtarget.h:230

llvm::GCNSubtarget::getNSAThreshold
unsigned getNSAThreshold(const MachineFunction &MF) const
Definition: AMDGPUSubtarget.cpp:1023

llvm::GCNSubtarget::HasDot4Insts
bool HasDot4Insts
Definition: GCNSubtarget.h:150

llvm::GCNSubtarget::HasDot9Insts
bool HasDot9Insts
Definition: GCNSubtarget.h:155

llvm::GCNSubtarget::HasAtomicFMinFMaxF32GlobalInsts
bool HasAtomicFMinFMaxF32GlobalInsts
Definition: GCNSubtarget.h:162

llvm::GCNSubtarget::HasDPPSrc1SGPR
bool HasDPPSrc1SGPR
Definition: GCNSubtarget.h:134

llvm::GCNSubtarget::getScalarizeGlobalBehavior
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:966

llvm::GCNSubtarget::HasAtomicFMinFMaxF32FlatInsts
bool HasAtomicFMinFMaxF32FlatInsts
Definition: GCNSubtarget.h:164

llvm::GCNSubtarget::HasPseudoScalarTrans
bool HasPseudoScalarTrans
Definition: GCNSubtarget.h:219

llvm::GCNSubtarget::hasReadM0LdsDmaHazard
bool hasReadM0LdsDmaHazard() const
Definition: GCNSubtarget.h:1159

llvm::GCNSubtarget::hasScalarSMulU64
bool hasScalarSMulU64() const
Definition: GCNSubtarget.h:730

llvm::GCNSubtarget::HasDot8Insts
bool HasDot8Insts
Definition: GCNSubtarget.h:154

llvm::GCNSubtarget::getKnownHighZeroBitsForFrameIndex
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
Definition: GCNSubtarget.h:336

llvm::GCNSubtarget::HasShaderCyclesRegister
bool HasShaderCyclesRegister
Definition: GCNSubtarget.h:195

llvm::GCNSubtarget::FlatInstOffsets
bool FlatInstOffsets
Definition: GCNSubtarget.h:200

llvm::GCNSubtarget::hasShaderCyclesHiLoRegisters
bool hasShaderCyclesHiLoRegisters() const
Definition: GCNSubtarget.h:920

llvm::GCNSubtarget::hasSDWASdst
bool hasSDWASdst() const
Definition: GCNSubtarget.h:753

llvm::GCNSubtarget::HasDefaultComponentBroadcast
bool HasDefaultComponentBroadcast
Definition: GCNSubtarget.h:180

llvm::GCNSubtarget::HasNoSdstCMPX
bool HasNoSdstCMPX
Definition: GCNSubtarget.h:191

llvm::GCNSubtarget::hasScalarPackInsts
bool hasScalarPackInsts() const
Definition: GCNSubtarget.h:447

llvm::GCNSubtarget::hasFFBL
bool hasFFBL() const
Definition: GCNSubtarget.h:415

llvm::GCNSubtarget::hasNSAEncoding
bool hasNSAEncoding() const
Definition: GCNSubtarget.h:1087

llvm::GCNSubtarget::hasSMemRealTime
bool hasSMemRealTime() const
Definition: GCNSubtarget.h:985

llvm::GCNSubtarget::EnableTgSplit
bool EnableTgSplit
Definition: GCNSubtarget.h:87

llvm::GCNSubtarget::hasFlatAddressSpace
bool hasFlatAddressSpace() const
Definition: GCNSubtarget.h:619

llvm::GCNSubtarget::hasDPPBroadcasts
bool hasDPPBroadcasts() const
Definition: GCNSubtarget.h:1026

llvm::GCNSubtarget::HasFP8Insts
bool HasFP8Insts
Definition: GCNSubtarget.h:159

llvm::GCNSubtarget::usePRTStrictNull
bool usePRTStrictNull() const
Definition: GCNSubtarget.h:559

llvm::GCNSubtarget::hasMovB64
bool hasMovB64() const
Definition: GCNSubtarget.h:1111

llvm::GCNSubtarget::HalfRate64Ops
bool HalfRate64Ops
Definition: GCNSubtarget.h:70

llvm::GCNSubtarget::HasSMemTimeInst
bool HasSMemTimeInst
Definition: GCNSubtarget.h:194

llvm::GCNSubtarget::HasFP8ConversionInsts
bool HasFP8ConversionInsts
Definition: GCNSubtarget.h:160

llvm::GCNSubtarget::GFX8Insts
bool GFX8Insts
Definition: GCNSubtarget.h:105

llvm::GCNSubtarget::hasInstFwdPrefetchBug
bool hasInstFwdPrefetchBug() const
Definition: GCNSubtarget.h:1183

llvm::GCNSubtarget::HasDot2Insts
bool HasDot2Insts
Definition: GCNSubtarget.h:148

llvm::GCNSubtarget::hasAtomicFMinFMaxF64GlobalInsts
bool hasAtomicFMinFMaxF64GlobalInsts() const
Definition: GCNSubtarget.h:833

llvm::GCNSubtarget::hasMed3_16
bool hasMed3_16() const
Definition: GCNSubtarget.h:423

llvm::GCNSubtarget::getReservedNumSGPRs
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
Definition: AMDGPUSubtarget.cpp:717

llvm::GCNSubtarget::hasMovrel
bool hasMovrel() const
Definition: GCNSubtarget.h:989

llvm::GCNSubtarget::HasVOP3Literal
bool HasVOP3Literal
Definition: GCNSubtarget.h:197

llvm::GCNSubtarget::hasNullExportTarget
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
Definition: GCNSubtarget.h:1261

llvm::GCNSubtarget::UnalignedScratchAccess
bool UnalignedScratchAccess
Definition: GCNSubtarget.h:77

llvm::GCNSubtarget::hasAtomicFlatPkAdd16Insts
bool hasAtomicFlatPkAdd16Insts() const
Definition: GCNSubtarget.h:847

llvm::GCNSubtarget::HasImageGather4D16Bug
bool HasImageGather4D16Bug
Definition: GCNSubtarget.h:233

llvm::GCNSubtarget::hasBFI
bool hasBFI() const
Definition: GCNSubtarget.h:403

llvm::GCNSubtarget::ldsRequiresM0Init
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that require m0 to be initialized.
Definition: GCNSubtarget.h:702

llvm::GCNSubtarget::HasSMEMtoVectorWriteHazard
bool HasSMEMtoVectorWriteHazard
Definition: GCNSubtarget.h:224

llvm::GCNSubtarget::HasAtomicFaddNoRtnInsts
bool HasAtomicFaddNoRtnInsts
Definition: GCNSubtarget.h:169

llvm::GCNSubtarget::hasSMEMtoVectorWriteHazard
bool hasSMEMtoVectorWriteHazard() const
Definition: GCNSubtarget.h:1175

llvm::GCNSubtarget::useAA
bool useAA() const override
Definition: AMDGPUSubtarget.cpp:689

llvm::GCNSubtarget::isWave32
bool isWave32() const
Definition: GCNSubtarget.h:1519

llvm::GCNSubtarget::hasVGPRIndexMode
bool hasVGPRIndexMode() const
Definition: GCNSubtarget.h:993

llvm::GCNSubtarget::HasGDS
bool HasGDS
Definition: GCNSubtarget.h:207

llvm::GCNSubtarget::HasAtomicBufferGlobalPkAddF16Insts
bool HasAtomicBufferGlobalPkAddF16Insts
Definition: GCNSubtarget.h:172

llvm::GCNSubtarget::hasUnalignedBufferAccessEnabled
bool hasUnalignedBufferAccessEnabled() const
Definition: GCNSubtarget.h:577

llvm::GCNSubtarget::getOccupancyWithNumVGPRs
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
Definition: AMDGPUSubtarget.cpp:696

llvm::GCNSubtarget::getMaxPrivateElementSize
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
Definition: GCNSubtarget.h:344

llvm::GCNSubtarget::getMinFlatWorkGroupSize
unsigned getMinFlatWorkGroupSize() const override
Definition: GCNSubtarget.h:1538

llvm::GCNSubtarget::HasAtomicCSubNoRtnInsts
bool HasAtomicCSubNoRtnInsts
Definition: GCNSubtarget.h:173

llvm::GCNSubtarget::hasImageInsts
bool hasImageInsts() const
Definition: GCNSubtarget.h:1057

llvm::GCNSubtarget::HasAtomicDsPkAdd16Insts
bool HasAtomicDsPkAdd16Insts
Definition: GCNSubtarget.h:166

llvm::GCNSubtarget::hasImageGather4D16Bug
bool hasImageGather4D16Bug() const
Definition: GCNSubtarget.h:1079

llvm::GCNSubtarget::FMA
bool FMA
Definition: GCNSubtarget.h:102

llvm::GCNSubtarget::HasRequiredExportPriority
bool HasRequiredExportPriority
Definition: GCNSubtarget.h:241

llvm::GCNSubtarget::hasFMA
bool hasFMA() const
Definition: GCNSubtarget.h:439

llvm::GCNSubtarget::hasDot10Insts
bool hasDot10Insts() const
Definition: GCNSubtarget.h:807

llvm::GCNSubtarget::hasSPackHL
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
Definition: GCNSubtarget.h:1253

llvm::GCNSubtarget::hasVMEMtoScalarWriteHazard
bool hasVMEMtoScalarWriteHazard() const
Definition: GCNSubtarget.h:1171

llvm::GCNSubtarget::hasCvtFP8VOP1Bug
bool hasCvtFP8VOP1Bug() const
Definition: GCNSubtarget.h:1347

llvm::GCNSubtarget::supportsMinMaxDenormModes
bool supportsMinMaxDenormModes() const
Definition: GCNSubtarget.h:522

llvm::GCNSubtarget::HasAtomicBufferPkAddBF16Inst
bool HasAtomicBufferPkAddBF16Inst
Definition: GCNSubtarget.h:175

llvm::GCNSubtarget::hasNegativeUnalignedScratchOffsetBug
bool hasNegativeUnalignedScratchOffsetBug() const
Definition: GCNSubtarget.h:1133

llvm::GCNSubtarget::hasFFBH
bool hasFFBH() const
Definition: GCNSubtarget.h:419

llvm::GCNSubtarget::hasFlatScratchSVSMode
bool hasFlatScratchSVSMode() const
Definition: GCNSubtarget.h:646

llvm::GCNSubtarget::supportsWGP
bool supportsWGP() const
Definition: GCNSubtarget.h:355

llvm::GCNSubtarget::hasG16
bool hasG16() const
Definition: GCNSubtarget.h:1071

llvm::GCNSubtarget::GFX90AInsts
bool GFX90AInsts
Definition: GCNSubtarget.h:107

llvm::GCNSubtarget::HasMSAALoadDstSelBug
bool HasMSAALoadDstSelBug
Definition: GCNSubtarget.h:234

llvm::GCNSubtarget::HasPackedFP32Ops
bool HasPackedFP32Ops
Definition: GCNSubtarget.h:135

llvm::GCNSubtarget::hasHalfRate64Ops
bool hasHalfRate64Ops() const
Definition: GCNSubtarget.h:373

llvm::GCNSubtarget::hasAtomicFaddInsts
bool hasAtomicFaddInsts() const
Definition: GCNSubtarget.h:849

llvm::GCNSubtarget::HasAtomicBufferGlobalPkAddF16NoRtnInsts
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts
Definition: GCNSubtarget.h:171

llvm::GCNSubtarget::hasNSAtoVMEMBug
bool hasNSAtoVMEMBug() const
Definition: GCNSubtarget.h:1217

llvm::GCNSubtarget::HasArchitectedFlatScratch
bool HasArchitectedFlatScratch
Definition: GCNSubtarget.h:204

llvm::GCNSubtarget::CIInsts
bool CIInsts
Definition: GCNSubtarget.h:104

llvm::GCNSubtarget::getNSAMaxSize
unsigned getNSAMaxSize(bool HasSampler=false) const
Definition: GCNSubtarget.h:1093

llvm::GCNSubtarget::hasAtomicBufferGlobalPkAddF16NoRtnInsts
bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const
Definition: GCNSubtarget.h:857

llvm::GCNSubtarget::hasMIMG_R128
bool hasMIMG_R128() const
Definition: GCNSubtarget.h:365

llvm::GCNSubtarget::createFillMFMAShadowMutation
std::unique_ptr< ScheduleDAGMutation > createFillMFMAShadowMutation(const TargetInstrInfo *TII) const
Definition: AMDGPUSubtarget.cpp:1018

llvm::GCNSubtarget::getOccupancyWithNumSGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
Definition: AMDGPUSubtarget.cpp:691

llvm::GCNSubtarget::hasVOP3DPP
bool hasVOP3DPP() const
Definition: GCNSubtarget.h:1231

llvm::GCNSubtarget::hasAtomicBufferPkAddBF16Inst
bool hasAtomicBufferPkAddBF16Inst() const
Definition: GCNSubtarget.h:869

llvm::GCNSubtarget::HasAgentScopeFineGrainedRemoteMemoryAtomics
bool HasAgentScopeFineGrainedRemoteMemoryAtomics
Definition: GCNSubtarget.h:179

llvm::GCNSubtarget::getMaxFlatWorkGroupSize
unsigned getMaxFlatWorkGroupSize() const override
Definition: GCNSubtarget.h:1543

llvm::GCNSubtarget::hasDPP8
bool hasDPP8() const
Definition: GCNSubtarget.h:1034

llvm::GCNSubtarget::hasDot5Insts
bool hasDot5Insts() const
Definition: GCNSubtarget.h:787

llvm::GCNSubtarget::getMaxNumUserSGPRs
unsigned getMaxNumUserSGPRs() const
Definition: GCNSubtarget.h:981

llvm::GCNSubtarget::hasAtomicFaddNoRtnInsts
bool hasAtomicFaddNoRtnInsts() const
Definition: GCNSubtarget.h:855

llvm::GCNSubtarget::MaxHardClauseLength
unsigned MaxHardClauseLength
The maximum number of instructions that may be placed within an S_CLAUSE, which is one greater than t...
Definition: GCNSubtarget.h:184

llvm::GCNSubtarget::hasPermLaneX16
bool hasPermLaneX16() const
Definition: GCNSubtarget.h:1017

llvm::GCNSubtarget::hasFlatScratchSVSSwizzleBug
bool hasFlatScratchSVSSwizzleBug() const
Definition: GCNSubtarget.h:1267

llvm::GCNSubtarget::hasFlatBufferGlobalAtomicFaddF64Inst
bool hasFlatBufferGlobalAtomicFaddF64Inst() const
Definition: GCNSubtarget.h:877

llvm::GCNSubtarget::hasIEEEMode
bool hasIEEEMode() const
Definition: GCNSubtarget.h:1357

llvm::GCNSubtarget::hasScalarDwordx3Loads
bool hasScalarDwordx3Loads() const
Definition: GCNSubtarget.h:1003

llvm::GCNSubtarget::hasVDecCoExecHazard
bool hasVDecCoExecHazard() const
Definition: GCNSubtarget.h:1213

llvm::GCNSubtarget::FastDenormalF32
bool FastDenormalF32
Definition: GCNSubtarget.h:69

llvm::GCNSubtarget::HasImageStoreD16Bug
bool HasImageStoreD16Bug
Definition: GCNSubtarget.h:232

llvm::GCNSubtarget::UnalignedAccessMode
bool UnalignedAccessMode
Definition: GCNSubtarget.h:78

llvm::GCNSubtarget::hasLDSFPAtomicAddF32
bool hasLDSFPAtomicAddF32() const
Definition: GCNSubtarget.h:1013

llvm::GCNSubtarget::getWavesPerEUForWorkGroup
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
Definition: GCNSubtarget.h:1550

llvm::GCNSubtarget::EnableDS128
bool EnableDS128
Definition: GCNSubtarget.h:96

llvm::GCNSubtarget::HasApertureRegs
bool HasApertureRegs
Definition: GCNSubtarget.h:79

llvm::GCNSubtarget::hasBFM
bool hasBFM() const
Definition: GCNSubtarget.h:407

llvm::GCNSubtarget::haveRoundOpsF64
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
Definition: GCNSubtarget.h:547

llvm::GCNSubtarget::hasDelayAlu
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
Definition: GCNSubtarget.h:1270

llvm::GCNSubtarget::HasNSAtoVMEMBug
bool HasNSAtoVMEMBug
Definition: GCNSubtarget.h:228

llvm::GCNSubtarget::hasReadM0SendMsgHazard
bool hasReadM0SendMsgHazard() const
Definition: GCNSubtarget.h:1154

llvm::GCNSubtarget::HasA16
bool HasA16
Definition: GCNSubtarget.h:139

llvm::GCNSubtarget::hasDot8Insts
bool hasDot8Insts() const
Definition: GCNSubtarget.h:799

llvm::GCNSubtarget::hasScalarMulHiInsts
bool hasScalarMulHiInsts() const
Definition: GCNSubtarget.h:451

llvm::GCNSubtarget::hasSCmpK
bool hasSCmpK() const
Definition: GCNSubtarget.h:943

llvm::GCNSubtarget::hasPseudoScalarTrans
bool hasPseudoScalarTrans() const
Definition: GCNSubtarget.h:1282

llvm::GCNSubtarget::getLegalizerInfo
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:294

llvm::GCNSubtarget::HasSDWASdst
bool HasSDWASdst
Definition: GCNSubtarget.h:128

llvm::GCNSubtarget::hasDS96AndDS128
bool hasDS96AndDS128() const
Definition: GCNSubtarget.h:542

llvm::GCNSubtarget::hasGWS
bool hasGWS() const
Definition: GCNSubtarget.h:1329

llvm::GCNSubtarget::HasAtomicFMinFMaxF64GlobalInsts
bool HasAtomicFMinFMaxF64GlobalInsts
Definition: GCNSubtarget.h:163

llvm::GCNSubtarget::hasReadM0LdsDirectHazard
bool hasReadM0LdsDirectHazard() const
Definition: GCNSubtarget.h:1163

llvm::GCNSubtarget::useFlatForGlobal
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:531

llvm::GCNSubtarget::EnableFlatScratch
bool EnableFlatScratch
Definition: GCNSubtarget.h:205

llvm::GCNSubtarget::hasHalfRate64Ops
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI)

llvm::GCNSubtarget::hasVOPDInsts
bool hasVOPDInsts() const
Definition: GCNSubtarget.h:1265

llvm::GCNSubtarget::hasGFX10_BEncoding
bool hasGFX10_BEncoding() const
Definition: GCNSubtarget.h:1101

llvm::GCNSubtarget::TSInfo
SelectionDAGTargetInfo TSInfo
Definition: GCNSubtarget.h:248

llvm::GCNSubtarget::getGeneration
Generation getGeneration() const
Definition: GCNSubtarget.h:317

llvm::GCNSubtarget::GFX7GFX8GFX9Insts
bool GFX7GFX8GFX9Insts
Definition: GCNSubtarget.h:113

llvm::GCNSubtarget::getMaxNumSGPRs
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
Definition: GCNSubtarget.h:1404

llvm::GCNSubtarget::hasForceStoreSC0SC1
bool hasForceStoreSC0SC1() const
Definition: GCNSubtarget.h:1243

llvm::GCNSubtarget::hasVOP3Literal
bool hasVOP3Literal() const
Definition: GCNSubtarget.h:924

llvm::GCNSubtarget::HasDot10Insts
bool HasDot10Insts
Definition: GCNSubtarget.h:156

llvm::GCNSubtarget::hasAtomicBufferGlobalPkAddF16Insts
bool hasAtomicBufferGlobalPkAddF16Insts() const
Definition: GCNSubtarget.h:861

llvm::GCNSubtarget::HasSDWAMac
bool HasSDWAMac
Definition: GCNSubtarget.h:129

llvm::GCNSubtarget::hasNoSdstCMPX
bool hasNoSdstCMPX() const
Definition: GCNSubtarget.h:900

llvm::GCNSubtarget::getAddressableNumVGPRs
unsigned getAddressableNumVGPRs() const
Definition: GCNSubtarget.h:1468

llvm::GCNSubtarget::isXNACKEnabled
bool isXNACKEnabled() const
Definition: GCNSubtarget.h:605

llvm::GCNSubtarget::hasScalarAddSub64
bool hasScalarAddSub64() const
Definition: GCNSubtarget.h:728

llvm::GCNSubtarget::hasIEEEMinMax3
bool hasIEEEMinMax3() const
Definition: GCNSubtarget.h:1363

llvm::GCNSubtarget::hasSplitBarriers
bool hasSplitBarriers() const
Definition: GCNSubtarget.h:1344

llvm::GCNSubtarget::hasUnpackedD16VMem
bool hasUnpackedD16VMem() const
Definition: GCNSubtarget.h:732

llvm::GCNSubtarget::enableEarlyIfConversion
bool enableEarlyIfConversion() const override
Definition: GCNSubtarget.h:972

llvm::GCNSubtarget::hasSMRDReadVALUDefHazard
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU instruction.
Definition: GCNSubtarget.h:494

llvm::GCNSubtarget::hasRFEHazards
bool hasRFEHazards() const
Definition: GCNSubtarget.h:504

llvm::GCNSubtarget::hasVMEMReadSGPRVALUDefHazard
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU instruction.
Definition: GCNSubtarget.h:500

llvm::GCNSubtarget::FlatForGlobal
bool FlatForGlobal
Definition: GCNSubtarget.h:74

llvm::GCNSubtarget::hasFlatScratchSTMode
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:642

llvm::GCNSubtarget::getBaseReservedNumSGPRs
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
Definition: AMDGPUSubtarget.cpp:701

llvm::GCNSubtarget::hasGWSSemaReleaseAll
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:716

llvm::GCNSubtarget::hasDPALU_DPP
bool hasDPALU_DPP() const
Definition: GCNSubtarget.h:1038

llvm::GCNSubtarget::enableSIScheduler
bool enableSIScheduler() const
Definition: GCNSubtarget.h:1115

llvm::GCNSubtarget::hasAtomicGlobalPkAddBF16Inst
bool hasAtomicGlobalPkAddBF16Inst() const
Definition: GCNSubtarget.h:865

llvm::GCNSubtarget::FlatScratchInsts
bool FlatScratchInsts
Definition: GCNSubtarget.h:202

llvm::GCNSubtarget::hasAddr64
bool hasAddr64() const
Definition: GCNSubtarget.h:381

llvm::GCNSubtarget::HasVMEMtoScalarWriteHazard
bool HasVMEMtoScalarWriteHazard
Definition: GCNSubtarget.h:223

llvm::GCNSubtarget::SGPRInitBug
bool SGPRInitBug
Definition: GCNSubtarget.h:114

llvm::GCNSubtarget::HasAtomicGlobalPkAddBF16Inst
bool HasAtomicGlobalPkAddBF16Inst
Definition: GCNSubtarget.h:174

llvm::GCNSubtarget::hasUnalignedAccessMode
bool hasUnalignedAccessMode() const
Definition: GCNSubtarget.h:593

llvm::GCNSubtarget::FP64
bool FP64
Definition: GCNSubtarget.h:101

llvm::GCNSubtarget::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs() const
Definition: GCNSubtarget.h:1392

llvm::GCNSubtarget::HasDot1Insts
bool HasDot1Insts
Definition: GCNSubtarget.h:147

llvm::GCNSubtarget::hasReadVCCZBug
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
Definition: GCNSubtarget.h:483

llvm::GCNSubtarget::HasAtomicFaddRtnInsts
bool HasAtomicFaddRtnInsts
Definition: GCNSubtarget.h:168

llvm::GCNSubtarget::isWave64
bool isWave64() const
Definition: GCNSubtarget.h:1523

llvm::GCNSubtarget::HasRestrictedSOffset
bool HasRestrictedSOffset
Definition: GCNSubtarget.h:220

llvm::GCNSubtarget::RequiresCOV6
bool RequiresCOV6
Definition: GCNSubtarget.h:243

llvm::GCNSubtarget::hasIEEEMinMax
bool hasIEEEMinMax() const
Definition: GCNSubtarget.h:1360

llvm::GCNSubtarget::HasArchitectedSGPRs
bool HasArchitectedSGPRs
Definition: GCNSubtarget.h:206

llvm::GCNSubtarget::TrapHandler
bool TrapHandler
Definition: GCNSubtarget.h:89

llvm::GCNSubtarget::hasFmaMixInsts
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:431

llvm::GCNSubtarget::HasVALUTransUseHazard
bool HasVALUTransUseHazard
Definition: GCNSubtarget.h:239

llvm::GCNSubtarget::hasCARRY
bool hasCARRY() const
Definition: GCNSubtarget.h:435

llvm::GCNSubtarget::HasSMemRealTime
bool HasSMemRealTime
Definition: GCNSubtarget.h:118

llvm::GCNSubtarget::hasPackedTID
bool hasPackedTID() const
Definition: GCNSubtarget.h:1272

llvm::GCNSubtarget::HasVcmpxExecWARHazard
bool HasVcmpxExecWARHazard
Definition: GCNSubtarget.h:226

llvm::GCNSubtarget::HasNSAEncoding
bool HasNSAEncoding
Definition: GCNSubtarget.h:141

llvm::GCNSubtarget::hasFP64
bool hasFP64() const
Definition: GCNSubtarget.h:361

llvm::GCNSubtarget::hasAddNoCarry
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:724

llvm::GCNSubtarget::ScalarFlatScratchInsts
bool ScalarFlatScratchInsts
Definition: GCNSubtarget.h:203

llvm::GCNSubtarget::hasVALUTransUseHazard
bool hasVALUTransUseHazard() const
Definition: GCNSubtarget.h:1241

llvm::GCNSubtarget::HasDot5Insts
bool HasDot5Insts
Definition: GCNSubtarget.h:151

llvm::GCNSubtarget::hasShaderCyclesRegister
bool hasShaderCyclesRegister() const
Definition: GCNSubtarget.h:916

llvm::GCNSubtarget::HasMFMAInlineLiteralBug
bool HasMFMAInlineLiteralBug
Definition: GCNSubtarget.h:212

llvm::GCNSubtarget::HasIntClamp
bool HasIntClamp
Definition: GCNSubtarget.h:119

llvm::GCNSubtarget::UnalignedBufferAccess
bool UnalignedBufferAccess
Definition: GCNSubtarget.h:213

llvm::GCNSubtarget::HasUnpackedD16VMem
bool HasUnpackedD16VMem
Definition: GCNSubtarget.h:210

llvm::GCNSubtarget::hasSALUFloatInsts
bool hasSALUFloatInsts() const
Definition: GCNSubtarget.h:1278

llvm::GCNSubtarget::hasVGPRSingleUseHintInsts
bool hasVGPRSingleUseHintInsts() const
Definition: GCNSubtarget.h:1280

llvm::GCNSubtarget::TargetTriple
Triple TargetTriple
Definition: GCNSubtarget.h:61

llvm::GCNSubtarget::EnableUnsafeDSOffsetFolding
bool EnableUnsafeDSOffsetFolding
Definition: GCNSubtarget.h:94

llvm::GCNSubtarget::hasFractBug
bool hasFractBug() const
Definition: GCNSubtarget.h:395

llvm::GCNSubtarget::isPreciseMemoryEnabled
bool isPreciseMemoryEnabled() const
Definition: GCNSubtarget.h:617

llvm::GCNSubtarget::overrideSchedPolicy
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
Definition: AMDGPUSubtarget.cpp:653

llvm::GCNSubtarget::hasDPPSrc1SGPR
bool hasDPPSrc1SGPR() const
Definition: GCNSubtarget.h:1042

llvm::GCNSubtarget::hasGDS
bool hasGDS() const
Definition: GCNSubtarget.h:1326

llvm::GCNSubtarget::getMaxWaveScratchSize
unsigned getMaxWaveScratchSize() const
Definition: GCNSubtarget.h:321

llvm::GCNSubtarget::GFX12Insts
bool GFX12Insts
Definition: GCNSubtarget.h:111

llvm::GCNSubtarget::HasDefaultComponentZero
bool HasDefaultComponentZero
Definition: GCNSubtarget.h:178

llvm::GCNSubtarget::HasMemoryAtomicFaddF32DenormalSupport
bool HasMemoryAtomicFaddF32DenormalSupport
Definition: GCNSubtarget.h:170

llvm::GCNSubtarget::hasDot4Insts
bool hasDot4Insts() const
Definition: GCNSubtarget.h:783

llvm::GCNSubtarget::FeatureDisable
bool FeatureDisable
Definition: GCNSubtarget.h:246

llvm::GCNSubtarget::getPostRAMutations
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation > > &Mutations) const override
Definition: AMDGPUSubtarget.cpp:1012

llvm::GCNSubtarget::flatScratchIsArchitected
bool flatScratchIsArchitected() const
Definition: GCNSubtarget.h:1320

llvm::GCNSubtarget::hasPartialNSAEncoding
bool hasPartialNSAEncoding() const
Definition: GCNSubtarget.h:1091

llvm::GCNSubtarget::HasInstFwdPrefetchBug
bool HasInstFwdPrefetchBug
Definition: GCNSubtarget.h:225

llvm::GCNSubtarget::checkSubtargetFeatures
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
Definition: AMDGPUSubtarget.cpp:177

llvm::GCNSubtarget::~GCNSubtarget
~GCNSubtarget() override

llvm::GCNSubtarget::FlatAddressSpace
bool FlatAddressSpace
Definition: GCNSubtarget.h:199

llvm::GCNSubtarget::hasDot9Insts
bool hasDot9Insts() const
Definition: GCNSubtarget.h:803

llvm::GCNSubtarget::HasPackedTID
bool HasPackedTID
Definition: GCNSubtarget.h:215

llvm::GCNSubtarget::hasAtomicCSub
bool hasAtomicCSub() const
Definition: GCNSubtarget.h:661

llvm::GCNSubtarget::TargetID
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
Definition: GCNSubtarget.h:62

llvm::GCNSubtarget::hasDefaultComponentBroadcast
bool hasDefaultComponentBroadcast() const
Definition: GCNSubtarget.h:896

llvm::GCNSubtarget::requiresCodeObjectV6
bool requiresCodeObjectV6() const
Definition: GCNSubtarget.h:1245

llvm::GCNSubtarget::SupportsSRAMECC
bool SupportsSRAMECC
Definition: GCNSubtarget.h:185

llvm::GCNSubtarget::getCallLowering
const CallLowering * getCallLowering() const override
Definition: GCNSubtarget.h:282

llvm::GCNSubtarget::hasBFE
bool hasBFE() const
Definition: GCNSubtarget.h:399

llvm::GCNSubtarget::hasLdsDirect
bool hasLdsDirect() const
Definition: GCNSubtarget.h:1233

llvm::GCNSubtarget::hasGWSAutoReplay
bool hasGWSAutoReplay() const
Definition: GCNSubtarget.h:711

llvm::GCNSubtarget::HasFlatBufferGlobalAtomicFaddF64Inst
bool HasFlatBufferGlobalAtomicFaddF64Inst
Definition: GCNSubtarget.h:177

llvm::GCNTargetMachine
Definition: AMDGPUTargetMachine.h:85

llvm::GCNUserSGPRUsageInfo
Definition: GCNSubtarget.h:1581

llvm::GCNUserSGPRUsageInfo::getNumUserSGPRForField
static unsigned getNumUserSGPRForField(UserSGPRID ID)
Definition: GCNSubtarget.h:1619

llvm::GCNUserSGPRUsageInfo::hasQueuePtr
bool hasQueuePtr() const
Definition: GCNSubtarget.h:1589

llvm::GCNUserSGPRUsageInfo::hasKernargSegmentPtr
bool hasKernargSegmentPtr() const
Definition: GCNSubtarget.h:1591

llvm::GCNUserSGPRUsageInfo::allocKernargPreloadSGPRs
void allocKernargPreloadSGPRs(unsigned NumSGPRs)
Definition: AMDGPUSubtarget.cpp:1120

llvm::GCNUserSGPRUsageInfo::hasDispatchID
bool hasDispatchID() const
Definition: GCNSubtarget.h:1593

llvm::GCNUserSGPRUsageInfo::UserSGPRID
UserSGPRID
Definition: GCNSubtarget.h:1607

llvm::GCNUserSGPRUsageInfo::ImplicitBufferPtrID
@ ImplicitBufferPtrID
Definition: GCNSubtarget.h:1608

llvm::GCNUserSGPRUsageInfo::DispatchIdID
@ DispatchIdID
Definition: GCNSubtarget.h:1613

llvm::GCNUserSGPRUsageInfo::QueuePtrID
@ QueuePtrID
Definition: GCNSubtarget.h:1611

llvm::GCNUserSGPRUsageInfo::DispatchPtrID
@ DispatchPtrID
Definition: GCNSubtarget.h:1610

llvm::GCNUserSGPRUsageInfo::FlatScratchInitID
@ FlatScratchInitID
Definition: GCNSubtarget.h:1614

llvm::GCNUserSGPRUsageInfo::PrivateSegmentBufferID
@ PrivateSegmentBufferID
Definition: GCNSubtarget.h:1609

llvm::GCNUserSGPRUsageInfo::PrivateSegmentSizeID
@ PrivateSegmentSizeID
Definition: GCNSubtarget.h:1615

llvm::GCNUserSGPRUsageInfo::KernargSegmentPtrID
@ KernargSegmentPtrID
Definition: GCNSubtarget.h:1612

llvm::GCNUserSGPRUsageInfo::hasPrivateSegmentBuffer
bool hasPrivateSegmentBuffer() const
Definition: GCNSubtarget.h:1585

llvm::GCNUserSGPRUsageInfo::getNumFreeUserSGPRs
unsigned getNumFreeUserSGPRs()
Definition: AMDGPUSubtarget.cpp:1126

llvm::GCNUserSGPRUsageInfo::hasImplicitBufferPtr
bool hasImplicitBufferPtr() const
Definition: GCNSubtarget.h:1583

llvm::GCNUserSGPRUsageInfo::getNumKernargPreloadSGPRs
unsigned getNumKernargPreloadSGPRs() const
Definition: GCNSubtarget.h:1599

llvm::GCNUserSGPRUsageInfo::hasPrivateSegmentSize
bool hasPrivateSegmentSize() const
Definition: GCNSubtarget.h:1597

llvm::GCNUserSGPRUsageInfo::getNumUsedUserSGPRs
unsigned getNumUsedUserSGPRs() const
Definition: GCNSubtarget.h:1601

llvm::GCNUserSGPRUsageInfo::hasDispatchPtr
bool hasDispatchPtr() const
Definition: GCNSubtarget.h:1587

llvm::GCNUserSGPRUsageInfo::hasFlatScratchInit
bool hasFlatScratchInit() const
Definition: GCNSubtarget.h:1595

llvm::InlineAsmLowering
Definition: InlineAsmLowering.h:28

llvm::InstrItineraryData
Itinerary data supplied by a subtarget to be used by a target.
Definition: MCInstrItineraries.h:110

llvm::InstructionSelector
Definition: InstructionSelector.h:19

llvm::LegalizerInfo
Definition: LegalizerInfo.h:1239

llvm::Legalizer
Definition: Legalizer.h:37

llvm::MachineFunction
Definition: MachineFunction.h:258

llvm::SDep
Scheduling dependency.
Definition: ScheduleDAG.h:49

llvm::SIFrameLowering
Definition: SIFrameLowering.h:17

llvm::SIInstrInfo
Definition: SIInstrInfo.h:83

llvm::SIInstrInfo::getRegisterInfo
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:222

llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:32

llvm::SIRegisterInfo::getBoolRC
const TargetRegisterClass * getBoolRC() const
Definition: SIRegisterInfo.h:345

llvm::SITargetLowering
Definition: SIISelLowering.h:31

llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242

llvm::SelectionDAGTargetInfo
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
Definition: SelectionDAGTargetInfo.h:31

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:50

llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:111

llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:45

llvm::TargetSchedModel
Provide an instruction scheduling machine model to CodeGen passes.
Definition: TargetSchedule.h:30

llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:63

llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44

llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43

unsigned

ErrorHandling.h

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143

llvm::AMDGPU::IsaInfo::getVGPREncodingGranule
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
Definition: AMDGPUBaseInfo.cpp:1121

llvm::AMDGPU::IsaInfo::getTotalNumVGPRs
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:1133

llvm::AMDGPU::IsaInfo::getWavesPerEUForWorkGroup
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Definition: AMDGPUBaseInfo.cpp:975

llvm::AMDGPU::IsaInfo::getMaxWorkGroupsPerCU
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Definition: AMDGPUBaseInfo.cpp:943

llvm::AMDGPU::IsaInfo::getMaxFlatWorkGroupSize
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:985

llvm::AMDGPU::IsaInfo::getSGPREncodingGranule
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:1004

llvm::AMDGPU::IsaInfo::getMinNumVGPRs
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition: AMDGPUBaseInfo.cpp:1195

llvm::AMDGPU::IsaInfo::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:1015

llvm::AMDGPU::IsaInfo::getAddressableNumVGPRs
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:1146

llvm::AMDGPU::IsaInfo::getMinNumSGPRs
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition: AMDGPUBaseInfo.cpp:1027

llvm::AMDGPU::IsaInfo::getMinFlatWorkGroupSize
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:981

llvm::AMDGPU::IsaInfo::getMaxNumSGPRs
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
Definition: AMDGPUBaseInfo.cpp:1044

llvm::AMDGPU::IsaInfo::getMinWavesPerEU
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:962

llvm::AMDGPU::IsaInfo::getSGPRAllocGranule
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:995

llvm::AMDGPU::IsaInfo::getMaxNumVGPRs
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition: AMDGPUBaseInfo.cpp:1219

llvm::AMDGPU::IsaInfo::getVGPRAllocGranule
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
Definition: AMDGPUBaseInfo.cpp:1103

llvm::AMDGPU::IsaInfo::getAddressableNumArchVGPRs
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:1144

llvm::AMDGPU::IsaInfo::getTotalNumSGPRs
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:1008

llvm::AMDGPU::getMaxNumUserSGPRs
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
Definition: AMDGPUBaseInfo.cpp:2098

llvm::AMDGPU::getNSAMaxSize
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
Definition: AMDGPUBaseInfo.cpp:2087

llvm::AMDGPU::isShader
bool isShader(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1993

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::countl_zero
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39

llvm::MachineSchedPolicy
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
Definition: MachineScheduler.h:184