docs/doxygen/AMDGPUTargetTransformInfo_8h_source.html

//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

/// \file

/// This file a TargetTransformInfoImplBase conforming object specific to the

/// AMDGPU target machine. It uses the target's detailed information to

/// provide more precise answers to certain TTI queries, while letting the

/// target independent and default TTI implementations handle the rest.

//

//===----------------------------------------------------------------------===//


#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H


#include "AMDGPU.h"

#include "llvm/CodeGen/BasicTTIImpl.h"

#include "llvm/Support/AMDGPUAddrSpace.h"

#include <optional>


namespace llvm {


class AMDGPUTargetMachine;

class GCNSubtarget;

class InstCombiner;

class Loop;

class ScalarEvolution;

class SITargetLowering;

class Type;

class Value;


class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {

  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;

  using TTI = TargetTransformInfo;


  friend BaseT;


  Triple TargetTriple;


  const TargetSubtargetInfo *ST;

  const TargetLoweringBase *TLI;


  const TargetSubtargetInfo *getST() const { return ST; }

  const TargetLoweringBase *getTLI() const { return TLI; }


public:

  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);


  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,

                               TTI::UnrollingPreferences &UP,

                               OptimizationRemarkEmitter *ORE) const override;


  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,

                             TTI::PeelingPreferences &PP) const override;


  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override;

};


class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {

  using BaseT = BasicTTIImplBase<GCNTTIImpl>;

  using TTI = TargetTransformInfo;


  friend BaseT;


  const GCNSubtarget *ST;

  const SITargetLowering *TLI;

  AMDGPUTTIImpl CommonTTI;

  bool IsGraphics;

  bool HasFP32Denormals;

  bool HasFP64FP16Denormals;

  static constexpr bool InlinerVectorBonusPercent = 0;


  static const FeatureBitset InlineFeatureIgnoreList;


  const GCNSubtarget *getST() const { return ST; }

  const SITargetLowering *getTLI() const { return TLI; }


  static inline int getFullRateInstrCost() {

    return TargetTransformInfo::TCC_Basic;

  }


  static inline int getHalfRateInstrCost(TTI::TargetCostKind CostKind) {

    return CostKind == TTI::TCK_CodeSize ? 2

                                         : 2 * TargetTransformInfo::TCC_Basic;

  }


  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe

  // should be 2 or 4.

  static inline int getQuarterRateInstrCost(TTI::TargetCostKind CostKind) {

    return CostKind == TTI::TCK_CodeSize ? 2

                                         : 4 * TargetTransformInfo::TCC_Basic;

  }


  int getTransInstrCost(TTI::TargetCostKind CostKind) const;


  // On some parts, normal fp64 operations are half rate, and others

  // quarter. This also applies to some integer operations.

  int get64BitInstrCost(TTI::TargetCostKind CostKind) const;


  std::pair<InstructionCost, MVT> getTypeLegalizationCost(Type *Ty) const;


  /// \returns true if V might be divergent even when all of its operands

  /// are uniform.

  bool isSourceOfDivergence(const Value *V) const;


  /// Returns true for the target specific set of operations which produce

  /// uniform result even taking non-uniform arguments.

  bool isAlwaysUniform(const Value *V) const;


public:

  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);


  bool hasBranchDivergence(const Function *F = nullptr) const override;


  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,

                               TTI::UnrollingPreferences &UP,

                               OptimizationRemarkEmitter *ORE) const override;


  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,

                             TTI::PeelingPreferences &PP) const override;


  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override {

    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");

    return TTI::PSK_FastHardware;

  }


  unsigned getNumberOfRegisters(unsigned RCID) const override;

  TypeSize

  getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const override;

  unsigned getMinVectorRegisterBitWidth() const override;

  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override;

  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,

                               unsigned ChainSizeInBytes,

                               VectorType *VecTy) const override;

  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,

                                unsigned ChainSizeInBytes,

                                VectorType *VecTy) const override;

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override;


  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,

                                  unsigned AddrSpace) const;

  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,

                                   unsigned AddrSpace) const override;

  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,

                                    unsigned AddrSpace) const override;


  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override;

  Type *getMemcpyLoopLoweringType(

      LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,

      unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,

      std::optional<uint32_t> AtomicElementSize) const override;


  void getMemcpyLoopResidualLoweringType(

      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,

      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,

      Align SrcAlign, Align DestAlign,

      std::optional<uint32_t> AtomicCpySize) const override;

  unsigned getMaxInterleaveFactor(ElementCount VF) const override;


  bool getTgtMemIntrinsic(IntrinsicInst *Inst,

                          MemIntrinsicInfo &Info) const override;


  InstructionCost getArithmeticInstrCost(

      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,

      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},

      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},

      ArrayRef<const Value *> Args = {},

      const Instruction *CxtI = nullptr) const override;


  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,

                                 const Instruction *I = nullptr) const override;


  bool isInlineAsmSourceOfDivergence(const CallInst *CI,

                                     ArrayRef<unsigned> Indices = {}) const;


  using BaseT::getVectorInstrCost;

  InstructionCost

  getVectorInstrCost(unsigned Opcode, Type *ValTy, TTI::TargetCostKind CostKind,

                     unsigned Index, const Value *Op0, const Value *Op1,

                     TTI::VectorInstrContext VIC =

                         TTI::VectorInstrContext::None) const override;


  bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const;


  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {

    // Address space casts must cast between different address spaces.

    if (FromAS == ToAS)

      return false;


    // Casts between any aliasing address spaces are valid.

    return AMDGPU::addrspacesMayAlias(FromAS, ToAS);

  }


  bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {

    return AMDGPU::addrspacesMayAlias(AS0, AS1);

  }


  unsigned getFlatAddressSpace() const override {

    // Don't bother running InferAddressSpaces pass on graphics shaders which

    // don't use flat addressing.

    if (IsGraphics)

      return -1;

    return AMDGPUAS::FLAT_ADDRESS;

  }


  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,

                                  Intrinsic::ID IID) const override;


  bool


  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {

    return AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&

           AS != AMDGPUAS::PRIVATE_ADDRESS;

  }


  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,

                                          Value *NewV) const override;


  bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0,

                                 const Value *Op1, InstCombiner &IC) const;


  bool simplifyDemandedLaneMaskArg(InstCombiner &IC, IntrinsicInst &II,

                                   unsigned LaneAgIdx) const;


  std::optional<Instruction *>

  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;


  Value *simplifyAMDGCNLaneIntrinsicDemanded(InstCombiner &IC,

                                             IntrinsicInst &II,

                                             const APInt &DemandedElts,

                                             APInt &UndefElts) const;


  Instruction *hoistLaneIntrinsicThroughOperand(InstCombiner &IC,

                                                IntrinsicInst &II) const;


  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(

      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,

      APInt &UndefElts2, APInt &UndefElts3,

      std::function<void(Instruction *, unsigned, APInt, APInt &)>

          SimplifyAndSetOp) const override;


  InstructionCost getVectorSplitCost() const { return 0; }


  InstructionCost

  getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,

                 ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,

                 VectorType *SubTp, ArrayRef<const Value *> Args = {},

                 const Instruction *CxtI = nullptr) const override;


  bool isProfitableToSinkOperands(Instruction *I,

                                  SmallVectorImpl<Use *> &Ops) const override;


  bool areInlineCompatible(const Function *Caller,

                           const Function *Callee) const override;


  int getInliningLastCallToStaticBonus() const override;

  unsigned getInliningThresholdMultiplier() const override { return 11; }

  unsigned adjustInliningThreshold(const CallBase *CB) const override;

  unsigned getCallerAllocaCost(const CallBase *CB,

                               const AllocaInst *AI) const override;


  int getInlinerVectorBonusPercent() const override {

    return InlinerVectorBonusPercent;

  }


  InstructionCost

  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,

                             std::optional<FastMathFlags> FMF,

                             TTI::TargetCostKind CostKind) const override;


  InstructionCost getPartialReductionCost(

      unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,

      ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,

      TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,

      TTI::TargetCostKind CostKind,

      std::optional<FastMathFlags> FMF) const override {

    return InstructionCost::getInvalid();

  }


  InstructionCost

  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,

                        TTI::TargetCostKind CostKind) const override;

  InstructionCost

  getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,

                         TTI::TargetCostKind CostKind) const override;


  /// Data cache line size for LoopDataPrefetch pass. Has no use before GFX12.

  unsigned getCacheLineSize() const override { return 128; }


  /// How much before a load we should place the prefetch instruction.

  /// This is currently measured in number of IR instructions.

  unsigned getPrefetchDistance() const override;


  /// \return if target want to issue a prefetch in address space \p AS.

  bool shouldPrefetchAddressSpace(unsigned AS) const override;

  void collectKernelLaunchBounds(

      const Function &F,

      SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override;


  enum class KnownIEEEMode { Unknown, On, Off };


  /// Return KnownIEEEMode::On if we know if the use context can assume

  /// "amdgpu-ieee"="true" and KnownIEEEMode::Off if we can assume

  /// "amdgpu-ieee"="false".

  KnownIEEEMode fpenvIEEEMode(const Instruction &I) const;


  /// Account for loads of i8 vector types to have reduced cost. For

  /// example the cost of load 4 i8s values is one is the cost of loading

  /// a single i32 value.

  InstructionCost getMemoryOpCost(

      unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,

      TTI::TargetCostKind CostKind,

      TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},

      const Instruction *I = nullptr) const override;


  /// When counting parts on AMD GPUs, account for i8s being grouped

  /// together under a single i32 value. Otherwise fall back to base

  /// implementation.

  unsigned getNumberOfParts(Type *Tp) const override;


  ValueUniformity getValueUniformity(const Value *V) const override;


  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,

                                       StackOffset BaseOffset, bool HasBaseReg,

                                       int64_t Scale,

                                       unsigned AddrSpace) const override;


  bool isLSRCostLess(const TTI::LSRCost &A,

                     const TTI::LSRCost &B) const override;

  bool isNumRegsMajorCostOfLSR() const override;

  bool shouldDropLSRSolutionIfLessProfitable() const override;


  bool isUniform(const Instruction *I,

                 const SmallBitVector &UniformArgs) const override;

};


} // end namespace llvm


#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

AMDGPUAddrSpace.h
AMDGPU address space definition.

AMDGPU.h

BasicTTIImpl.h
This file provides a helper that implements much of the TTI interface in terms of the target-independ...

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

CostKind
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))

IntrinsicCostStrategy::InstructionCost
@ InstructionCost
Definition CostModel.cpp:51

TemplateParamKind::Type
@ Type
Definition ItaniumDemangle.h:1243

Ops
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Definition ItaniumDemangle.h:3391

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

II
uint64_t IntrinsicInst * II
Definition NVVMIntrRange.cpp:46

llvm::AMDGPUTTIImpl
Definition AMDGPUTargetTransformInfo.h:36

llvm::AMDGPUTTIImpl::getMaxMemIntrinsicInlineSizeThreshold
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override
Definition AMDGPUTargetTransformInfo.cpp:283

llvm::AMDGPUTTIImpl::AMDGPUTTIImpl
AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
Definition AMDGPUTargetTransformInfo.cpp:108

llvm::AMDGPUTTIImpl::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
Definition AMDGPUTargetTransformInfo.cpp:278

llvm::AMDGPUTTIImpl::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
Definition AMDGPUTargetTransformInfo.cpp:114

llvm::AMDGPUTargetMachine
Definition AMDGPUTargetMachine.h:34

llvm::APInt
Class for arbitrary precision integers.
Definition APInt.h:78

llvm::AllocaInst
an instruction to allocate memory on the stack
Definition Instructions.h:65

llvm::ArrayRef
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40

llvm::BasicTTIImplBase< GCNTTIImpl >::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
Definition BasicTTIImpl.h:1461

llvm::BasicTTIImplBase< AMDGPUTTIImpl >::BasicTTIImplBase
BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
Definition BasicTTIImpl.h:378

llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition InstrTypes.h:1181

llvm::ElementCount
Definition TypeSize.h:298

llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23

llvm::FeatureBitset
Container class for subtarget features.
Definition SubtargetFeature.h:42

llvm::Function
Definition Function.h:65

llvm::GCNSubtarget
Definition GCNSubtarget.h:35

llvm::GCNTTIImpl::simplifyDemandedLaneMaskArg
bool simplifyDemandedLaneMaskArg(InstCombiner &IC, IntrinsicInst &II, unsigned LaneAgIdx) const
Simplify a lane index operand (e.g.
Definition AMDGPUInstCombineIntrinsic.cpp:526

llvm::GCNTTIImpl::GCNTTIImpl
GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
Definition AMDGPUTargetTransformInfo.cpp:306

llvm::GCNTTIImpl::getLoadStoreVecRegBitWidth
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override
Definition AMDGPUTargetTransformInfo.cpp:381

llvm::GCNTTIImpl::getScalingFactorCost
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Definition AMDGPUTargetTransformInfo.cpp:1834

llvm::GCNTTIImpl::getShuffleCost
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
Definition AMDGPUTargetTransformInfo.cpp:1325

llvm::GCNTTIImpl::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Account for loads of i8 vector types to have reduced cost.
Definition AMDGPUTargetTransformInfo.cpp:1787

llvm::GCNTTIImpl::instCombineIntrinsic
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
Definition AMDGPUInstCombineIntrinsic.cpp:1053

llvm::GCNTTIImpl::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
Definition AMDGPUTargetTransformInfo.cpp:531

llvm::GCNTTIImpl::collectKernelLaunchBounds
void collectKernelLaunchBounds(const Function &F, SmallVectorImpl< std::pair< StringRef, int64_t > > &LB) const override
Definition AMDGPUTargetTransformInfo.cpp:1754

llvm::GCNTTIImpl::addrspacesMayAlias
bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override
Definition AMDGPUTargetTransformInfo.h:198

llvm::GCNTTIImpl::hoistLaneIntrinsicThroughOperand
Instruction * hoistLaneIntrinsicThroughOperand(InstCombiner &IC, IntrinsicInst &II) const
Definition AMDGPUInstCombineIntrinsic.cpp:584

llvm::GCNTTIImpl::isUniform
bool isUniform(const Instruction *I, const SmallBitVector &UniformArgs) const override
Definition AMDGPUTargetTransformInfo.cpp:1881

llvm::GCNTTIImpl::KnownIEEEMode
KnownIEEEMode
Definition AMDGPUTargetTransformInfo.h:303

llvm::GCNTTIImpl::KnownIEEEMode::On
@ On
Definition AMDGPUTargetTransformInfo.h:303

llvm::GCNTTIImpl::KnownIEEEMode::Unknown
@ Unknown
Definition AMDGPUTargetTransformInfo.h:303

llvm::GCNTTIImpl::KnownIEEEMode::Off
@ Off
Definition AMDGPUTargetTransformInfo.h:303

llvm::GCNTTIImpl::isLegalToVectorizeStoreChain
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const override
Definition AMDGPUTargetTransformInfo.cpp:417

llvm::GCNTTIImpl::getInlinerVectorBonusPercent
int getInlinerVectorBonusPercent() const override
Definition AMDGPUTargetTransformInfo.h:265

llvm::GCNTTIImpl::isInlineAsmSourceOfDivergence
bool isInlineAsmSourceOfDivergence(const CallInst *CI, ArrayRef< unsigned > Indices={}) const
Analyze if the results of inline asm are divergent.
Definition AMDGPUTargetTransformInfo.cpp:1052

llvm::GCNTTIImpl::isReadRegisterSourceOfDivergence
bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const
Definition AMDGPUTargetTransformInfo.cpp:1088

llvm::GCNTTIImpl::getMaximumVF
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override
Definition AMDGPUTargetTransformInfo.cpp:349

llvm::GCNTTIImpl::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned RCID) const override
Definition AMDGPUTargetTransformInfo.cpp:321

llvm::GCNTTIImpl::canHaveNonUndefGlobalInitializerInAddressSpace
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override
Definition AMDGPUTargetTransformInfo.h:214

llvm::GCNTTIImpl::isLegalToVectorizeLoadChain
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const override
Definition AMDGPUTargetTransformInfo.cpp:411

llvm::GCNTTIImpl::getPartialReductionCost
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
Definition AMDGPUTargetTransformInfo.h:274

llvm::GCNTTIImpl::getStoreVectorFactor
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const override
Definition AMDGPUTargetTransformInfo.cpp:371

llvm::GCNTTIImpl::isLegalToVectorizeMemChain
bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition AMDGPUTargetTransformInfo.cpp:398

llvm::GCNTTIImpl::getCacheLineSize
unsigned getCacheLineSize() const override
Data cache line size for LoopDataPrefetch pass. Has no use before GFX12.
Definition AMDGPUTargetTransformInfo.h:291

llvm::GCNTTIImpl::isLSRCostLess
bool isLSRCostLess(const TTI::LSRCost &A, const TTI::LSRCost &B) const override
Definition AMDGPUTargetTransformInfo.cpp:1857

llvm::GCNTTIImpl::shouldPrefetchAddressSpace
bool shouldPrefetchAddressSpace(unsigned AS) const override
Definition AMDGPUTargetTransformInfo.cpp:1750

llvm::GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
Definition AMDGPUInstCombineIntrinsic.cpp:2472

llvm::GCNTTIImpl::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
Definition AMDGPUTargetTransformInfo.cpp:1007

llvm::GCNTTIImpl::hasBranchDivergence
bool hasBranchDivergence(const Function *F=nullptr) const override
Definition AMDGPUTargetTransformInfo.cpp:317

llvm::GCNTTIImpl::rewriteIntrinsicWithAddressSpace
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const override
Definition AMDGPUTargetTransformInfo.cpp:1272

llvm::GCNTTIImpl::getCallerAllocaCost
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const override
Definition AMDGPUTargetTransformInfo.cpp:1666

llvm::GCNTTIImpl::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(ElementCount VF) const override
Definition AMDGPUTargetTransformInfo.cpp:496

llvm::GCNTTIImpl::getMemcpyLoopResidualLoweringType
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicCpySize) const override
Definition AMDGPUTargetTransformInfo.cpp:454

llvm::GCNTTIImpl::getArithmeticReductionCost
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
Definition AMDGPUTargetTransformInfo.cpp:975

llvm::GCNTTIImpl::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Get intrinsic cost based on arguments.
Definition AMDGPUTargetTransformInfo.cpp:712

llvm::GCNTTIImpl::getInliningThresholdMultiplier
unsigned getInliningThresholdMultiplier() const override
Definition AMDGPUTargetTransformInfo.h:260

llvm::GCNTTIImpl::getLoadVectorFactor
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const override
Definition AMDGPUTargetTransformInfo.cpp:360

llvm::GCNTTIImpl::getPrefetchDistance
unsigned getPrefetchDistance() const override
How much before a load we should place the prefetch instruction.
Definition AMDGPUTargetTransformInfo.cpp:1746

llvm::GCNTTIImpl::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
Definition AMDGPUTargetTransformInfo.cpp:946

llvm::GCNTTIImpl::fpenvIEEEMode
KnownIEEEMode fpenvIEEEMode(const Instruction &I) const
Return KnownIEEEMode::On if we know if the use context can assume "amdgpu-ieee"="true" and KnownIEEEM...
Definition AMDGPUTargetTransformInfo.cpp:1771

llvm::GCNTTIImpl::adjustInliningThreshold
unsigned adjustInliningThreshold(const CallBase *CB) const override
Definition AMDGPUTargetTransformInfo.cpp:1655

llvm::GCNTTIImpl::isProfitableToSinkOperands
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Whether it is profitable to sink the operands of an Instruction I to the basic block of I.
Definition AMDGPUTargetTransformInfo.cpp:1469

llvm::GCNTTIImpl::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
Definition AMDGPUTargetTransformInfo.cpp:505

llvm::GCNTTIImpl::areInlineCompatible
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
Definition AMDGPUTargetTransformInfo.cpp:1537

llvm::GCNTTIImpl::getMinMaxReductionCost
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
Try to calculate op costs for min/max reduction operations.
Definition AMDGPUTargetTransformInfo.cpp:993

llvm::GCNTTIImpl::shouldDropLSRSolutionIfLessProfitable
bool shouldDropLSRSolutionIfLessProfitable() const override
Definition AMDGPUTargetTransformInfo.cpp:1876

llvm::GCNTTIImpl::getInliningLastCallToStaticBonus
int getInliningLastCallToStaticBonus() const override
Definition AMDGPUTargetTransformInfo.cpp:1650

llvm::GCNTTIImpl::getFlatAddressSpace
unsigned getFlatAddressSpace() const override
Definition AMDGPUTargetTransformInfo.h:202

llvm::GCNTTIImpl::getVectorSplitCost
InstructionCost getVectorSplitCost() const
Definition AMDGPUTargetTransformInfo.h:245

llvm::GCNTTIImpl::simplifyAMDGCNLaneIntrinsicDemanded
Value * simplifyAMDGCNLaneIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, const APInt &DemandedElts, APInt &UndefElts) const
Definition AMDGPUInstCombineIntrinsic.cpp:2406

llvm::GCNTTIImpl::collectFlatAddressOperands
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const override
Definition AMDGPUTargetTransformInfo.cpp:1256

llvm::GCNTTIImpl::getPopcntSupport
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
Definition AMDGPUTargetTransformInfo.h:126

llvm::GCNTTIImpl::getValueUniformity
ValueUniformity getValueUniformity(const Value *V) const override
Definition AMDGPUTargetTransformInfo.cpp:1815

llvm::GCNTTIImpl::getNumberOfParts
unsigned getNumberOfParts(Type *Tp) const override
When counting parts on AMD GPUs, account for i8s being grouped together under a single i32 value.
Definition AMDGPUTargetTransformInfo.cpp:1805

llvm::GCNTTIImpl::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
Definition AMDGPUTargetTransformInfo.cpp:1716

llvm::GCNTTIImpl::canSimplifyLegacyMulToMul
bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0, const Value *Op1, InstCombiner &IC) const
Definition AMDGPUInstCombineIntrinsic.cpp:399

llvm::GCNTTIImpl::getMinVectorRegisterBitWidth
unsigned getMinVectorRegisterBitWidth() const override
Definition AMDGPUTargetTransformInfo.cpp:345

llvm::GCNTTIImpl::getRegisterBitWidth
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const override
Definition AMDGPUTargetTransformInfo.cpp:333

llvm::GCNTTIImpl::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR() const override
Definition AMDGPUTargetTransformInfo.cpp:1871

llvm::GCNTTIImpl::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
Definition AMDGPUTargetTransformInfo.cpp:1710

llvm::GCNTTIImpl::getMemcpyLoopLoweringType
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicElementSize) const override
Definition AMDGPUTargetTransformInfo.cpp:427

llvm::GCNTTIImpl::getMaxMemIntrinsicInlineSizeThreshold
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override
Definition AMDGPUTargetTransformInfo.cpp:423

llvm::GCNTTIImpl::isValidAddrSpaceCast
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override
Definition AMDGPUTargetTransformInfo.h:189

llvm::InstCombiner
The core instruction combiner logic.
Definition InstCombiner.h:48

llvm::InstructionCost
Definition InstructionCost.h:30

llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition InstructionCost.h:82

llvm::Instruction
Definition Instruction.h:69

llvm::IntrinsicCostAttributes
Definition TargetTransformInfo.h:181

llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition IntrinsicInst.h:49

llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68

llvm::Loop
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40

llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition OptimizationRemarkEmitter.h:33

llvm::SITargetLowering
Definition SIISelLowering.h:32

llvm::ScalarEvolution
The main scalar evolution driver.
Definition ScalarEvolution.h:625

llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition SmallVector.h:581

llvm::TargetLoweringBase
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
Definition TargetLowering.h:199

llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition TargetSubtargetInfo.h:66

llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition TargetTransformInfo.h:271

llvm::TargetTransformInfo::VectorInstrContext
VectorInstrContext
Represents a hint about the context in which an insert/extract is used.
Definition TargetTransformInfo.h:1067

llvm::TargetTransformInfo::VectorInstrContext::None
@ None
The insert/extract is not used with a load/store.
Definition TargetTransformInfo.h:1068

llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition TargetTransformInfo.h:334

llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition TargetTransformInfo.h:337

llvm::TargetTransformInfo::OP_None
@ OP_None
Definition TargetTransformInfo.h:1275

llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition TargetTransformInfo.h:1350

llvm::TargetTransformInfo::PopcntSupportKind
PopcntSupportKind
Flags indicating the kind of support for population count.
Definition TargetTransformInfo.h:825

llvm::TargetTransformInfo::PSK_FastHardware
@ PSK_FastHardware
Definition TargetTransformInfo.h:825

llvm::TargetTransformInfo::PartialReductionExtendKind
PartialReductionExtendKind
Definition TargetTransformInfo.h:273

llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition TargetTransformInfo.h:361

llvm::TargetTransformInfo::ShuffleKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Definition TargetTransformInfo.h:1246

llvm::TargetTransformInfo::OK_AnyValue
@ OK_AnyValue
Definition TargetTransformInfo.h:1267

llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47

llvm::TypeSize
Definition TypeSize.h:332

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46

llvm::Value
LLVM Value Representation.
Definition Value.h:75

llvm::VectorType
Base class of all SIMD vector types.
Definition DerivedTypes.h:482

uint64_t

llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition AMDGPUAddrSpace.h:34

llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition AMDGPUAddrSpace.h:36

llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition AMDGPUAddrSpace.h:32

llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition AMDGPUAddrSpace.h:38

llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition AMDGPUMetadata.h:396

llvm::AMDGPU::addrspacesMayAlias
static bool addrspacesMayAlias(unsigned AS1, unsigned AS2)
Definition AMDGPU.h:619

llvm::Intrinsic::ID
unsigned ID
Definition GenericSSAContext.h:28

llvm::NVPTXAS::AddressSpace
AddressSpace
Definition NVPTXAddrSpace.h:21

llvm::sandboxir::Instruction
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition FunctionInfo.h:25

llvm::Length
@ Length
Definition DWP.cpp:558

llvm::Value
FunctionAddr VTableAddr Value
Definition InstrProf.h:137

llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279

llvm::ArrayRef
ArrayRef(const T &OneElt) -> ArrayRef< T >

llvm::VFParamKind::Vector
@ Vector
Definition VFABIDemangler.h:27

llvm::ValueUniformity
ValueUniformity
Enum describing how values behave with respect to uniformity and divergence, to answer the question: ...
Definition Uniformity.h:18

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39

llvm::MemIntrinsicInfo
Information about a load/store intrinsic defined by the target.
Definition TargetTransformInfo.h:76

llvm::TargetTransformInfo::LSRCost
Definition TargetTransformInfo.h:627

llvm::TargetTransformInfo::OperandValueInfo
Definition TargetTransformInfo.h:1283

llvm::TargetTransformInfo::PeelingPreferences
Definition TargetTransformInfo.h:765

llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition TargetTransformInfo.h:641