doxygen/R600TargetTransformInfo_8cpp_source.html

//===- R600TargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// \file

// This file implements a TargetTransformInfo analysis pass specific to the

// R600 target machine. It uses the target's detailed information to provide

// more precise answers to certain TTI queries, while letting the target

// independent and default TTI implementations handle the rest.

//

//===----------------------------------------------------------------------===//


#include "R600TargetTransformInfo.h"

#include "AMDGPU.h"

#include "AMDGPUTargetMachine.h"

#include "R600Subtarget.h"


using namespace llvm;


#define DEBUG_TYPE "R600tti"


// Builds the R600 TTI implementation for function F.
//
// The base class is set up from TM and F's data layout. ST is the subtarget
// that TM reports for F, downcast to R600Subtarget; TLI is the target lowering
// object obtained from that subtarget; CommonTTI is constructed from the same
// (TM, F) pair.
//
// NOTE(review): the static_cast assumes TM always yields an R600Subtarget for
// F, and TLI's initializer reads ST, so ST is presumably declared before TLI
// in the class - confirm against the header.
R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)

    : BaseT(TM, F.getDataLayout()),

      ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),

      TLI(ST->getTargetLowering()), CommonTTI(TM, F) {}


// Reports the hardware register count. The result is a fixed value; the Vec
// argument does not influence it.
unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
  // XXX - 4 channels. Should these count as vector instead?
  constexpr unsigned NumChannels = 4;
  // Presumably the number of registers available per channel - TODO confirm.
  constexpr unsigned CountPerChannel = 128;
  return NumChannels * CountPerChannel;
}


// The register count reported here is exactly the hardware register count for
// the same Vec argument.
unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const {
  const unsigned HardwareCount = getHardwareNumberOfRegisters(Vec);
  return HardwareCount;
}


// Returns a fixed register width of 32 bits. The requested register kind K is
// not consulted, so every kind gets the same answer.
TypeSize
R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
  constexpr unsigned WidthInBits = 32;
  return TypeSize::getFixed(WidthInBits);
}


// Returns the minimum vector register width, which is a constant 32 bits.
unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const {
  constexpr unsigned MinWidthInBits = 32;
  return MinWidthInBits;
}


// Returns the vector register bit width to use for loads and stores in the
// given address space:
//   global / constant                     -> 128
//   local / region                        -> 64
//   private                               -> 32
//   param D / param I / constant buffers  -> 128
// Any other address space hits llvm_unreachable.
unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
  // Classify the address space up front. These are plain comparisons, so
  // evaluating them eagerly has no side effects.
  const bool IsGlobalOrConstant = AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
                                  AddrSpace == AMDGPUAS::CONSTANT_ADDRESS;
  const bool IsLocalOrRegion = AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
                               AddrSpace == AMDGPUAS::REGION_ADDRESS;
  const bool IsPrivate = AddrSpace == AMDGPUAS::PRIVATE_ADDRESS;
  const bool IsConstantBuffer = AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
                                AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15;
  const bool IsParamOrConstantBuffer =
      AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
      AddrSpace == AMDGPUAS::PARAM_I_ADDRESS || IsConstantBuffer;

  // The checks are made in a fixed order: global/constant, local/region,
  // private, then param/constant-buffer.
  if (IsGlobalOrConstant)
    return 128;
  if (IsLocalOrRegion)
    return 64;
  if (IsPrivate)
    return 32;
  if (IsParamOrConstantBuffer)
    return 128;

  llvm_unreachable("unhandled address space");
}


// Decides whether a chain of memory accesses may be vectorized. Only the
// address space matters: every space except private is accepted, and the
// chain size and alignment are not inspected.
bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                             Align Alignment,
                                             unsigned AddrSpace) const {
  // Flat stores are allowed to vectorize even though they might need to be
  // decomposed later if they can touch private memory. There is not enough
  // context to decide that here, and legalization can handle it.
  const bool IsPrivate = AddrSpace == AMDGPUAS::PRIVATE_ADDRESS;
  return !IsPrivate;
}


// Load chains follow the shared memory-chain legality rule; all arguments are
// forwarded unchanged.
bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                              Align Alignment,
                                              unsigned AddrSpace) const {
  const bool IsLegal =
      isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
  return IsLegal;
}


// Store chains follow the shared memory-chain legality rule; all arguments are
// forwarded unchanged.
bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                               Align Alignment,
                                               unsigned AddrSpace) const {
  const bool IsLegal =
      isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
  return IsLegal;
}


// Returns the maximum interleave factor for vectorization factor VF: 1 when
// VF is scalar, 8 otherwise.
unsigned R600TTIImpl::getMaxInterleaveFactor(ElementCount VF) {
  // A factor of 1 disables unrolling when the loop is not vectorized.
  // TODO: Enable this again.
  const bool IsScalarVF = VF.isScalar();
  return IsScalarVF ? 1 : 8;
}


// Returns the cost of a control-flow instruction.
//
// For the code-size and size-and-latency cost kinds, PHI costs 0 and every
// other opcode costs 1. For the remaining cost kinds, Br and Ret cost 10 and
// all other opcodes are delegated to the base implementation.
InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode,
                                            TTI::TargetCostKind CostKind,
                                            const Instruction *I) {
  const bool IsSizeBasedKind = CostKind == TTI::TCK_CodeSize ||
                               CostKind == TTI::TCK_SizeAndLatency;
  if (IsSizeBasedKind) {
    const bool IsPhi = Opcode == Instruction::PHI;
    return IsPhi ? 0 : 1;
  }

  // XXX - For some reason this isn't called for switch.
  const bool IsBranchOrReturn =
      Opcode == Instruction::Br || Opcode == Instruction::Ret;
  if (IsBranchOrReturn)
    return 10;

  return BaseT::getCFInstrCost(Opcode, CostKind, I);
}


// Returns the cost of a vector element instruction.
//
// Only ExtractElement and InsertElement on vectors whose element type is at
// least 32 bits wide are costed here: 2 when Index is ~0u, 0 otherwise.
// Everything else is delegated to the base implementation.
InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                                TTI::TargetCostKind CostKind,
                                                unsigned Index, Value *Op0,
                                                Value *Op1) {
  const bool IsElementAccess = Opcode == Instruction::ExtractElement ||
                               Opcode == Instruction::InsertElement;
  if (!IsElementAccess)
    return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1);

  Type *ElementTy = cast<VectorType>(ValTy)->getElementType();
  unsigned ElementBits = DL.getTypeSizeInBits(ElementTy);
  if (ElementBits < 32)
    return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1);

  // An extract is only a subregister read, so it costs nothing. An insert is
  // treated as free too: scalarizing operations should not be penalized, and
  // no copy into a different register class is required.

  // Dynamic indexing isn't free and is best avoided.
  const bool IsDynamicIndex = Index == ~0u;
  return IsDynamicIndex ? 2 : 0;
}


// Unrolling preferences are not computed here; the request is handed to the
// CommonTTI member with all arguments passed through unchanged.
void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                          TTI::UnrollingPreferences &UP,
                                          OptimizationRemarkEmitter *ORE) {
  auto &Shared = CommonTTI;
  Shared.getUnrollingPreferences(L, SE, UP, ORE);
}


// Peeling preferences are not computed here; the request is handed to the
// CommonTTI member with all arguments passed through unchanged.
void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                        TTI::PeelingPreferences &PP) {
  auto &Shared = CommonTTI;
  Shared.getPeelingPreferences(L, SE, PP);
}

const
aarch64 promote const
Definition: AArch64PromoteConstant.cpp:232

AMDGPUTargetMachine.h
The AMDGPU TargetMachine interface definition for hw codegen targets.

AMDGPU.h

CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))

F
#define F(x, y, z)
Definition: MD5.cpp:55

I
#define I(x, y, z)
Definition: MD5.cpp:58

TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:52

R600Subtarget.h
AMDGPU R600 specific subclass of TargetSubtarget.

R600TargetTransformInfo.h
This file provides a TargetTransformInfo::Concept conforming object specific to the R600 target machine.

llvm::AMDGPUTTIImpl::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: AMDGPUTargetTransformInfo.cpp:265

llvm::AMDGPUTTIImpl::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: AMDGPUTargetTransformInfo.cpp:103

llvm::AMDGPUTargetMachine
Definition: AMDGPUTargetMachine.h:29

llvm::BasicTTIImplBase< R600TTIImpl >

llvm::BasicTTIImplBase< R600TTIImpl >::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
Definition: BasicTTIImpl.h:1279

llvm::BasicTTIImplBase< R600TTIImpl >::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1222

llvm::BasicTTIImplBase< R600TTIImpl >::DL
const DataLayout & DL
Definition: TargetTransformInfoImpl.h:39

llvm::DataLayout::getTypeSizeInBits
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:622

llvm::ElementCount
Definition: TypeSize.h:300

llvm::ElementCount::isScalar
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322

llvm::Function
Definition: Function.h:64

llvm::InstructionCost
Definition: InstructionCost.h:29

llvm::Instruction
Definition: Instruction.h:68

llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44

llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:34

llvm::R600Subtarget
Definition: R600Subtarget.h:30

llvm::R600TTIImpl::isLegalToVectorizeMemChain
bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: R600TargetTransformInfo.cpp:64

llvm::R600TTIImpl::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: R600TargetTransformInfo.cpp:142

llvm::R600TTIImpl::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: R600TargetTransformInfo.cpp:94

llvm::R600TTIImpl::isLegalToVectorizeStoreChain
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: R600TargetTransformInfo.cpp:79

llvm::R600TTIImpl::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(ElementCount VF)
Definition: R600TargetTransformInfo.cpp:85

llvm::R600TTIImpl::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: R600TargetTransformInfo.cpp:136

llvm::R600TTIImpl::getLoadStoreVecRegBitWidth
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
Definition: R600TargetTransformInfo.cpp:46

llvm::R600TTIImpl::R600TTIImpl
R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
Definition: R600TargetTransformInfo.cpp:26

llvm::R600TTIImpl::getHardwareNumberOfRegisters
unsigned getHardwareNumberOfRegisters(bool Vec) const
Definition: R600TargetTransformInfo.cpp:31

llvm::R600TTIImpl::getRegisterBitWidth
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const
Definition: R600TargetTransformInfo.cpp:40

llvm::R600TTIImpl::getMinVectorRegisterBitWidth
unsigned getMinVectorRegisterBitWidth() const
Definition: R600TargetTransformInfo.cpp:44

llvm::R600TTIImpl::isLegalToVectorizeLoadChain
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: R600TargetTransformInfo.cpp:73

llvm::R600TTIImpl::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
Definition: R600TargetTransformInfo.cpp:110

llvm::R600TTIImpl::getNumberOfRegisters
unsigned getNumberOfRegisters(bool Vec) const
Definition: R600TargetTransformInfo.cpp:35

llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:450

llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:259

llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:262

llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:263

llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:1143

llvm::TypeSize
Definition: TypeSize.h:334

llvm::TypeSize::getFixed
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45

llvm::Value
LLVM Value Representation.
Definition: Value.h:74

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143

llvm::AMDGPUAS::CONSTANT_BUFFER_15
@ CONSTANT_BUFFER_15
Definition: AMDGPUAddrSpace.h:78

llvm::AMDGPUAS::PARAM_D_ADDRESS
@ PARAM_D_ADDRESS
end Internal address spaces.
Definition: AMDGPUAddrSpace.h:53

llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPUAddrSpace.h:32

llvm::AMDGPUAS::CONSTANT_BUFFER_0
@ CONSTANT_BUFFER_0
Definition: AMDGPUAddrSpace.h:63

llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPUAddrSpace.h:35

llvm::AMDGPUAS::PARAM_I_ADDRESS
@ PARAM_I_ADDRESS
Address space for indirect addressable parameter memory (VTX1).
Definition: AMDGPUAddrSpace.h:55

llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPUAddrSpace.h:34

llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPUAddrSpace.h:31

llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPUAddrSpace.h:36

llvm::dwarf::Index
Index
Definition: Dwarf.h:875

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39

llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:646

llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:531