doxygen/AMDGPUMachineFunction_8cpp_source.html

//===-- AMDGPUMachineFunction.cpp -----------------------------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//


#include "AMDGPUMachineFunction.h"

#include "AMDGPU.h"

#include "AMDGPUPerfHintAnalysis.h"

#include "AMDGPUSubtarget.h"

#include "Utils/AMDGPUBaseInfo.h"

#include "llvm/CodeGen/MachineModuleInfo.h"

#include "llvm/IR/ConstantRange.h"

#include "llvm/IR/Constants.h"

#include "llvm/IR/Metadata.h"

#include "llvm/Target/TargetMachine.h"


using namespace llvm;


/// Look up the global named "llvm.amdgcn.<name of F>.dynlds" in the module
/// that contains \p F.
///
/// \returns the global variable registered under that name, or nullptr when
/// the module has no such global.
static const GlobalVariable *
getKernelDynLDSGlobalFromFunction(const Function &F) {
  // Assemble prefix, function name and suffix in one append.
  SmallString<64> SymbolName;
  SymbolName.append({"llvm.amdgcn.", F.getName(), ".dynlds"});
  return F.getParent()->getNamedGlobal(SymbolName);
}


/// Report whether any formal argument of \p F is a pointer into the
/// AMDGPUAS::LOCAL_ADDRESS address space.
static bool hasLDSKernelArgument(const Function &F) {
  for (const Argument &Param : F.args()) {
    // Non-pointer arguments yield a null cast result and are skipped.
    auto LocalPtr = dyn_cast<PointerType>(Param.getType());
    if (LocalPtr && LocalPtr->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
      return true;
  }
  return false;
}


/// Initialise the per-function state from the calling convention and the
/// function attributes of \p F. \p ST is queried for the explicit kernel
/// argument size when \p F uses a kernel calling convention.
AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
                                             const AMDGPUSubtarget &ST)
    : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
      IsModuleEntryFunction(
          AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
      IsChainFunction(AMDGPU::isChainCC(F.getCallingConv())) {
  // FIXME: KernArgSize should be initialised from ExplicitKernelArgOffset,
  // but the reserved size is not correctly aligned.

  // Boolean-valued function attributes.
  MemoryBound = F.getFnAttribute("amdgpu-memory-bound").getValueAsBool();
  WaveLimiter = F.getFnAttribute("amdgpu-wave-limiter").getValueAsBool();

  // FIXME: It is unclear how this attribute should interact with statically
  // known global sizes.
  StringRef GDSSizeText =
      F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!GDSSizeText.empty())
    GDSSizeText.consumeInteger(0, GDSSize);

  // The attribute's allocation is assumed to precede any known GDS globals.
  StaticGDSSize = GDSSize;

  // "amdgpu-lds-size" holds one or two integers. The optional second one is
  // the maximum value that can be assigned; it is useful in PromoteAlloca or
  // for LDS spills and could be used for diagnostics during codegen.
  const std::pair<unsigned, unsigned> LDSBounds =
      AMDGPU::getIntegerPairAttribute(F, "amdgpu-lds-size", {0, UINT32_MAX},
                                      true);

  // Keeping two separate variables only pays off when the LDS module lowering
  // pass is disabled, and never if graphics does not use dynamic LDS. Cleanup
  // is left for a later change.
  LDSSize = LDSBounds.first;
  StaticLDSSize = LDSSize;

  // Only the kernel calling conventions carry explicit kernel arguments.
  switch (F.getCallingConv()) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
    break;
  default:
    break;
  }

  // FIXME: Shouldn't be target specific
  const Attribute NoSignedZeros = F.getFnAttribute("no-signed-zeros-fp-math");
  NoSignedZerosFPMath = NoSignedZeros.isStringAttribute() &&
                        NoSignedZeros.getValueAsString() == "true";

  // Either a per-kernel ".dynlds" global or a local-address pointer argument
  // marks the function as using dynamic LDS; otherwise the flag is untouched.
  if (getKernelDynLDSGlobalFromFunction(F) || hasLDSKernelArgument(F))
    UsesDynamicLDS = true;
}


/// Assign \p GV an offset in this function's LDS or GDS frame.
///
/// The first call for a given global computes its offset and records it in
/// LocalMemoryObjects; later calls for the same global return the recorded
/// value without allocating again.
///
/// For a global in AMDGPUAS::LOCAL_ADDRESS that carries an absolute address,
/// that address is validated and returned and the frame sizes are left
/// unchanged. Otherwise the global is appended to the static LDS frame and
/// LDSSize is re-derived by aligning the new static size to \p Trailing.
/// Any other global is expected to live in AMDGPUAS::REGION_ADDRESS and is
/// appended to the static GDS frame.
///
/// \param DL       Data layout used for the size and alignment of \p GV.
/// \param GV       Global variable to place.
/// \param Trailing Alignment applied to the total LDS size after placement.
/// \returns the offset assigned to \p GV.
///
/// Calls report_fatal_error if an absolute address is inconsistent with the
/// variable's alignment or, for module entry functions, if the object does
/// not fit inside the current static LDS frame.
unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
                                                  const GlobalVariable &GV,
                                                  Align Trailing) {
  // Insert a placeholder offset; if the global is already known, reuse the
  // offset recorded by the earlier call.
  auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0));
  if (!Entry.second)
    return Entry.first->second;

  Align Alignment =
      DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());

  unsigned Offset;
  if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {

    std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV);
    if (MaybeAbs) {
      // Absolute address LDS variables that exist prior to the LDS lowering
      // pass raise a fatal error in that pass. These failure modes are only
      // reachable if that lowering pass is disabled or broken. If/when adding
      // support for absolute addresses on user specified variables, the
      // alignment check moves to the lowering pass and the frame calculation
      // needs to take the user variables into consideration.

      uint32_t ObjectStart = *MaybeAbs;

      if (ObjectStart != alignTo(ObjectStart, Alignment)) {
        report_fatal_error("Absolute address LDS variable inconsistent with "
                           "variable alignment");
      }

      if (isModuleEntryFunction()) {
        // If this is a module entry function, we can also sanity check against
        // the static frame. Strictly it would be better to check against the
        // attribute, i.e. that the variable is within the always-allocated
        // section, and not within some other non-absolute-address object
        // allocated here, but the extra error detection is minimal and we would
        // have to pass the Function around or cache the attribute value.
        //
        // The check is done in 64 bits and phrased as a subtraction so that it
        // cannot wrap: storing ObjectStart + size in a uint32_t would truncate
        // the sum and let an object that extends past 2^32 appear to fit.
        const uint64_t ObjectSize = DL.getTypeAllocSize(GV.getValueType());
        const uint64_t FrameSize = StaticLDSSize;
        if (ObjectSize > FrameSize || ObjectStart > FrameSize - ObjectSize) {
          report_fatal_error(
              "Absolute address LDS variable outside of static frame");
        }
      }

      Entry.first->second = ObjectStart;
      return ObjectStart;
    }

    // TODO: We should sort these to minimize wasted space due to alignment
    // padding. Currently the padding is decided by the first encountered use
    // during lowering.
    Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);

    StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());

    // Align LDS size to trailing, e.g. for aligning dynamic shared memory
    LDSSize = alignTo(StaticLDSSize, Trailing);
  } else {
    assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
           "expected region address space");

    Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
    StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());

    // FIXME: Apply alignment of dynamic GDS
    GDSSize = StaticGDSSize;
  }

  // Replace the placeholder with the final offset.
  Entry.first->second = Offset;
  return Offset;
}


std::optional<uint32_t>

AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {

  // TODO: Would be more consistent with the abs symbols to use a range

  MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");

  if (MD && MD->getNumOperands() == 1) {

    if (ConstantInt *KnownSize =

            mdconst::extract<ConstantInt>(MD->getOperand(0))) {

      uint64_t ZExt = KnownSize->getZExtValue();

      if (ZExt <= UINT32_MAX) {

        return ZExt;

      }

    }

  }

  return {};

}


std::optional<uint32_t>

AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) {

  if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)

    return {};


  std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();

  if (!AbsSymRange)

    return {};


  if (const APInt *V = AbsSymRange->getSingleElement()) {

    std::optional<uint64_t> ZExt = V->tryZExtValue();

    if (ZExt && (*ZExt <= UINT32_MAX)) {

      return *ZExt;

    }

  }


  return {};

}


void AMDGPUMachineFunction::setDynLDSAlign(const Function &F,

                                           const GlobalVariable &GV) {

  const Module *M = F.getParent();

  const DataLayout &DL = M->getDataLayout();

  assert(DL.getTypeAllocSize(GV.getValueType()).isZero());


  Align Alignment =

      DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());

  if (Alignment <= DynLDSAlign)

    return;


  LDSSize = alignTo(StaticLDSSize, Alignment);

  DynLDSAlign = Alignment;


  // If there is a dynamic LDS variable associated with this function F, every

  // further dynamic LDS instance (allocated by calling setDynLDSAlign) must

  // map to the same address. This holds because no LDS is allocated after the

  // lowering pass if there are dynamic LDS variables present.

  const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F);

  if (Dyn) {

    unsigned Offset = LDSSize; // return this?

    std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn);

    if (!Expect || (Offset != *Expect)) {

      report_fatal_error("Inconsistent metadata on dynamic LDS variable");

    }

  }

}


void AMDGPUMachineFunction::setUsesDynamicLDS(bool DynLDS) {

  UsesDynamicLDS = DynLDS;

}


bool AMDGPUMachineFunction::isDynamicLDSUsed() const { return UsesDynamicLDS; }

AMDGPUBaseInfo.h

hasLDSKernelArgument
static bool hasLDSKernelArgument(const Function &F)
Definition: AMDGPUMachineFunction.cpp:31

getKernelDynLDSGlobalFromFunction
static const GlobalVariable * getKernelDynLDSGlobalFromFunction(const Function &F)
Definition: AMDGPUMachineFunction.cpp:23

AMDGPUMachineFunction.h

AMDGPUPerfHintAnalysis.h
Analyzes if a function is potentially memory bound and if a kernel may benefit from limiting numb...

AMDGPUSubtarget.h
Base class for AMDGPU specific classes of TargetSubtarget.

AMDGPU.h

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: ARMSLSHardening.cpp:73

ConstantRange.h

Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...

F
#define F(x, y, z)
Definition: MD5.cpp:55

MachineModuleInfo.h

Metadata.h
This file contains the declarations for metadata subclasses.

CC
auto CC
Definition: RISCVRedundantCopyElimination.cpp:79

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

llvm::AMDGPUMachineFunction::AMDGPUMachineFunction
AMDGPUMachineFunction(const Function &F, const AMDGPUSubtarget &ST)
Definition: AMDGPUMachineFunction.cpp:42

llvm::AMDGPUMachineFunction::getLDSKernelIdMetadata
static std::optional< uint32_t > getLDSKernelIdMetadata(const Function &F)
Definition: AMDGPUMachineFunction.cpp:166

llvm::AMDGPUMachineFunction::DynLDSAlign
Align DynLDSAlign
Align for dynamic shared memory if any.
Definition: AMDGPUMachineFunction.h:47

llvm::AMDGPUMachineFunction::isDynamicLDSUsed
bool isDynamicLDSUsed() const
Definition: AMDGPUMachineFunction.cpp:232

llvm::AMDGPUMachineFunction::setUsesDynamicLDS
void setUsesDynamicLDS(bool DynLDS)
Definition: AMDGPUMachineFunction.cpp:228

llvm::AMDGPUMachineFunction::WaveLimiter
bool WaveLimiter
Definition: AMDGPUMachineFunction.h:68

llvm::AMDGPUMachineFunction::StaticGDSSize
uint32_t StaticGDSSize
Definition: AMDGPUMachineFunction.h:40

llvm::AMDGPUMachineFunction::LDSSize
uint32_t LDSSize
Number of bytes in the LDS that are being used.
Definition: AMDGPUMachineFunction.h:34

llvm::AMDGPUMachineFunction::setDynLDSAlign
void setDynLDSAlign(const Function &F, const GlobalVariable &GV)
Definition: AMDGPUMachineFunction.cpp:200

llvm::AMDGPUMachineFunction::MaxKernArgAlign
Align MaxKernArgAlign
Definition: AMDGPUMachineFunction.h:31

llvm::AMDGPUMachineFunction::getLDSAbsoluteAddress
static std::optional< uint32_t > getLDSAbsoluteAddress(const GlobalValue &GV)
Definition: AMDGPUMachineFunction.cpp:182

llvm::AMDGPUMachineFunction::GDSSize
uint32_t GDSSize
Definition: AMDGPUMachineFunction.h:35

llvm::AMDGPUMachineFunction::MemoryBound
bool MemoryBound
Definition: AMDGPUMachineFunction.h:65

llvm::AMDGPUMachineFunction::ExplicitKernArgSize
uint64_t ExplicitKernArgSize
Definition: AMDGPUMachineFunction.h:30

llvm::AMDGPUMachineFunction::allocateLDSGlobal
unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV)
Definition: AMDGPUMachineFunction.h:112

llvm::AMDGPUMachineFunction::UsesDynamicLDS
bool UsesDynamicLDS
Definition: AMDGPUMachineFunction.h:50

llvm::AMDGPUMachineFunction::isModuleEntryFunction
bool isModuleEntryFunction() const
Definition: AMDGPUMachineFunction.h:91

llvm::AMDGPUMachineFunction::StaticLDSSize
uint32_t StaticLDSSize
Number of bytes in the LDS allocated statically.
Definition: AMDGPUMachineFunction.h:39

llvm::AMDGPUMachineFunction::NoSignedZerosFPMath
bool NoSignedZerosFPMath
Definition: AMDGPUMachineFunction.h:62

llvm::AMDGPUSubtarget
Definition: AMDGPUSubtarget.h:29

llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:78

llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31

llvm::Attribute
Definition: Attributes.h:67

llvm::Attribute::isStringAttribute
bool isStringAttribute() const
Return true if the attribute is a string (target-dependent) attribute.
Definition: Attributes.cpp:346

llvm::Attribute::getValueAsBool
bool getValueAsBool() const
Return the attribute's value as a boolean.
Definition: Attributes.cpp:377

llvm::Attribute::getValueAsString
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:391

llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:81

llvm::DataLayout
A parsed version of the target data layout string and methods for querying it.
Definition: DataLayout.h:110

llvm::Function
Definition: Function.h:64

llvm::GlobalObject::getAlign
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:80

llvm::GlobalValue
Definition: GlobalValue.h:48

llvm::GlobalValue::getAddressSpace
unsigned getAddressSpace() const
Definition: GlobalValue.h:205

llvm::GlobalValue::getAbsoluteSymbolRange
std::optional< ConstantRange > getAbsoluteSymbolRange() const
If this is an absolute symbol reference, returns the range of the symbol, otherwise returns std::null...
Definition: Globals.cpp:407

llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:296

llvm::GlobalVariable
Definition: GlobalVariable.h:39

llvm::MDNode
Metadata node.
Definition: Metadata.h:1067

llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428

llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1434

llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65

llvm::SmallString
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50

llvm::StringRef::consumeInteger
bool consumeInteger(unsigned Radix, T &Result)
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:484

llvm::StringRef::empty
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45

uint32_t

uint64_t

unsigned

TargetMachine.h

llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPUAddrSpace.h:32

llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPUAddrSpace.h:35

llvm::AMDGPU::getIntegerPairAttribute
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
Definition: AMDGPUBaseInfo.cpp:1296

llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:200

llvm::CallingConv::SPIR_KERNEL
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::Offset
@ Offset
Definition: DWP.cpp:480

llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167

llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39