LLVM  16.0.0git
AMDGPUMachineFunction.cpp
Go to the documentation of this file.
1 //===-- AMDGPUMachineFunction.cpp -------------------------------------------=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "AMDGPU.h"
11 #include "AMDGPUPerfHintAnalysis.h"
12 #include "AMDGPUSubtarget.h"
14 #include "llvm/IR/Constants.h"
16 
17 using namespace llvm;
18 
// Constructor of AMDGPUMachineFunction, taking `const MachineFunction &MF`.
// NOTE(review): the signature line (embedded line 19) and a few statements
// (embedded lines 25, 44 and 47) are absent from this listing; the notes below
// describe only the lines that are visible.
//
// Member initializers: the two entry-function flags are derived from the
// calling convention of MF's IR function, and NoSignedZerosFPMath is copied
// from the target options.
20  : IsEntryFunction(AMDGPU::isEntryFunctionCC(
21  MF.getFunction().getCallingConv())),
22  IsModuleEntryFunction(
23  AMDGPU::isModuleEntryFunctionCC(MF.getFunction().getCallingConv())),
24  NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
26 
27  // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
28  // except reserved size is not correctly aligned.
29  const Function &F = MF.getFunction();
30 
// Cache the boolean value of the "amdgpu-memory-bound" function attribute.
31  Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
32  MemoryBound = MemBoundAttr.getValueAsBool();
33 
// Cache the boolean value of the "amdgpu-wave-limiter" function attribute.
34  Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
35  WaveLimiter = WaveLimitAttr.getValueAsBool();
36 
37  // FIXME: How is this attribute supposed to interact with statically known
38  // global sizes?
// Parse the "amdgpu-gds-size" attribute string into GDSSize when it is
// non-empty. The radix argument is 0 (presumably auto-detected from the
// string's prefix -- confirm against StringRef::consumeInteger). The result
// of consumeInteger is not checked here.
39  StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
40  if (!S.empty())
41  S.consumeInteger(0, GDSSize);
42 
43  // Assume the attribute allocates before any known GDS globals.
45 
46  CallingConv::ID CC = F.getCallingConv();
// NOTE(review): embedded line 47 is missing from this listing; the assignment
// below is presumably guarded by a check on CC -- confirm against the real
// source. ST is likewise declared on a line that is not shown.
// MaxKernArgAlign is passed alongside F to ST.getExplicitKernArgSize.
48  ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
49 }
50 
52  const GlobalVariable &GV,
53  Align Trailing) {
// Assigns GV an offset in the statically allocated LDS or GDS area and returns
// it. The offset is cached in LocalMemoryObjects, so a repeated call for the
// same GV returns the previously assigned offset without allocating again.
// Trailing is the alignment applied to the total LDSSize after the allocation.
// NOTE(review): the first signature line (embedded line 51), the branch
// condition (line 62), the start of the assert (line 73) and line 80 are
// absent from this listing.
//
// Insert a placeholder offset of 0; Entry.second is false when GV was already
// present, in which case the cached offset is returned.
54  auto Entry = LocalMemoryObjects.insert(std::make_pair(&GV, 0));
55  if (!Entry.second)
56  return Entry.first->second;
57 
// Use GV's explicit alignment if it has one, otherwise the ABI alignment of
// its value type.
58  Align Alignment =
59  DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
60 
61  unsigned Offset;
63  /// TODO: We should sort these to minimize wasted space due to alignment
64  /// padding. Currently the padding is decided by the first encountered use
65  /// during lowering.
// Round the running static LDS size up to GV's alignment; that rounded value
// is GV's offset.
66  Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);
67 
// Reserve room for GV itself.
68  StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());
69 
70  // Align LDS size to trailing, e.g. for aligning dynamic shared memory
71  LDSSize = alignTo(StaticLDSSize, Trailing);
72  } else {
74  "expected region address space");
75 
// Same scheme as above, applied to the static GDS size.
76  Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
77  StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());
78 
79  // FIXME: Apply alignment of dynamic GDS
81  }
82 
// Replace the placeholder with the real offset so later calls hit the cache.
83  Entry.first->second = Offset;
84  return Offset;
85 }
86 
// Looks up, in the module that contains F, the global variable named
// "llvm.amdgcn.kernel.<name of F>.lds" and returns the result of
// Module::getNamedGlobal for that name.
// NOTE(review): the line carrying the function name and parameter list
// (embedded line 88) is absent from this listing.
87 const GlobalVariable *
89  const Module *M = F.getParent();
// Build the per-kernel variable name from the fixed prefix, the function name
// and the ".lds" suffix.
90  std::string KernelLDSName = "llvm.amdgcn.kernel.";
91  KernelLDSName += F.getName();
92  KernelLDSName += ".lds";
93  return M->getNamedGlobal(KernelLDSName);
94 }
95 
// Returns true when F carries the "amdgpu-elide-module-lds" function
// attribute. The original note below states what that attribute means.
96 // This kernel calls no functions that require the module lds struct
97 static bool canElideModuleLDS(const Function &F) {
98  return F.hasFnAttribute("amdgpu-elide-module-lds");
99 }
100 
// Body of allocateKnownAddressLDSGlobal(const Function &F): for a module entry
// function, allocates the module-level LDS variable and then the per-kernel
// LDS variable ahead of any other LDS.
// NOTE(review): the signature line (embedded line 101) and the declaration of
// KV (embedded line 128) are absent from this listing.
102  const Module *M = F.getParent();
103 
104  // This function is called before allocating any other LDS so that it can
105  // reliably put values at known addresses. Consequently, dynamic LDS, if
106  // present, will not yet have been allocated
107 
108  assert(getDynLDSAlign() == Align() && "dynamic LDS not yet allocated");
109 
// Nothing is allocated for functions that are not module entry functions.
110  if (isModuleEntryFunction()) {
111 
112  // Pointer values start from zero, memory allocated per-kernel-launch
113  // Variables can be grouped into a module level struct and a struct per
114  // kernel function by AMDGPULowerModuleLDSPass. If that is done, they
115  // are allocated at statically computable addresses here.
116  //
117  // Address 0
118  // {
119  // llvm.amdgcn.module.lds
120  // }
121  // alignment padding
122  // {
123  // llvm.amdgcn.kernel.some-name.lds
124  // }
125  // other variables, e.g. dynamic lds, allocated after this call
126 
127  const GlobalVariable *GV = M->getNamedGlobal("llvm.amdgcn.module.lds");
129 
// Allocate the module-level variable unless it is absent or F is marked as
// not needing it; a default-constructed Align is passed as the trailing
// alignment.
130  if (GV && !canElideModuleLDS(F)) {
131  unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align());
// The cast to void keeps Offset "used" when the assert below is compiled out.
132  (void)Offset;
133  assert(Offset == 0 &&
134  "Module LDS expected to be allocated before other LDS");
135  }
136 
// KV is presumably the per-kernel variable returned by
// getKernelLDSGlobalFromFunction(F) -- its declaration is not shown; confirm.
137  if (KV) {
138  // The per-kernel offset is deterministic because it is allocated
139  // before any other non-module LDS variables.
140  unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align());
141  (void)Offset;
142  }
143  }
144 }
145 
// Body of getLDSKernelIdMetadata(const Function &F): reads the
// "llvm.amdgcn.lds.kernel.id" metadata attached to F and returns its single
// integer operand when that value fits in 32 bits; otherwise returns an empty
// value.
// NOTE(review): the signature lines (embedded lines 146-147) are absent from
// this listing.
148  auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
// Only a node with exactly one operand is accepted.
149  if (MD && MD->getNumOperands() == 1) {
// NOTE(review): the local is named KnownSize although the metadata is a
// kernel id. Also check whether mdconst::extract can return null at all; if
// it cannot, the null test below never fails -- confirm against Metadata.h.
150  ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0));
151  if (KnownSize) {
152  uint64_t V = KnownSize->getZExtValue();
// Values that do not fit in 32 bits fall through to the empty return.
153  if (V <= UINT32_MAX) {
154  return V;
155  }
156  }
157  }
158  return {};
159 }
160 
162  const GlobalVariable &GV) {
// Raises the recorded dynamic-LDS alignment to GV's alignment when GV's is
// larger, and recomputes LDSSize to match. GV must have a zero allocation
// size (asserted below).
// NOTE(review): the first signature line (embedded line 161) is absent from
// this listing.
163  assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
164 
// Use GV's explicit alignment if it has one, otherwise the ABI alignment of
// its value type.
165  Align Alignment =
166  DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
// Nothing to do when the current dynamic-LDS alignment is already at least
// as large.
167  if (Alignment <= DynLDSAlign)
168  return;
169 
// Re-derive LDSSize from the static size using the new, larger alignment.
170  LDSSize = alignTo(StaticLDSSize, Alignment);
171  DynLDSAlign = Alignment;
172 }
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:156
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:376
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::AMDGPUMachineFunction::ExplicitKernArgSize
uint64_t ExplicitKernArgSize
Definition: AMDGPUMachineFunction.h:29
llvm::DataLayout
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:113
llvm::Function
Definition: Function.h:60
getFunction
static Function * getFunction(Constant *C)
Definition: Evaluator.cpp:236
llvm::Attribute
Definition: Attributes.h:66
llvm::AMDGPUMachineFunction::getDynLDSAlign
Align getDynLDSAlign() const
Definition: AMDGPUMachineFunction.h:113
llvm::GlobalObject::getAlign
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:79
llvm::AMDGPUMachineFunction::AMDGPUMachineFunction
AMDGPUMachineFunction(const MachineFunction &MF)
Definition: AMDGPUMachineFunction.cpp:19
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::AMDGPUMachineFunction::LDSSize
uint32_t LDSSize
Number of bytes in the LDS that are being used.
Definition: AMDGPUMachineFunction.h:33
llvm::Optional< uint32_t >
llvm::AMDGPUMachineFunction::WaveLimiter
bool WaveLimiter
Definition: AMDGPUMachineFunction.h:61
llvm::Attribute::getValueAsBool
bool getValueAsBool() const
Return the attribute's value as a boolean.
Definition: Attributes.cpp:298
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::AMDGPUMachineFunction::getKernelLDSGlobalFromFunction
static const GlobalVariable * getKernelLDSGlobalFromFunction(const Function &F)
Definition: AMDGPUMachineFunction.cpp:88
llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPU.h:373
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::AMDGPUSubtarget::get
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Definition: AMDGPUSubtarget.cpp:968
TargetMachine.h
Constants.h
llvm::AMDGPUMachineFunction::allocateLDSGlobal
unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV)
Definition: AMDGPUMachineFunction.h:98
AMDGPUMachineFunction.h
AMDGPUSubtarget.h
AMDGPU
Definition: AMDGPUReplaceLDSUseWithPointer.cpp:114
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::CallingConv::ID
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1831
llvm::AMDGPUMachineFunction::MemoryBound
bool MemoryBound
Definition: AMDGPUMachineFunction.h:58
llvm::AMDGPUMachineFunction::getLDSKernelIdMetadata
static Optional< uint32_t > getLDSKernelIdMetadata(const Function &F)
Definition: AMDGPUMachineFunction.cpp:147
llvm::AMDGPUMachineFunction::allocateKnownAddressLDSGlobal
void allocateKnownAddressLDSGlobal(const Function &F)
Definition: AMDGPUMachineFunction.cpp:101
llvm::AMDGPUSubtarget
Definition: AMDGPUSubtarget.h:29
AMDGPUPerfHintAnalysis.h
Analyzes if a function is potentially memory bound and if a kernel may benefit from limiting numb...
uint64_t
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
canElideModuleLDS
static bool canElideModuleLDS(const Function &F)
Definition: AMDGPUMachineFunction.cpp:97
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::GlobalValue::getAddressSpace
unsigned getAddressSpace() const
Definition: GlobalValue.h:201
llvm::CallingConv::SPIR_KERNEL
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:141
MachineModuleInfo.h
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
AMDGPU.h
llvm::AMDGPU::isModuleEntryFunctionCC
bool isModuleEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1848
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
CC
auto CC
Definition: RISCVRedundantCopyElimination.cpp:79
llvm::AMDGPUMachineFunction::StaticLDSSize
uint32_t StaticLDSSize
Number of bytes in the LDS allocated statically.
Definition: AMDGPUMachineFunction.h:38
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:142
llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:201
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:623
llvm::AMDGPUMachineFunction::isModuleEntryFunction
bool isModuleEntryFunction() const
Definition: AMDGPUMachineFunction.h:84
llvm::AMDGPUMachineFunction::StaticGDSSize
uint32_t StaticGDSSize
Definition: AMDGPUMachineFunction.h:39
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:292
llvm::AMDGPUMachineFunction::MaxKernArgAlign
Align MaxKernArgAlign
Definition: AMDGPUMachineFunction.h:30
llvm::AMDGPUMachineFunction::GDSSize
uint32_t GDSSize
Definition: AMDGPUMachineFunction.h:34
llvm::AMDGPUMachineFunction::DynLDSAlign
Align DynLDSAlign
Align for dynamic shared memory if any.
Definition: AMDGPUMachineFunction.h:46
llvm::AMDGPUMachineFunction::setDynLDSAlign
void setDynLDSAlign(const DataLayout &DL, const GlobalVariable &GV)
Definition: AMDGPUMachineFunction.cpp:161