LLVM 17.0.0git
AMDGPUMachineFunction.cpp
Go to the documentation of this file.
1//===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "AMDGPU.h"
12#include "AMDGPUSubtarget.h"
15#include "llvm/IR/Constants.h"
16#include "llvm/IR/Metadata.h"
18
19using namespace llvm;
20
// Constructor of AMDGPUMachineFunction; the index at the end of this listing
// gives its signature as
// `AMDGPUMachineFunction(const Function &F, const AMDGPUSubtarget &ST)`.
// NOTE(review): the leading numbers are listing line numbers, and they skip
// 21, 41, 44, 47 and 52, so this copy lacks those lines and several
// statements below are incomplete.
22 const AMDGPUSubtarget &ST)
// Both entry-function flags are computed from F's calling convention;
// NoSignedZerosFPMath starts out false.
23 : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
24 IsModuleEntryFunction(
25 AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
26 NoSignedZerosFPMath(false) {
27
28 // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
29 // except reserved size is not correctly aligned.
30
// MemoryBound is the boolean value of the "amdgpu-memory-bound" attribute.
31 Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
32 MemoryBound = MemBoundAttr.getValueAsBool();
33
// WaveLimiter is the boolean value of the "amdgpu-wave-limiter" attribute.
34 Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
35 WaveLimiter = WaveLimitAttr.getValueAsBool();
36
37 // FIXME: How is this attribute supposed to interact with statically known
38 // global sizes?
39 StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
40 if (!S.empty())
// NOTE(review): the statement controlled by this `if` (listing line 41) is
// missing from this copy.
42
43 // Assume the attribute allocates before any known GDS globals.
// NOTE(review): the statement the comment above describes (listing line 44)
// is missing from this copy.
45
46 CallingConv::ID CC = F.getCallingConv();
// NOTE(review): listing line 47 is missing; as shown, CC is computed but never
// used. Presumably the missing line tests CC and guards the assignment
// below — confirm.
48 ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
49
50 // FIXME: Shouldn't be target specific
51 Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math");
// NOTE(review): listing line 52, which holds the left-hand side of this
// expression, is missing. The expression itself is true only when the
// attribute is a string attribute whose value is exactly "true".
53 NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true";
54}
55
// Records a memory offset for GV in LocalMemoryObjects and returns it; asking
// again for a GV that is already recorded returns the stored offset unchanged.
// NOTE(review): presumably the body of allocateLDSGlobal(DL, GV, Trailing),
// which is called with these three arguments further down the file — the
// first signature line (listing line 56) is absent. Listing lines 67, 71, 78,
// 81 and 85 are absent as well, so Offset is never assigned in this copy.
57 const GlobalVariable &GV,
58 Align Trailing) {
// Register GV with a placeholder offset of 0. Entry.second is tested to
// detect a GV that was already registered, in which case its stored offset
// is returned as-is.
59 auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0));
60 if (!Entry.second)
61 return Entry.first->second;
62
// Alignment for GV, obtained from DataLayout::getValueOrABITypeAlignment
// using GV's declared alignment and its value type.
63 Align Alignment =
64 DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
65
66 unsigned Offset;
// NOTE(review): listing line 67 is missing; the `} else {` further down
// implies an `if (...) {` was opened here.
68 /// TODO: We should sort these to minimize wasted space due to alignment
69 /// padding. Currently the padding is decided by the first encountered use
70 /// during lowering.
// NOTE(review): listing line 71 is missing (presumably where Offset is set
// for this branch — confirm).
72
// Grow the static LDS total by GV's allocation size.
73 StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());
74
75 // Align LDS size to trailing, e.g. for aligning dynamic shared memory
76 LDSSize = alignTo(StaticLDSSize, Trailing);
77 } else {
// NOTE(review): listing line 78 is missing; the string below is presumably
// the message half of an assert on GV's address space — confirm.
79 "expected region address space");
80
// NOTE(review): listing line 81 is missing (presumably where Offset is set
// for this branch — confirm).
// Grow the static GDS total by GV's allocation size.
82 StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());
83
84 // FIXME: Apply alignment of dynamic GDS
// NOTE(review): listing line 85 is missing from this copy.
86 }
87
// Replace the placeholder stored at insertion time with the final offset.
88 Entry.first->second = Offset;
89 return Offset;
90}
91
// Name of the module-level LDS global; passed to Module::getNamedGlobal when
// known-address LDS is allocated later in this file.
92static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds";
93
// Builds the name "llvm.amdgcn.kernel.<name of F>.lds" and returns whatever
// F's parent module reports for a global of that name.
// NOTE(review): the signature line (listing line 94) is absent from this
// copy; the index at the end of this page lists
// `static const GlobalVariable * getKernelLDSGlobalFromFunction(const Function &F)`,
// which presumably belongs to this body — confirm.
95 const Module *M = F.getParent();
// Assemble the per-kernel variable name piece by piece.
96 std::string KernelLDSName = "llvm.amdgcn.kernel.";
97 KernelLDSName += F.getName();
98 KernelLDSName += ".lds";
// NOTE(review): presumably null when the module has no such global — confirm
// against Module::getNamedGlobal.
99 return M->getNamedGlobal(KernelLDSName);
100}
101
102// This kernel calls no functions that require the module lds struct
103static bool canElideModuleLDS(const Function &F) {
104 return F.hasFnAttribute("amdgpu-elide-module-lds");
105}
106
// For a module entry function, allocates the module-level LDS variable and
// the per-kernel LDS variable (each only when present) and aborts through
// report_fatal_error if the offset handed out differs from the absolute
// address recorded on the variable. For any other function only the assert
// below runs.
// NOTE(review): presumably the body of
// `void allocateKnownAddressLDSGlobal(const Function &F)` listed in the index
// at the end of this page — the signature (listing line 107) is absent from
// this copy, as is listing line 133.
108 const Module *M = F.getParent();
109 // This function is called before allocating any other LDS so that it can
110 // reliably put values at known addresses. Consequently, dynamic LDS, if
111 // present, will not yet have been allocated
112
113 assert(getDynLDSAlign() == Align() && "dynamic LDS not yet allocated");
114
115 if (isModuleEntryFunction()) {
116
117 // Pointer values start from zero, memory allocated per-kernel-launch
118 // Variables can be grouped into a module level struct and a struct per
119 // kernel function by AMDGPULowerModuleLDSPass. If that is done, they
120 // are allocated at statically computable addresses here.
121 //
122 // Address 0
123 // {
124 // llvm.amdgcn.module.lds
125 // }
126 // alignment padding
127 // {
128 // llvm.amdgcn.kernel.some-name.lds
129 // }
130 // other variables, e.g. dynamic lds, allocated after this call
131
132 const GlobalVariable *GV = M->getNamedGlobal(ModuleLDSName);
// NOTE(review): KV, used below, is not declared anywhere in this copy;
// presumably the missing listing line 133 obtains it from
// getKernelLDSGlobalFromFunction(F) — confirm.
134
// The module variable is allocated only when it exists and F does not carry
// the elide attribute.
135 if (GV && !canElideModuleLDS(F)) {
136 unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align());
137 std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*GV);
// A missing absolute address, or one that differs from the allocated offset,
// is treated as fatal.
138 if (!Expect || (Offset != Expect)) {
139 report_fatal_error("Inconsistent metadata on module LDS variable");
140 }
141 }
142
143 if (KV) {
144 // The per-kernel offset is deterministic because it is allocated
145 // before any other non-module LDS variables.
146 unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align());
147 std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*KV);
// Same consistency check as for the module variable above.
148 if (!Expect || (Offset != Expect)) {
149 report_fatal_error("Inconsistent metadata on kernel LDS variable");
150 }
151 }
152 }
153}
154
// Reads the single operand of F's "llvm.amdgcn.lds.kernel.id" metadata as an
// integer and returns it when it fits in 32 bits. Returns an empty optional
// when the metadata is absent, when it does not have exactly one operand, or
// when the value exceeds UINT32_MAX.
// NOTE(review): the line with the function name and parameters (listing line
// 156) is absent from this copy; the index at the end of this page lists
// `static std::optional< uint32_t > getLDSKernelIdMetadata(const Function &F)`.
155std::optional<uint32_t>
157 // TODO: Would be more consistent with the abs symbols to use a range
158 MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
159 if (MD && MD->getNumOperands() == 1) {
// NOTE(review): if mdconst::extract asserts on an operand that is not a
// ConstantInt instead of returning null, this null test can never fail —
// confirm, and consider a dyn_extract-style helper if malformed metadata
// should be tolerated.
160 if (ConstantInt *KnownSize =
161 mdconst::extract<ConstantInt>(MD->getOperand(0))) {
162 uint64_t ZExt = KnownSize->getZExtValue();
// Only values representable in 32 bits are returned.
163 if (ZExt <= UINT32_MAX) {
164 return ZExt;
165 }
166 }
167 }
168 return {};
169}
170
// Returns the address from GV's absolute symbol range when that range holds
// a single element that fits in 32 bits; otherwise returns an empty optional.
// NOTE(review): listing lines 172 and 173 are absent from this copy. They
// presumably hold the function name/parameters — the index at the end of
// this page lists
// `static std::optional< uint32_t > getLDSAbsoluteAddress(const GlobalValue &GV)` —
// and the condition guarding the first `return {};` below. Confirm.
171std::optional<uint32_t>
174 return {};
175
// No absolute symbol range on GV means there is no known address.
176 std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();
177 if (!AbsSymRange)
178 return {};
179
// Proceed only when getSingleElement() yields a value, and return it only
// when it converts to an unsigned value no larger than UINT32_MAX.
180 if (const APInt *V = AbsSymRange->getSingleElement()) {
181 std::optional<uint64_t> ZExt = V->tryZExtValue();
182 if (ZExt && (*ZExt <= UINT32_MAX)) {
183 return *ZExt;
184 }
185 }
186
187 return {};
188}
189
// Raises DynLDSAlign to GV's alignment when that alignment is larger, and
// recomputes LDSSize as StaticLDSSize rounded up to it. Leaves both members
// untouched when GV's alignment does not exceed the current DynLDSAlign.
// GV is asserted to have an allocation size of zero.
// NOTE(review): the first signature line (listing line 190) is absent from
// this copy; the index at the end of this page lists
// `void setDynLDSAlign(const DataLayout &DL, const GlobalVariable &GV)`.
191 const GlobalVariable &GV) {
192 assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
193
// Alignment for GV, obtained from DataLayout::getValueOrABITypeAlignment
// using GV's declared alignment and its value type.
194 Align Alignment =
195 DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
// Nothing to do unless the new alignment is strictly larger.
196 if (Alignment <= DynLDSAlign)
197 return;
198
199 LDSSize = alignTo(StaticLDSSize, Alignment);
200 DynLDSAlign = Alignment;
201}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static constexpr StringLiteral ModuleLDSName
static bool canElideModuleLDS(const Function &F)
static const GlobalVariable * getKernelLDSGlobalFromFunction(const Function &F)
Analyzes if a function is potentially memory bound and if a kernel may benefit from limiting numb...
Base class for AMDGPU specific classes of TargetSubtarget.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define F(x, y, z)
Definition: MD5.cpp:55
This file contains the declarations for metadata subclasses.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
AMDGPUMachineFunction(const Function &F, const AMDGPUSubtarget &ST)
static std::optional< uint32_t > getLDSKernelIdMetadata(const Function &F)
Align DynLDSAlign
Align for dynamic shared memory if any.
void allocateKnownAddressLDSGlobal(const Function &F)
uint32_t LDSSize
Number of bytes in the LDS that are being used.
static std::optional< uint32_t > getLDSAbsoluteAddress(const GlobalValue &GV)
void setDynLDSAlign(const DataLayout &DL, const GlobalVariable &GV)
unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV)
uint32_t StaticLDSSize
Number of bytes in the LDS allocated statically.
Class for arbitrary precision integers.
Definition: APInt.h:75
bool isStringAttribute() const
Return true if the attribute is a string (target-dependent) attribute.
Definition: Attributes.cpp:281
bool getValueAsBool() const
Return the attribute's value as a boolean.
Definition: Attributes.cpp:303
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:317
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:110
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:79
unsigned getAddressSpace() const
Definition: GlobalValue.h:201
std::optional< ConstantRange > getAbsoluteSymbolRange() const
If this is an absolute symbol reference, returns the range of the symbol, otherwise returns std::null...
Definition: Globals.cpp:382
Type * getValueType() const
Definition: GlobalValue.h:292
Metadata node.
Definition: Metadata.h:943
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1291
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1297
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition: StringRef.h:840
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool consumeInteger(unsigned Radix, T &Result)
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:497
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPU.h:378
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:381
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:197
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:141
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:406
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39