LLVM 23.0.0git
AMDGPUMemoryUtils.cpp
Go to the documentation of this file.
1//===-- AMDGPUMemoryUtils.cpp - -------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDGPUMemoryUtils.h"
10#include "AMDGPU.h"
16#include "llvm/IR/DataLayout.h"
19#include "llvm/IR/IntrinsicsAMDGPU.h"
20#include "llvm/IR/LLVMContext.h"
22
23#define DEBUG_TYPE "amdgpu-memory-utils"
24
25using namespace llvm;
26
27namespace llvm::AMDGPU {
28
// NOTE(review): the signature line of this function is missing from this
// listing; the body uses DL and GV, presumably the parameters of getAlign.
// Returns whatever DL.getValueOrABITypeAlignment yields when given GV's
// pointer alignment and GV's value type.
30 return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
31 GV->getValueType());
32}
33
// Copies selected metadata from Source to Dest. Only the MD_dbg,
// MD_invariant_load and MD_nontemporal kinds are set on Dest; every other
// kind found on Source is skipped.
34void copyMetadataForWidenedLoad(LoadInst &Dest, const LoadInst &Source) {
// NOTE(review): the declaration of MD is not visible in this listing.
36 Source.getAllMetadata(MD);
37 for (const auto [ID, N] : MD) {
38 switch (ID) {
39 case LLVMContext::MD_dbg:
40 case LLVMContext::MD_invariant_load:
41 case LLVMContext::MD_nontemporal:
42 Dest.setMetadata(ID, N);
43 break;
44 default:
// Any other metadata kind is not copied.
45 break;
46 }
47 }
48}
49
50// Returns the target extension type of a global variable,
51// which can only be a TargetExtType, an array or single-element struct of it,
52// or their nesting combination.
// Returns nullptr when the value type does not reduce to a TargetExtType,
// including any struct that does not have exactly one element.
53// TODO: allow struct of multiple TargetExtType elements of the same type.
54// TODO: Disallow other uses of target("amdgcn.named.barrier") including:
55// - Structs containing barriers in different scope/rank
56// - Structs containing a mixture of barriers and other data.
57// - Globals in other address spaces.
58// - Allocas.
// NOTE(review): the function header line is missing from this listing.
60 Type *Ty = GV.getValueType();
// Peel one wrapper type per iteration until a TargetExtType is reached or a
// type that is neither struct nor array is found.
61 while (true) {
62 if (auto *TTy = dyn_cast<TargetExtType>(Ty))
63 return TTy;
64 if (auto *STy = dyn_cast<StructType>(Ty)) {
// Only single-element structs are looked through.
65 if (STy->getNumElements() != 1)
66 return nullptr;
67 Ty = STy->getElementType(0);
68 continue;
69 }
70 if (auto *ATy = dyn_cast<ArrayType>(Ty)) {
// Arrays are looked through regardless of their length.
71 Ty = ATy->getElementType();
72 continue;
73 }
74 return nullptr;
75 }
76}
77
// NOTE(review): the function header line is missing from this listing.
// Returns GV's target extension type when that type is named
// "amdgcn.named.barrier"; returns nullptr otherwise, including when
// getTargetExtType finds no target extension type for GV.
79 if (TargetExtType *Ty = getTargetExtType(GV))
80 return Ty->getName() == "amdgcn.named.barrier" ? Ty : nullptr;
81 return nullptr;
82}
83
// NOTE(review): the function header line is missing from this listing.
85 // external zero size addrspace(3) without initializer is dynlds.
86 const Module *M = GV.getParent();
87 const DataLayout &DL = M->getDataLayout();
// NOTE(review): the condition guarding the early `return false` below is
// missing from this listing.
89 return false;
// Otherwise the result is whether GV's size under the module's DataLayout is
// zero bytes.
90 return GV.getGlobalSize(DL) == 0;
91}
92
// NOTE(review): the function header and the condition guarding this first
// `return false` are missing from this listing.
95 return false;
96 }
// A variable recognised by isDynamicLDS is accepted without further checks.
97 if (isDynamicLDS(GV)) {
98 return true;
99 }
100 if (GV.isConstant()) {
101 // A constant undef variable can't be written to, and any load is
102 // undef, so it should be eliminated by the optimizer. It could be
103 // dropped by the back end if not. This pass skips over it.
104 return false;
105 }
// An initializer is tolerated only when it is an UndefValue.
106 if (GV.hasInitializer() && !isa<UndefValue>(GV.getInitializer())) {
107 // Initializers are unimplemented for LDS address space.
108 // Leave such variables in place for consistent error reporting.
109 return false;
110 }
// Every variable that passed the checks above is accepted.
111 return true;
112}
113
115 Module &M, function_ref<bool(const GlobalVariable &)> Filter) {
// Collect a pointer to every global of M that Filter accepts.
// NOTE(review): the first line of the signature, the declaration of Worklist
// and the statement that follows this loop are missing from this listing.
117 for (auto &GV : M.globals())
118 if (Filter(GV))
119 Worklist.push_back(&GV);
121}
122
124 function_ref<bool(const GlobalVariable &)> Filter,
125 FunctionVariableMap &Kernels,
126 FunctionVariableMap &Functions) {
// NOTE(review): the first line of the signature is missing from this listing.
127 // Get uses from the current function, excluding uses by called Functions
128 // Two output variables to avoid walking the globals list twice
// For every global of M accepted by Filter, each user that is an Instruction
// is attributed to its enclosing function F: the global is inserted into
// Kernels[F] when isKernel(*F) holds and into Functions[F] otherwise. Users
// that are not Instructions are ignored by this loop.
129 for (auto &GV : M.globals()) {
130 if (!Filter(GV))
131 continue;
132 for (User *V : GV.users()) {
133 if (auto *I = dyn_cast<Instruction>(V)) {
134 Function *F = I->getFunction();
135 if (isKernel(*F))
136 Kernels[F].insert(&GV);
137 else
138 Functions[F].insert(&GV);
139 }
140 }
141 }
142}
143
// Computes, for each kernel in M, the filtered globals it uses directly
// (DirectMapKernel) and the ones it may reach through calls
// (IndirectMapKernel), and returns both maps as a GVUsesInfoTy.
// NOTE(review): the signature line carrying the function name and its leading
// parameters is missing from this listing.
144GVUsesInfoTy
146 function_ref<bool(const GlobalVariable &)> Filter) {
147
148 FunctionVariableMap DirectMapKernel;
149 FunctionVariableMap DirectMapFunction;
150 getUsesOfGVByFunction(CG, M, Filter, DirectMapKernel, DirectMapFunction);
151
152 // Collect functions whose address has escaped
153 DenseSet<Function *> AddressTakenFuncs;
154 for (Function &F : M.functions()) {
// Kernels are never added to AddressTakenFuncs.
155 if (!isKernel(F))
156 if (F.hasAddressTaken(nullptr,
157 /* IgnoreCallbackUses */ false,
158 /* IgnoreAssumeLikeCalls */ false,
159 /* IgnoreLLVMUsed */ true,
160 /* IgnoreArcAttachedCall */ false)) {
161 AddressTakenFuncs.insert(&F);
162 }
163 }
164
165 // Collect variables that are used by functions whose address has escaped
166 DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer;
167 for (Function *F : AddressTakenFuncs) {
168 set_union(VariablesReachableThroughFunctionPointer, DirectMapFunction[F]);
169 }
170
// A call record whose callee node has no Function counts as an unknown call.
171 auto FunctionMakesUnknownCall = [&](const Function *F) -> bool {
172 assert(!F->isDeclaration());
173 for (const CallGraphNode::CallRecord &R : *CG[F]) {
174 if (!R.second->getFunction())
175 return true;
176 }
177 return false;
178 };
179
180 // Work out which variables are reachable through function calls
181 FunctionVariableMap TransitiveMapFunction = DirectMapFunction;
182
183 // If the function makes any unknown call, assume the worst case that it can
184 // access all variables accessed by functions whose address escaped
185 for (Function &F : M.functions()) {
186 if (!F.isDeclaration() && FunctionMakesUnknownCall(&F)) {
187 if (!isKernel(F)) {
188 set_union(TransitiveMapFunction[&F],
189 VariablesReachableThroughFunctionPointer);
190 }
191 }
192 }
193
194 // Direct implementation of collecting all variables reachable from each
195 // function
196 for (Function &Func : M.functions()) {
197 if (Func.isDeclaration() || isKernel(Func))
198 continue;
199
200 DenseSet<Function *> seen; // catches cycles
201 SmallVector<Function *, 4> wip = {&Func};
202
// Worklist walk over the call graph starting at Func; the direct uses of
// every function popped from the worklist are merged into Func's entry.
203 while (!wip.empty()) {
204 Function *F = wip.pop_back_val();
205
206 // Can accelerate this by referring to transitive map for functions that
207 // have already been computed, with more care than this
208 set_union(TransitiveMapFunction[&Func], DirectMapFunction[F]);
209
210 for (const CallGraphNode::CallRecord &R : *CG[F]) {
211 Function *Ith = R.second->getFunction();
212 if (Ith) {
213 if (!seen.contains(Ith)) {
214 seen.insert(Ith);
215 wip.push_back(Ith);
216 }
217 }
218 }
219 }
220 }
221
222 // Collect variables that are transitively used by functions whose address has
223 // escaped
224 for (Function *F : AddressTakenFuncs) {
225 set_union(VariablesReachableThroughFunctionPointer,
226 TransitiveMapFunction[F]);
227 }
228
229 // DirectMapKernel lists which variables are used by the kernel
230 // find the variables which are used through a function call
231 FunctionVariableMap IndirectMapKernel;
232
233 for (Function &Func : M.functions()) {
234 if (Func.isDeclaration() || !isKernel(Func))
235 continue;
236
// Seed the kernel's indirect set with the transitive uses of each function it
// calls directly.
237 for (const CallGraphNode::CallRecord &R : *CG[&Func]) {
238 Function *Ith = R.second->getFunction();
239 if (Ith) {
240 set_union(IndirectMapKernel[&Func], TransitiveMapFunction[Ith]);
241 }
242 }
243
244 // Check if the kernel encounters unknown calls, whether directly or
245 // indirectly.
246 bool SeesUnknownCalls = [&]() {
247 SmallVector<Function *> WorkList = {CG[&Func]->getFunction()};
// NOTE(review): the declaration of Visited is missing from this listing.
249
250 while (!WorkList.empty()) {
251 Function *F = WorkList.pop_back_val();
252
253 for (const CallGraphNode::CallRecord &CallRecord : *CG[F]) {
254 if (!CallRecord.second)
255 continue;
256
257 Function *Callee = CallRecord.second->getFunction();
258 if (!Callee)
259 return true;
260
261 if (Visited.insert(Callee).second)
262 WorkList.push_back(Callee);
263 }
264 }
265 return false;
266 }();
267
// With an unknown call in reach, the kernel is also charged with every
// variable reachable through an address-taken function.
268 if (SeesUnknownCalls) {
269 set_union(IndirectMapKernel[&Func],
270 VariablesReachableThroughFunctionPointer);
271 }
272 }
273
274 return {std::move(DirectMapKernel), std::move(IndirectMapKernel)};
275}
276
// NOTE(review): the function header and the statement that initializes
// UsesInfo are missing from this listing.
279 // Verify that we fall into one of 2 cases:
280 // - All variables are either absolute
281 // or direct mapped dynamic LDS that is not lowered.
282 // - No variables are absolute.
283 // Named-barriers which are absolute symbols are removed
284 // from the maps.
// HasAbsoluteGVs stays empty until the first variable that takes part in the
// check is seen; afterwards it holds that variable's absolute-ness.
285 std::optional<bool> HasAbsoluteGVs;
// NOTE(review): iterating a braced list of the two maps appears to iterate
// copies of them, which would make the erase calls on UsesInfo inside the
// loop safe - confirm.
286 for (auto &Map : {UsesInfo.DirectAccess, UsesInfo.IndirectAccess}) {
287 for (auto &[Fn, GVs] : Map) {
288 for (auto *GV : GVs) {
289 bool IsAbsolute = GV->isAbsoluteSymbolRef();
290 bool IsDirectMapDynLDSGV =
291 AMDGPU::isDynamicLDS(*GV) && UsesInfo.DirectAccess.contains(Fn);
// Dynamic LDS variables of a function present in DirectAccess do not take
// part in the absolute/non-absolute consistency check.
292 if (IsDirectMapDynLDSGV)
293 continue;
294
295 // TODO: Remove once barriers are no longer in the LDS AS.
296 if (isNamedBarrier(*GV)) {
297 if (IsAbsolute) {
298 UsesInfo.DirectAccess[Fn].erase(GV);
299 UsesInfo.IndirectAccess[Fn].erase(GV);
300 }
301 continue;
302 }
303
304 if (HasAbsoluteGVs.has_value()) {
305 if (*HasAbsoluteGVs != IsAbsolute) {
// NOTE(review): the call that receives this message is missing from this
// listing; presumably an error-reporting call - confirm.
307 "module cannot mix absolute and non-absolute LDS GVs");
308 }
309 } else
310 HasAbsoluteGVs = IsAbsolute;
311 }
312 }
313 }
314
315 // If we only had absolute GVs, we have nothing to do, return an empty
316 // result.
317 if (HasAbsoluteGVs && *HasAbsoluteGVs)
318 return GVUsesInfoTy();
319
320 return UsesInfo;
321}
322
324 ArrayRef<StringRef> FnAttrs) {
// NOTE(review): the first line of the signature is missing from this listing.
// The attributes named in FnAttrs are removed from KernelRoot first, then
// from every function found by walking call graph edges from it.
325 for (StringRef Attr : FnAttrs)
326 KernelRoot->removeFnAttr(Attr);
327
328 SmallVector<Function *> WorkList = {CG[KernelRoot]->getFunction()};
// NOTE(review): the declaration of Visited is missing from this listing.
// SeenUnknownCall makes the external-calling-node sweep below run at most once.
330 bool SeenUnknownCall = false;
331
332 while (!WorkList.empty()) {
333 Function *F = WorkList.pop_back_val();
334
335 for (auto &CallRecord : *CG[F]) {
336 if (!CallRecord.second)
337 continue;
338
339 Function *Callee = CallRecord.second->getFunction();
340 if (!Callee) {
341 if (!SeenUnknownCall) {
342 SeenUnknownCall = true;
343
344 // If we see any indirect calls, assume nothing about potential
345 // targets.
346 // TODO: This could be refined to possible LDS global users.
347 for (auto &ExternalCallRecord : *CG.getExternalCallingNode()) {
348 Function *PotentialCallee =
349 ExternalCallRecord.second->getFunction();
350 assert(PotentialCallee);
// Kernels among the potential callees keep their attributes.
351 if (!isKernel(*PotentialCallee)) {
352 for (StringRef Attr : FnAttrs)
353 PotentialCallee->removeFnAttr(Attr);
354 }
355 }
356 }
357 } else {
// Known callee: strip the attributes and queue it the first time it is seen.
358 for (StringRef Attr : FnAttrs)
359 Callee->removeFnAttr(Attr);
360 if (Visited.insert(Callee).second)
361 WorkList.push_back(Callee);
362 }
363 }
364 }
365}
366
367bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
368 Instruction *DefInst = Def->getMemoryInst();
369
370 if (isa<FenceInst>(DefInst))
371 return false;
372
373 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
374 switch (II->getIntrinsicID()) {
375 case Intrinsic::amdgcn_s_barrier:
376 case Intrinsic::amdgcn_s_cluster_barrier:
377 case Intrinsic::amdgcn_s_barrier_signal:
378 case Intrinsic::amdgcn_s_barrier_signal_var:
379 case Intrinsic::amdgcn_s_barrier_signal_isfirst:
380 case Intrinsic::amdgcn_s_barrier_init:
381 case Intrinsic::amdgcn_s_barrier_join:
382 case Intrinsic::amdgcn_s_barrier_wait:
383 case Intrinsic::amdgcn_s_barrier_leave:
384 case Intrinsic::amdgcn_s_get_barrier_state:
385 case Intrinsic::amdgcn_s_wakeup_barrier:
386 case Intrinsic::amdgcn_wave_barrier:
387 case Intrinsic::amdgcn_sched_barrier:
388 case Intrinsic::amdgcn_sched_group_barrier:
389 case Intrinsic::amdgcn_iglp_opt:
390 return false;
391 default:
392 break;
393 }
394 }
395
396 // Ignore atomics not aliasing with the original load, any atomic is a
397 // universal MemoryDef from MSSA's point of view too, just like a fence.
398 const auto checkNoAlias = [AA, Ptr](auto I) -> bool {
399 return I && AA->isNoAlias(I->getPointerOperand(), Ptr);
400 };
401
402 if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
403 checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
404 return false;
405
406 return true;
407}
408
410 AAResults *AA) {
// NOTE(review): the first line of the signature and the declarations of
// WorkList, Visited and Loc are missing from this listing.
411 MemorySSAWalker *Walker = MSSA->getWalker();
415
416 LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');
417
418 // Start with a nearest dominating clobbering access, it will be either
419 // live on entry (nothing to do, load is not clobbered), MemoryDef, or
420 // MemoryPhi if several MemoryDefs can define this memory state. In that
421 // case add all Defs to WorkList and continue going up and checking all
422 // the definitions of this memory location until the root. When all the
423 // defs are exhausted and came to the entry state we have no clobber.
424 // Along the scan ignore barriers and fences which are considered clobbers
425 // by the MemorySSA, but not really writing anything into the memory.
426 while (!WorkList.empty()) {
427 MemoryAccess *MA = WorkList.pop_back_val();
// Each memory access is examined at most once.
428 if (!Visited.insert(MA).second)
429 continue;
430
431 if (MSSA->isLiveOnEntryDef(MA))
432 continue;
433
434 if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
435 LLVM_DEBUG(dbgs() << " Def: " << *Def->getMemoryInst() << '\n');
436
437 if (isReallyAClobber(Load->getPointerOperand(), Def, AA)) {
438 LLVM_DEBUG(dbgs() << " -> load is clobbered\n");
439 return true;
440 }
441
// This Def was ruled out as a clobber; continue the scan from the clobbering
// access the walker reports for the Def's defining access and Loc.
442 WorkList.push_back(
443 Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
444 continue;
445 }
446
// Not live-on-entry and not a MemoryDef: cast<> requires a MemoryPhi here, and
// each of its incoming values is queued.
447 const MemoryPhi *Phi = cast<MemoryPhi>(MA);
448 for (const auto &Use : Phi->incoming_values())
449 WorkList.push_back(cast<MemoryAccess>(&Use));
450 }
451
452 LLVM_DEBUG(dbgs() << " -> no clobber\n");
453 return false;
454}
455
456} // end namespace llvm::AMDGPU
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
uint64_t IntrinsicInst * II
This file defines generic set operations that may be used on set's of different types,...
#define LLVM_DEBUG(...)
Definition Debug.h:119
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
std::pair< std::optional< WeakTrackingVH >, CallGraphNode * > CallRecord
A pair of the calling instruction (a call or invoke) and the call graph node being called.
Definition CallGraph.h:174
The basic data container for the call graph of a Module of IR.
Definition CallGraph.h:72
CallGraphNode * getExternalCallingNode() const
Returns the CallGraphNode which is used to represent undetermined calls into the callgraph.
Definition CallGraph.h:127
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Implements a dense probed hash-table based set.
Definition DenseSet.h:289
const Function & getFunction() const
Definition Function.h:166
void removeFnAttr(Attribute::AttrKind Kind)
Remove function attributes from this function.
Definition Function.cpp:682
Module * getParent()
Get the module that this global value is contained inside of...
PointerType * getType() const
Global values are always pointers.
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
LLVM_ABI uint64_t getGlobalSize(const DataLayout &DL) const
Get the size of this global variable in bytes.
Definition Globals.cpp:569
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A wrapper class for inspecting calls to intrinsic functions.
An instruction for reading from memory.
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition MemorySSA.h:371
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Represents phi nodes for memory accesses.
Definition MemorySSA.h:479
This is the generic walker interface for walkers of MemorySSA.
Definition MemorySSA.h:1006
MemoryAccess * getClobberingMemoryAccess(const Instruction *I, BatchAAResults &AA)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition MemorySSA.h:1035
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition MemorySSA.h:702
LLVM_ABI MemorySSAWalker * getWalker()
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition MemorySSA.h:740
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Class to represent target extensions types, which are generally unintrospectable from target-independ...
StringRef getName() const
Return the name for this target extension type.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM Value Representation.
Definition Value.h:75
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:993
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:212
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition DenseSet.h:185
An efficient, type-erasing, non-owning reference to a callable.
Abstract Attribute helper functions.
Definition Attributor.h:165
@ LOCAL_ADDRESS
Address space for local memory.
GVUsesInfoTy getTransitiveUsesOfLDSForLowering(const CallGraph &CG, Module &M)
Collects all uses of LDS Global Variables in M using getUsesOfGVByFunction, with isLDSVariableToLower...
bool isDynamicLDS(const GlobalVariable &GV)
void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot, ArrayRef< StringRef > FnAttrs)
Strip FnAttr attribute from any functions where we may have introduced its use.
bool eliminateGVConstantExprUsesFromAllInstructions(Module &M, function_ref< bool(const GlobalVariable &)> Filter)
Iterates over all GlobalVariables in M, and whenever Filter returns true, replace all constant users ...
LLVM_READNONE constexpr bool isKernel(CallingConv::ID CC)
void getUsesOfGVByFunction(const CallGraph &CG, Module &M, function_ref< bool(const GlobalVariable &)> Filter, FunctionVariableMap &Kernels, FunctionVariableMap &Functions)
Finds uses of Global Variables on a per-function basis.
bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA)
Given a Def clobbering a load from Ptr according to the MSSA check if this is actually a memory updat...
static TargetExtType * getTargetExtType(const GlobalVariable &GV)
DenseMap< Function *, DenseSet< GlobalVariable * > > FunctionVariableMap
TargetExtType * isNamedBarrier(const GlobalVariable &GV)
bool isLDSVariableToLower(const GlobalVariable &GV)
Align getAlign(const DataLayout &DL, const GlobalVariable *GV)
void copyMetadataForWidenedLoad(LoadInst &Dest, const LoadInst &Source)
bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA, AAResults *AA)
Check if a Load is clobbered in its function.
GVUsesInfoTy getTransitiveUsesOfGV(const CallGraph &CG, Module &M, function_ref< bool(const GlobalVariable &)> Filter)
Collects all uses of Global Variables in M using getUsesOfGVByFunction.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
#define N
FunctionVariableMap DirectAccess
FunctionVariableMap IndirectAccess
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39