LLVM 18.0.0git
Go to the documentation of this file.
1//===-- AMDGPUMemoryUtils.cpp - -------------------------------------------===//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
9#include "AMDGPUMemoryUtils.h"
10#include "AMDGPU.h"
11#include "AMDGPUBaseInfo.h"
12#include "llvm/ADT/SmallSet.h"
15#include "llvm/IR/DataLayout.h"
18#include "llvm/IR/IntrinsicsAMDGPU.h"
21#define DEBUG_TYPE "amdgpu-memory-utils"
23using namespace llvm;
25namespace llvm {
27namespace AMDGPU {
30 return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
31 GV->getValueType());
35 // external zero size addrspace(3) without initializer implies cuda/hip extern
36 // __shared__ the semantics for such a variable appears to be that all extern
37 // __shared__ variables alias one another. This hits different handling.
38 const Module *M = GV.getParent();
39 const DataLayout &DL = M->getDataLayout();
41 return false;
42 }
43 uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());
44 return GV.hasExternalLinkage() && AllocSize == 0;
49 return false;
50 }
51 if (isDynamicLDS(GV)) {
52 return true;
53 }
54 if (GV.isConstant()) {
55 // A constant undef variable can't be written to, and any load is
56 // undef, so it should be eliminated by the optimizer. It could be
57 // dropped by the back end if not. This pass skips over it.
58 return false;
59 }
60 if (GV.hasInitializer() && !isa<UndefValue>(GV.getInitializer())) {
61 // Initializers are unimplemented for LDS address space.
62 // Leave such variables in place for consistent error reporting.
63 return false;
64 }
65 return true;
68bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
69 Instruction *DefInst = Def->getMemoryInst();
71 if (isa<FenceInst>(DefInst))
72 return false;
74 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
75 switch (II->getIntrinsicID()) {
76 case Intrinsic::amdgcn_s_barrier:
77 case Intrinsic::amdgcn_wave_barrier:
78 case Intrinsic::amdgcn_sched_barrier:
79 case Intrinsic::amdgcn_sched_group_barrier:
80 return false;
81 default:
82 break;
83 }
84 }
86 // Ignore atomics not aliasing with the original load, any atomic is a
87 // universal MemoryDef from MSSA's point of view too, just like a fence.
88 const auto checkNoAlias = [AA, Ptr](auto I) -> bool {
89 return I && AA->isNoAlias(I->getPointerOperand(), Ptr);
90 };
92 if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
93 checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
94 return false;
96 return true;
100 AAResults *AA) {
101 MemorySSAWalker *Walker = MSSA->getWalker();
106 LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');
108 // Start with a nearest dominating clobbering access, it will be either
109 // live on entry (nothing to do, load is not clobbered), MemoryDef, or
110 // MemoryPhi if several MemoryDefs can define this memory state. In that
111 // case add all Defs to WorkList and continue going up and checking all
112 // the definitions of this memory location until the root. When all the
113 // defs are exhausted and came to the entry state we have no clobber.
114 // Along the scan ignore barriers and fences which are considered clobbers
115 // by the MemorySSA, but not really writing anything into the memory.
116 while (!WorkList.empty()) {
117 MemoryAccess *MA = WorkList.pop_back_val();
118 if (!Visited.insert(MA).second)
119 continue;
121 if (MSSA->isLiveOnEntryDef(MA))
122 continue;
124 if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
125 LLVM_DEBUG(dbgs() << " Def: " << *Def->getMemoryInst() << '\n');
127 if (isReallyAClobber(Load->getPointerOperand(), Def, AA)) {
128 LLVM_DEBUG(dbgs() << " -> load is clobbered\n");
129 return true;
130 }
132 WorkList.push_back(
133 Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
134 continue;
135 }
137 const MemoryPhi *Phi = cast<MemoryPhi>(MA);
138 for (const auto &Use : Phi->incoming_values())
139 WorkList.push_back(cast<MemoryAccess>(&Use));
140 }
142 LLVM_DEBUG(dbgs() << " -> no clobber\n");
143 return false;
146} // end namespace AMDGPU
148} // end namespace llvm
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define LLVM_DEBUG(X)
Definition: Debug.h:101
#define I(x, y, z)
Definition: MD5.cpp:58
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file defines the SmallSet class.
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are no-alias.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool hasExternalLinkage() const
Definition: GlobalValue.h:506
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:652
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:290
Type * getValueType() const
Definition: GlobalValue.h:292
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
An instruction for reading from memory.
Definition: Instructions.h:177
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition: MemorySSA.h:372
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Represents phi nodes for memory accesses.
Definition: MemorySSA.h:479
This is the generic walker interface for walkers of MemorySSA.
Definition: MemorySSA.h:1006
MemoryAccess * getClobberingMemoryAccess(const Instruction *I, BatchAAResults &AA)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition: MemorySSA.h:1035
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:700
MemorySSAWalker * getWalker()
Definition: MemorySSA.cpp:1547
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition: MemorySSA.h:737
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:926
Address space for local memory.
Definition: AMDGPU.h:411
bool isDynamicLDS(const GlobalVariable &GV)
Align getAlign(DataLayout const &DL, const GlobalVariable *GV)
bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA)
Given a Def clobbering a load from Ptr according to the MSSA check if this is actually a memory updat...
bool isLDSVariableToLower(const GlobalVariable &GV)
bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA, AAResults *AA)
Check is a Load is clobbered in its function.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39