LLVM  16.0.0git
AMDGPUMemoryUtils.cpp
Go to the documentation of this file.
1 //===-- AMDGPUMemoryUtils.cpp - -------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPUMemoryUtils.h"
10 #include "AMDGPU.h"
11 #include "AMDGPUBaseInfo.h"
12 #include "llvm/ADT/SmallSet.h"
15 #include "llvm/IR/DataLayout.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/IR/IntrinsicInst.h"
18 #include "llvm/IR/IntrinsicsAMDGPU.h"
20 
21 #define DEBUG_TYPE "amdgpu-memory-utils"
22 
23 using namespace llvm;
24 
25 namespace llvm {
26 
27 namespace AMDGPU {
28 
30  return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
31  GV->getValueType());
32 }
33 
34 static bool shouldLowerLDSToStruct(const GlobalVariable &GV,
35  const Function *F) {
36  // We are not interested in kernel LDS lowering for module LDS itself.
37  if (F && GV.getName() == "llvm.amdgcn.module.lds")
38  return false;
39 
40  bool Ret = false;
43 
44  assert(!F || isKernelCC(F));
45 
46  while (!Stack.empty()) {
47  const User *V = Stack.pop_back_val();
48  Visited.insert(V);
49 
50  if (isa<GlobalValue>(V)) {
51  // This use of the LDS variable is the initializer of a global variable.
52  // This is ill formed. The address of an LDS variable is kernel dependent
53  // and unknown until runtime. It can't be written to a global variable.
54  continue;
55  }
56 
57  if (auto *I = dyn_cast<Instruction>(V)) {
58  const Function *UF = I->getFunction();
59  if (UF == F) {
60  // Used from this kernel, we want to put it into the structure.
61  Ret = true;
62  } else if (!F) {
63  // For module LDS lowering, lowering is required if the user instruction
64  // is from non-kernel function.
65  Ret |= !isKernelCC(UF);
66  }
67  continue;
68  }
69 
70  // User V should be a constant, recursively visit users of V.
71  assert(isa<Constant>(V) && "Expected a constant.");
72  append_range(Stack, V->users());
73  }
74 
75  return Ret;
76 }
77 
80  return false;
81  }
82  if (!GV.hasInitializer()) {
83  // addrspace(3) without initializer implies cuda/hip extern __shared__
84  // the semantics for such a variable appears to be that all extern
85  // __shared__ variables alias one another, in which case this transform
86  // is not required
87  return false;
88  }
89  if (!isa<UndefValue>(GV.getInitializer())) {
90  // Initializers are unimplemented for LDS address space.
91  // Leave such variables in place for consistent error reporting.
92  return false;
93  }
94  if (GV.isConstant()) {
95  // A constant undef variable can't be written to, and any load is
96  // undef, so it should be eliminated by the optimizer. It could be
97  // dropped by the back end if not. This pass skips over it.
98  return false;
99  }
100  return true;
101 }
102 
103 std::vector<GlobalVariable *> findLDSVariablesToLower(Module &M,
104  const Function *F) {
105  std::vector<llvm::GlobalVariable *> LocalVars;
106  for (auto &GV : M.globals()) {
107  if (!isLDSVariableToLower(GV)) {
108  continue;
109  }
110  if (!shouldLowerLDSToStruct(GV, F)) {
111  continue;
112  }
113  LocalVars.push_back(&GV);
114  }
115  return LocalVars;
116 }
117 
119  Instruction *DefInst = Def->getMemoryInst();
120 
121  if (isa<FenceInst>(DefInst))
122  return false;
123 
124  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
125  switch (II->getIntrinsicID()) {
126  case Intrinsic::amdgcn_s_barrier:
127  case Intrinsic::amdgcn_wave_barrier:
128  case Intrinsic::amdgcn_sched_barrier:
129  case Intrinsic::amdgcn_sched_group_barrier:
130  return false;
131  default:
132  break;
133  }
134  }
135 
136  // Ignore atomics not aliasing with the original load, any atomic is a
137  // universal MemoryDef from MSSA's point of view too, just like a fence.
138  const auto checkNoAlias = [AA, Ptr](auto I) -> bool {
139  return I && AA->isNoAlias(I->getPointerOperand(), Ptr);
140  };
141 
142  if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
143  checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
144  return false;
145 
146  return true;
147 }
148 
150  AAResults *AA) {
151  MemorySSAWalker *Walker = MSSA->getWalker();
155 
156  LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');
157 
158  // Start with a nearest dominating clobbering access, it will be either
159  // live on entry (nothing to do, load is not clobbered), MemoryDef, or
160  // MemoryPhi if several MemoryDefs can define this memory state. In that
161  // case add all Defs to WorkList and continue going up and checking all
162  // the definitions of this memory location until the root. When all the
163  // defs are exhausted and came to the entry state we have no clobber.
164  // Along the scan ignore barriers and fences which are considered clobbers
165  // by the MemorySSA, but not really writing anything into the memory.
166  while (!WorkList.empty()) {
167  MemoryAccess *MA = WorkList.pop_back_val();
168  if (!Visited.insert(MA).second)
169  continue;
170 
171  if (MSSA->isLiveOnEntryDef(MA))
172  continue;
173 
174  if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
175  LLVM_DEBUG(dbgs() << " Def: " << *Def->getMemoryInst() << '\n');
176 
177  if (isReallyAClobber(Load->getPointerOperand(), Def, AA)) {
178  LLVM_DEBUG(dbgs() << " -> load is clobbered\n");
179  return true;
180  }
181 
182  WorkList.push_back(
183  Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
184  continue;
185  }
186 
187  const MemoryPhi *Phi = cast<MemoryPhi>(MA);
188  for (const auto &Use : Phi->incoming_values())
189  WorkList.push_back(cast<MemoryAccess>(&Use));
190  }
191 
192  LLVM_DEBUG(dbgs() << " -> no clobber\n");
193  return false;
194 }
195 
196 } // end namespace AMDGPU
197 
198 } // end namespace llvm
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:376
llvm::MemoryLocation::get
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Definition: MemoryLocation.cpp:36
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Value::getPointerAlignment
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:918
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
IntrinsicInst.h
llvm::Function
Definition: Function.h:60
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:729
llvm::AAResults::isNoAlias
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are no-alias.
Definition: AliasAnalysis.h:348
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:136
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::AMDGPU::shouldLowerLDSToStruct
static bool shouldLowerLDSToStruct(const GlobalVariable &GV, const Function *F)
Definition: AMDGPUMemoryUtils.cpp:34
llvm::MemoryPhi
Represents phi nodes for memory accesses.
Definition: MemorySSA.h:479
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:119
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::GlobalVariable::hasInitializer
bool hasInitializer() const
Definitions have initializers, declarations don't.
Definition: GlobalVariable.h:91
AliasAnalysis.h
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::AMDGPU::findLDSVariablesToLower
std::vector< GlobalVariable * > findLDSVariablesToLower(Module &M, const Function *F)
Definition: AMDGPUMemoryUtils.cpp:103
AMDGPUMemoryUtils.h
llvm::AAResults
Definition: AliasAnalysis.h:294
llvm::MemorySSA::isLiveOnEntryDef
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition: MemorySSA.h:737
llvm::User
Definition: User.h:44
AMDGPU
Definition: AMDGPUReplaceLDSUseWithPointer.cpp:114
llvm::Instruction
Definition: Instruction.h:42
llvm::AMDGPU::isClobberedInFunction
bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA, AAResults *AA)
Check is a Load is clobbered in its function.
Definition: AMDGPUMemoryUtils.cpp:149
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::MemorySSAWalker::getClobberingMemoryAccess
MemoryAccess * getClobberingMemoryAccess(const Instruction *I)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition: MemorySSA.h:1044
llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition: GlobalVariable.h:135
llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:700
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::MemoryDef
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition: MemorySSA.h:372
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::AMDGPU::isLDSVariableToLower
bool isLDSVariableToLower(const GlobalVariable &GV)
Definition: AMDGPUMemoryUtils.cpp:78
llvm::MemoryPhi::incoming_values
op_range incoming_values()
Definition: MemorySSA.h:522
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:60
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::MemorySSA::getWalker
MemorySSAWalker * getWalker()
Definition: MemorySSA.cpp:1560
DataLayout.h
AMDGPU.h
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1988
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::AMDGPU::isReallyAClobber
bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA)
Given a Def clobbering a load from Ptr according to the MSSA check if this is actually a memory updat...
Definition: AMDGPUMemoryUtils.cpp:118
llvm::MemoryAccess
Definition: MemorySSA.h:142
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:173
llvm::AMDGPU::getAlign
Align getAlign(DataLayout const &DL, const GlobalVariable *GV)
Definition: AMDGPUMemoryUtils.cpp:29
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::SmallSet::insert
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:178
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
MemorySSA.h
Instructions.h
llvm::GlobalVariable::isConstant
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
Definition: GlobalVariable.h:152
ReplaceConstant.h
llvm::GlobalValue::getType
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:288
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:290
llvm::MemorySSAWalker
This is the generic walker interface for walkers of MemorySSA.
Definition: MemorySSA.h:1015
llvm::AMDGPU::isKernelCC
bool isKernelCC(const Function *Func)
Definition: AMDGPUBaseInfo.cpp:1829
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::MemoryLocation
Representation for a specific memory location.
Definition: MemoryLocation.h:210
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
AMDGPUBaseInfo.h
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365