LLVM  13.0.0git
AMDGPULDSUtils.cpp
Go to the documentation of this file.
1 //===- AMDGPULDSUtils.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // AMDGPU LDS related helper utility functions.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPULDSUtils.h"
14 #include "Utils/AMDGPUBaseInfo.h"
15 #include "llvm/IR/Constants.h"
16 
17 using namespace llvm;
18 
19 namespace llvm {
20 
21 namespace AMDGPU {
22 
23 bool isKernelCC(Function *Func) {
24  return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
25 }
26 
28  return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
29  GV->getValueType());
30 }
31 
33  User *InitialUser) {
34  // Any LDS variable can be lowered by moving into the created struct
35  // Each variable so lowered is allocated in every kernel, so variables
36  // whose users are all known to be safe to lower without the transform
37  // are left unchanged.
38  SmallPtrSet<User *, 8> Visited;
40  Stack.push_back(InitialUser);
41 
42  while (!Stack.empty()) {
43  User *V = Stack.pop_back_val();
44  Visited.insert(V);
45 
46  if (auto *G = dyn_cast<GlobalValue>(V->stripPointerCasts())) {
47  if (UsedList.contains(G)) {
48  continue;
49  }
50  }
51 
52  if (auto *I = dyn_cast<Instruction>(V)) {
53  if (isKernelCC(I->getFunction())) {
54  continue;
55  }
56  }
57 
58  if (auto *E = dyn_cast<ConstantExpr>(V)) {
59  for (Value::user_iterator EU = E->user_begin(); EU != E->user_end();
60  ++EU) {
61  if (Visited.insert(*EU).second) {
62  Stack.push_back(*EU);
63  }
64  }
65  continue;
66  }
67 
68  // Unknown user, conservatively lower the variable
69  return true;
70  }
71 
72  return false;
73 }
74 
75 std::vector<GlobalVariable *>
77  const SmallPtrSetImpl<GlobalValue *> &UsedList) {
78  std::vector<llvm::GlobalVariable *> LocalVars;
79  for (auto &GV : M.globals()) {
80  if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
81  continue;
82  }
83  if (!GV.hasInitializer()) {
84  // addrspace(3) without initializer implies cuda/hip extern __shared__
85  // the semantics for such a variable appears to be that all extern
86  // __shared__ variables alias one another, in which case this transform
87  // is not required
88  continue;
89  }
90  if (!isa<UndefValue>(GV.getInitializer())) {
91  // Initializers are unimplemented for local address space.
92  // Leave such variables in place for consistent error reporting.
93  continue;
94  }
95  if (GV.isConstant()) {
96  // A constant undef variable can't be written to, and any load is
97  // undef, so it should be eliminated by the optimizer. It could be
98  // dropped by the back end if not. This pass skips over it.
99  continue;
100  }
101  if (std::none_of(GV.user_begin(), GV.user_end(), [&](User *U) {
102  return userRequiresLowering(UsedList, U);
103  })) {
104  continue;
105  }
106  LocalVars.push_back(&GV);
107  }
108  return LocalVars;
109 }
110 
113 
115  collectUsedGlobalVariables(M, TmpVec, true);
116  UsedList.insert(TmpVec.begin(), TmpVec.end());
117 
118  TmpVec.clear();
119  collectUsedGlobalVariables(M, TmpVec, false);
120  UsedList.insert(TmpVec.begin(), TmpVec.end());
121 
122  return UsedList;
123 }
124 
125 } // end namespace AMDGPU
126 
127 } // end namespace llvm
llvm
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Value::getPointerAlignment
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:879
llvm::none_of
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1496
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::Function
Definition: Function.h:61
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::collectUsedGlobalVariables
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:728
llvm::AMDGPU::getUsedList
SmallPtrSet< GlobalValue *, 32 > getUsedList(Module &M)
Definition: AMDGPULDSUtils.cpp:111
AMDGPULDSUtils.h
Constants.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::User
Definition: User.h:44
llvm::AMDGPU::findVariablesToLower
std::vector< GlobalVariable * > findVariablesToLower(Module &M, const SmallPtrSetImpl< GlobalValue * > &UsedList)
Definition: AMDGPULDSUtils.cpp:76
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::AMDGPU::isModuleEntryFunctionCC
bool isModuleEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1353
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:649
llvm::AMDGPU::isKernelCC
bool isKernelCC(Function *Func)
Definition: AMDGPULDSUtils.cpp:23
llvm::AMDGPU::getAlign
Align getAlign(DataLayout const &DL, const GlobalVariable *GV)
Definition: AMDGPULDSUtils.cpp:27
llvm::Value::user_iterator
user_iterator_impl< User > user_iterator
Definition: Value.h:403
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:364
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:273
llvm::AMDGPU::userRequiresLowering
bool userRequiresLowering(const SmallPtrSetImpl< GlobalValue * > &UsedList, User *InitialUser)
Definition: AMDGPULDSUtils.cpp:32
llvm::SmallPtrSetImpl::contains
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:388
AMDGPUBaseInfo.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364