LLVM  14.0.0git
AMDGPULowerModuleLDSPass.cpp
Go to the documentation of this file.
1 //===-- AMDGPULowerModuleLDSPass.cpp ------------------------------*- C++ -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass eliminates LDS uses from non-kernel functions.
10 //
11 // The strategy is to create a new struct with a field for each LDS variable
12 // and allocate that struct at the same address for every kernel. Uses of the
13 // original LDS variables are then replaced with compile time offsets from that
14 // known address. AMDGPUMachineFunction allocates the LDS global.
15 //
16 // Local variables with constant annotation or non-undef initializer are passed
17 // through unchanged for simplification or error diagnostics in later passes.
18 //
19 // To reduce the memory overhead variables that are only used by kernels are
20 // excluded from this transform. The analysis to determine whether a variable
21 // is only used by a kernel is cheap and conservative so this may allocate
22 // a variable in every kernel when it was not strictly necessary to do so.
23 //
24 // A possible future refinement is to specialise the structure per-kernel, so
25 // that fields can be elided based on more expensive analysis.
26 //
27 // NOTE: This pass packs LDS variables directly into a struct type, so when
28 // the LDS variables are large it allocates a correspondingly large struct
29 // instance within every kernel. Hence, before running this pass, it is
30 // advisable to run the pass "amdgpu-replace-lds-use-with-pointer", which
31 // replaces LDS uses within non-kernel functions by pointers and thereby
32 // minimizes the unnecessary per-kernel allocation of LDS memory.
33 //
34 //===----------------------------------------------------------------------===//
35 
36 #include "AMDGPU.h"
37 #include "Utils/AMDGPUBaseInfo.h"
38 #include "Utils/AMDGPULDSUtils.h"
39 #include "llvm/ADT/STLExtras.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/DerivedTypes.h"
42 #include "llvm/IR/IRBuilder.h"
43 #include "llvm/IR/InlineAsm.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/MDBuilder.h"
46 #include "llvm/InitializePasses.h"
47 #include "llvm/Pass.h"
49 #include "llvm/Support/Debug.h"
52 #include <vector>
53 
54 #define DEBUG_TYPE "amdgpu-lower-module-lds"
55 
56 using namespace llvm;
57 
59  "amdgpu-super-align-lds-globals",
60  cl::desc("Increase alignment of LDS if it is not on align boundary"),
61  cl::init(true), cl::Hidden);
62 
63 namespace {
64 
65 class AMDGPULowerModuleLDS : public ModulePass {
66 
67  static void removeFromUsedList(Module &M, StringRef Name,
69  GlobalVariable *GV = M.getNamedGlobal(Name);
70  if (!GV || ToRemove.empty()) {
71  return;
72  }
73 
75  auto *CA = cast<ConstantArray>(GV->getInitializer());
76  for (auto &Op : CA->operands()) {
77  // ModuleUtils::appendToUsed only inserts Constants
78  Constant *C = cast<Constant>(Op);
79  if (!ToRemove.contains(C->stripPointerCasts())) {
80  Init.push_back(C);
81  }
82  }
83 
84  if (Init.size() == CA->getNumOperands()) {
85  return; // none to remove
86  }
87 
88  GV->eraseFromParent();
89 
90  for (Constant *C : ToRemove) {
91  C->removeDeadConstantUsers();
92  }
93 
94  if (!Init.empty()) {
95  ArrayType *ATy =
96  ArrayType::get(Type::getInt8PtrTy(M.getContext()), Init.size());
97  GV =
100  GV->setSection("llvm.metadata");
101  }
102  }
103 
104  static void
105  removeFromUsedLists(Module &M,
106  const std::vector<GlobalVariable *> &LocalVars) {
107  SmallPtrSet<Constant *, 32> LocalVarsSet;
108  for (size_t I = 0; I < LocalVars.size(); I++) {
109  if (Constant *C = dyn_cast<Constant>(LocalVars[I]->stripPointerCasts())) {
110  LocalVarsSet.insert(C);
111  }
112  }
113  removeFromUsedList(M, "llvm.used", LocalVarsSet);
114  removeFromUsedList(M, "llvm.compiler.used", LocalVarsSet);
115  }
116 
117  static void markUsedByKernel(IRBuilder<> &Builder, Function *Func,
118  GlobalVariable *SGV) {
119  // The llvm.amdgcn.module.lds instance is implicitly used by all kernels
120  // that might call a function which accesses a field within it. This is
121  // presently approximated to 'all kernels' if there are any such functions
122  // in the module. This implicit use is redefined as an explicit use here so
123  // that later passes, specifically PromoteAlloca, account for the required
124  // memory without any knowledge of this transform.
125 
126  // An operand bundle on llvm.donothing works because the call instruction
127  // survives until after the last pass that needs to account for LDS. It is
128  // better than inline asm as the latter survives until the end of codegen. A
129  // totally robust solution would be a function with the same semantics as
130  // llvm.donothing that takes a pointer to the instance and is lowered to a
131  // no-op after LDS is allocated, but that is not presently necessary.
132 
133  LLVMContext &Ctx = Func->getContext();
134 
135  Builder.SetInsertPoint(Func->getEntryBlock().getFirstNonPHI());
136 
138 
139  Function *Decl =
140  Intrinsic::getDeclaration(Func->getParent(), Intrinsic::donothing, {});
141 
142  Value *UseInstance[1] = {Builder.CreateInBoundsGEP(
143  SGV->getValueType(), SGV, ConstantInt::get(Type::getInt32Ty(Ctx), 0))};
144 
145  Builder.CreateCall(FTy, Decl, {},
146  {OperandBundleDefT<Value *>("ExplicitUse", UseInstance)},
147  "");
148  }
149 
150 private:
152 
153 public:
154  static char ID;
155 
156  AMDGPULowerModuleLDS() : ModulePass(ID) {
158  }
159 
160  bool runOnModule(Module &M) override {
161  UsedList = AMDGPU::getUsedList(M);
162 
163  bool Changed = processUsedLDS(M);
164 
165  for (Function &F : M.functions()) {
166  if (F.isDeclaration())
167  continue;
168 
169  // Only lower compute kernels' LDS.
170  if (!AMDGPU::isKernel(F.getCallingConv()))
171  continue;
172  Changed |= processUsedLDS(M, &F);
173  }
174 
175  UsedList.clear();
176  return Changed;
177  }
178 
179 private:
180  bool processUsedLDS(Module &M, Function *F = nullptr) {
181  LLVMContext &Ctx = M.getContext();
182  const DataLayout &DL = M.getDataLayout();
183 
184  // Find variables to move into new struct instance
185  std::vector<GlobalVariable *> FoundLocalVars =
187 
188  if (FoundLocalVars.empty()) {
189  // No variables to rewrite, no changes made.
190  return false;
191  }
192 
193  // Increase the alignment of LDS globals if necessary to maximise the chance
194  // that we can use aligned LDS instructions to access them.
195  if (SuperAlignLDSGlobals) {
196  for (auto *GV : FoundLocalVars) {
197  Align Alignment = AMDGPU::getAlign(DL, GV);
198  TypeSize GVSize = DL.getTypeAllocSize(GV->getValueType());
199 
200  if (GVSize > 8) {
201  // We might want to use a b96 or b128 load/store
202  Alignment = std::max(Alignment, Align(16));
203  } else if (GVSize > 4) {
204  // We might want to use a b64 load/store
205  Alignment = std::max(Alignment, Align(8));
206  } else if (GVSize > 2) {
207  // We might want to use a b32 load/store
208  Alignment = std::max(Alignment, Align(4));
209  } else if (GVSize > 1) {
210  // We might want to use a b16 load/store
211  Alignment = std::max(Alignment, Align(2));
212  }
213 
214  GV->setAlignment(Alignment);
215  }
216  }
217 
219  LayoutFields.reserve(FoundLocalVars.size());
220  for (GlobalVariable *GV : FoundLocalVars) {
221  OptimizedStructLayoutField F(GV, DL.getTypeAllocSize(GV->getValueType()),
222  AMDGPU::getAlign(DL, GV));
223  LayoutFields.emplace_back(F);
224  }
225 
226  performOptimizedStructLayout(LayoutFields);
227 
228  std::vector<GlobalVariable *> LocalVars;
229  LocalVars.reserve(FoundLocalVars.size()); // will be at least this large
230  {
231  // This usually won't need to insert any padding, perhaps avoid the alloc
232  uint64_t CurrentOffset = 0;
233  for (size_t I = 0; I < LayoutFields.size(); I++) {
234  GlobalVariable *FGV = static_cast<GlobalVariable *>(
235  const_cast<void *>(LayoutFields[I].Id));
236  Align DataAlign = LayoutFields[I].Alignment;
237 
238  uint64_t DataAlignV = DataAlign.value();
239  if (uint64_t Rem = CurrentOffset % DataAlignV) {
240  uint64_t Padding = DataAlignV - Rem;
241 
242  // Append an array of padding bytes to meet alignment requested
243  // Note (o + (a - (o % a)) ) % a == 0
244  // (offset + Padding ) % align == 0
245 
246  Type *ATy = ArrayType::get(Type::getInt8Ty(Ctx), Padding);
247  LocalVars.push_back(new GlobalVariable(
248  M, ATy, false, GlobalValue::InternalLinkage, UndefValue::get(ATy),
250  false));
251  CurrentOffset += Padding;
252  }
253 
254  LocalVars.push_back(FGV);
255  CurrentOffset += LayoutFields[I].Size;
256  }
257  }
258 
259  std::vector<Type *> LocalVarTypes;
260  LocalVarTypes.reserve(LocalVars.size());
262  LocalVars.cbegin(), LocalVars.cend(), std::back_inserter(LocalVarTypes),
263  [](const GlobalVariable *V) -> Type * { return V->getValueType(); });
264 
265  std::string VarName(
266  F ? (Twine("llvm.amdgcn.kernel.") + F->getName() + ".lds").str()
267  : "llvm.amdgcn.module.lds");
268  StructType *LDSTy = StructType::create(Ctx, LocalVarTypes, VarName + ".t");
269 
270  Align StructAlign =
271  AMDGPU::getAlign(DL, LocalVars[0]);
272 
273  GlobalVariable *SGV = new GlobalVariable(
274  M, LDSTy, false, GlobalValue::InternalLinkage, UndefValue::get(LDSTy),
276  false);
277  SGV->setAlignment(StructAlign);
278  if (!F) {
280  M, {static_cast<GlobalValue *>(
282  cast<Constant>(SGV), Type::getInt8PtrTy(Ctx)))});
283  }
284 
285  // The verifier rejects used lists containing an inttoptr of a constant
286  // so remove the variables from these lists before replaceAllUsesWith
287  removeFromUsedLists(M, LocalVars);
288 
289  // Create alias.scope and their lists. Each field in the new structure
290  // does not alias with all other fields.
291  SmallVector<MDNode *> AliasScopes;
292  SmallVector<Metadata *> NoAliasList;
293  if (LocalVars.size() > 1) {
294  MDBuilder MDB(Ctx);
295  AliasScopes.reserve(LocalVars.size());
296  MDNode *Domain = MDB.createAnonymousAliasScopeDomain();
297  for (size_t I = 0; I < LocalVars.size(); I++) {
298  MDNode *Scope = MDB.createAnonymousAliasScope(Domain);
299  AliasScopes.push_back(Scope);
300  }
301  NoAliasList.append(&AliasScopes[1], AliasScopes.end());
302  }
303 
304  // Replace uses of ith variable with a constantexpr to the ith field of the
305  // instance that will be allocated by AMDGPUMachineFunction
306  Type *I32 = Type::getInt32Ty(Ctx);
307  for (size_t I = 0; I < LocalVars.size(); I++) {
308  GlobalVariable *GV = LocalVars[I];
309  Constant *GEPIdx[] = {ConstantInt::get(I32, 0), ConstantInt::get(I32, I)};
310  Constant *GEP = ConstantExpr::getGetElementPtr(LDSTy, SGV, GEPIdx);
311  if (F) {
312  // Replace all constant uses with instructions if they belong to the
313  // current kernel.
314  for (User *U : make_early_inc_range(GV->users())) {
315  if (ConstantExpr *C = dyn_cast<ConstantExpr>(U))
317  }
318 
320 
321  GV->replaceUsesWithIf(GEP, [F](Use &U) {
322  Instruction *I = dyn_cast<Instruction>(U.getUser());
323  return I && I->getFunction() == F;
324  });
325  } else {
326  GV->replaceAllUsesWith(GEP);
327  }
328  if (GV->use_empty()) {
329  UsedList.erase(GV);
330  GV->eraseFromParent();
331  }
332 
333  uint64_t Off = DL.getStructLayout(LDSTy)->getElementOffset(I);
334  Align A = commonAlignment(StructAlign, Off);
335 
336  if (I)
337  NoAliasList[I - 1] = AliasScopes[I - 1];
338  MDNode *NoAlias =
339  NoAliasList.empty() ? nullptr : MDNode::get(Ctx, NoAliasList);
340  MDNode *AliasScope =
341  AliasScopes.empty() ? nullptr : MDNode::get(Ctx, {AliasScopes[I]});
342 
343  refineUsesAlignmentAndAA(GEP, A, DL, AliasScope, NoAlias);
344  }
345 
346  // Mark kernels with an explicit use of the allocated structure (an operand
347  // bundle on a call to llvm.donothing, see markUsedByKernel). This is not
348  // necessary for lowering. It lets other passes, specifically PromoteAlloca,
349  // accurately calculate how much LDS the kernel will use after lowering.
350  if (!F) {
351  IRBuilder<> Builder(Ctx);
353  for (Function &Func : M.functions()) {
354  if (Func.isDeclaration())
355  continue;
356 
357  if (AMDGPU::isKernelCC(&Func) && !Kernels.contains(&Func)) {
358  markUsedByKernel(Builder, &Func, SGV);
359  Kernels.insert(&Func);
360  }
361  }
362  }
363  return true;
364  }
365 
366  void refineUsesAlignmentAndAA(Value *Ptr, Align A, const DataLayout &DL,
367  MDNode *AliasScope, MDNode *NoAlias,
368  unsigned MaxDepth = 5) {
369  if (!MaxDepth || (A == 1 && !AliasScope))
370  return;
371 
372  for (User *U : Ptr->users()) {
373  if (auto *I = dyn_cast<Instruction>(U)) {
374  if (AliasScope && I->mayReadOrWriteMemory()) {
375  MDNode *AS = I->getMetadata(LLVMContext::MD_alias_scope);
376  AS = (AS ? MDNode::getMostGenericAliasScope(AS, AliasScope)
377  : AliasScope);
378  I->setMetadata(LLVMContext::MD_alias_scope, AS);
379 
380  MDNode *NA = I->getMetadata(LLVMContext::MD_noalias);
381  NA = (NA ? MDNode::intersect(NA, NoAlias) : NoAlias);
382  I->setMetadata(LLVMContext::MD_noalias, NA);
383  }
384  }
385 
386  if (auto *LI = dyn_cast<LoadInst>(U)) {
387  LI->setAlignment(std::max(A, LI->getAlign()));
388  continue;
389  }
390  if (auto *SI = dyn_cast<StoreInst>(U)) {
391  if (SI->getPointerOperand() == Ptr)
392  SI->setAlignment(std::max(A, SI->getAlign()));
393  continue;
394  }
395  if (auto *AI = dyn_cast<AtomicRMWInst>(U)) {
396  // None of atomicrmw operations can work on pointers, but let's
397  // check it anyway in case it will or we will process ConstantExpr.
398  if (AI->getPointerOperand() == Ptr)
399  AI->setAlignment(std::max(A, AI->getAlign()));
400  continue;
401  }
402  if (auto *AI = dyn_cast<AtomicCmpXchgInst>(U)) {
403  if (AI->getPointerOperand() == Ptr)
404  AI->setAlignment(std::max(A, AI->getAlign()));
405  continue;
406  }
407  if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
408  unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
409  APInt Off(BitWidth, 0);
410  if (GEP->getPointerOperand() == Ptr) {
411  Align GA;
412  if (GEP->accumulateConstantOffset(DL, Off))
413  GA = commonAlignment(A, Off.getLimitedValue());
414  refineUsesAlignmentAndAA(GEP, GA, DL, AliasScope, NoAlias,
415  MaxDepth - 1);
416  }
417  continue;
418  }
419  if (auto *I = dyn_cast<Instruction>(U)) {
420  if (I->getOpcode() == Instruction::BitCast ||
421  I->getOpcode() == Instruction::AddrSpaceCast)
422  refineUsesAlignmentAndAA(I, A, DL, AliasScope, NoAlias, MaxDepth - 1);
423  }
424  }
425  }
426 };
427 
428 } // namespace
429 char AMDGPULowerModuleLDS::ID = 0;
430 
432 
433 INITIALIZE_PASS(AMDGPULowerModuleLDS, DEBUG_TYPE,
434  "Lower uses of LDS variables from non-kernel functions", false,
435  false)
436 
438  return new AMDGPULowerModuleLDS();
439 }
440 
443  return AMDGPULowerModuleLDS().runOnModule(M) ? PreservedAnalyses::none()
445 }
llvm::AMDGPU::findVariablesToLower
std::vector< GlobalVariable * > findVariablesToLower(Module &M, const Function *F)
Definition: AMDGPULDSUtils.cpp:311
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::GlobalVariable::eraseFromParent
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition: Globals.cpp:385
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::wasm::ValType::I32
@ I32
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:378
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1379
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:255
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:238
llvm::Function
Definition: Function.h:61
Pass.h
llvm::GlobalValue::NotThreadLocal
@ NotThreadLocal
Definition: GlobalValue.h:179
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
InlineAsm.h
llvm::AMDGPULowerModuleLDSID
char & AMDGPULowerModuleLDSID
Definition: AMDGPULowerModuleLDSPass.cpp:431
ToRemove
ReachingDefAnalysis InstSet & ToRemove
Definition: ARMLowOverheadLoops.cpp:536
llvm::IRBuilder<>
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::FunctionType::get
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:325
llvm::tgtok::VarName
@ VarName
Definition: TGLexer.h:71
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::OperandBundleDefT
A container for an operand bundle being viewed as a set of values rather than a set of uses.
Definition: InstrTypes.h:1114
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
STLExtras.h
llvm::ArrayType
Class to represent array types.
Definition: DerivedTypes.h:357
llvm::StructType::create
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Definition: Type.cpp:477
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:201
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:203
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1208
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::AMDGPU::isKernel
LLVM_READNONE bool isKernel(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.h:723
llvm::AMDGPU::getUsedList
SmallPtrSet< GlobalValue *, 32 > getUsedList(Module &M)
Definition: AMDGPULDSUtils.cpp:344
llvm::AMDGPU::HSAMD::Key::Kernels
constexpr char Kernels[]
Key for HSA::Metadata::mKernels.
Definition: AMDGPUMetadata.h:427
CommandLine.h
AMDGPULDSUtils.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Constants.h
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:648
llvm::GlobalObject::setSection
void setSection(StringRef S)
Change the section for this global.
Definition: Globals.cpp:212
llvm::User
Definition: User.h:44
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::createAMDGPULowerModuleLDSPass
ModulePass * createAMDGPULowerModuleLDSPass()
Domain
Domain
Definition: CorrelatedValuePropagation.cpp:685
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPULowerModuleLDSPass.cpp:54
llvm::Instruction
Definition: Instruction.h:45
MDBuilder.h
llvm::appendToCompilerUsed
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
Definition: ModuleUtils.cpp:110
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1771
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:900
llvm::Use::getUser
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:73
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
INITIALIZE_PASS
INITIALIZE_PASS(AMDGPULowerModuleLDS, DEBUG_TYPE, "Lower uses of LDS variables from non-kernel functions", false, false) ModulePass *llvm
Definition: AMDGPULowerModuleLDSPass.cpp:433
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:345
llvm::cl::opt< bool >
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition: GlobalVariable.h:136
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::MDNode::intersect
static MDNode * intersect(MDNode *A, MDNode *B)
Definition: Metadata.cpp:928
uint64_t
llvm::AMDGPU::replaceConstantUsesInFunction
void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F)
Replace all uses of constant C with instructions in F.
Definition: AMDGPULDSUtils.cpp:225
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:576
OptimizedStructLayout.h
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:354
IRBuilder.h
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::ArrayType::get
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:602
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::MDNode
Metadata node.
Definition: Metadata.h:906
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:650
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::GlobalValue::AppendingLinkage
@ AppendingLinkage
Special purpose, only applies to global arrays.
Definition: GlobalValue.h:54
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:94
llvm::Constant::removeDeadConstantUsers
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Definition: Constants.cpp:741
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
AMDGPU.h
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
llvm::MDNode::getMostGenericAliasScope
static MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
Definition: Metadata.cpp:941
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
SuperAlignLDSGlobals
static cl::opt< bool > SuperAlignLDSGlobals("amdgpu-super-align-lds-globals", cl::desc("Increase alignment of LDS if it is not on align boundary"), cl::init(true), cl::Hidden)
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::Init
Definition: Record.h:271
llvm::performOptimizedStructLayout
std::pair< uint64_t, Align > performOptimizedStructLayout(MutableArrayRef< OptimizedStructLayoutField > Fields)
Compute a layout for a struct containing the given fields, making a best-effort attempt to minimize t...
Definition: OptimizedStructLayout.cpp:42
MaxDepth
static const unsigned MaxDepth
Definition: InstCombineMulDivRem.cpp:869
llvm::initializeAMDGPULowerModuleLDSPass
void initializeAMDGPULowerModuleLDSPass(PassRegistry &)
llvm::AMDGPU::getAlign
Align getAlign(DataLayout const &DL, const GlobalVariable *GV)
Definition: AMDGPULDSUtils.cpp:200
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::commonAlignment
Align commonAlignment(Align A, Align B)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:211
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:936
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:321
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::TypeSize
Definition: TypeSize.h:417
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::ConstantArray::get
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1263
llvm::ConstantExpr::getGetElementPtr
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, bool InBounds=false, Optional< unsigned > InRangeIndex=None, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition: Constants.h:1210
transform
instcombine should handle this transform
Definition: README.txt:262
llvm::MDBuilder
Definition: MDBuilder.h:35
llvm::AMDGPULowerModuleLDSPass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: AMDGPULowerModuleLDSPass.cpp:441
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:186
Instructions.h
llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast
static Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
Definition: Constants.cpp:2035
ModuleUtils.h
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
DerivedTypes.h
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:273
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:172
llvm::Value::replaceUsesWithIf
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
Definition: Value.cpp:540
llvm::OptimizedStructLayoutField
A field in a structure.
Definition: OptimizedStructLayout.h:45
llvm::cl::desc
Definition: CommandLine.h:414
llvm::GlobalObject::setAlignment
void setAlignment(MaybeAlign Align)
Definition: Globals.cpp:117
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:624
llvm::AMDGPU::isKernelCC
bool isKernelCC(const Function *Func)
Definition: AMDGPULDSUtils.cpp:196
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:422
llvm::FunctionType
Class to represent function types.
Definition: DerivedTypes.h:103
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
AMDGPUBaseInfo.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37