LLVM  16.0.0git
AMDGPULowerModuleLDSPass.cpp
Go to the documentation of this file.
1 //===-- AMDGPULowerModuleLDSPass.cpp ------------------------------*- C++ -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass eliminates LDS uses from non-kernel functions.
10 //
11 // The strategy is to create a new struct with a field for each LDS variable
12 // and allocate that struct at the same address for every kernel. Uses of the
13 // original LDS variables are then replaced with compile time offsets from that
14 // known address. AMDGPUMachineFunction allocates the LDS global.
15 //
16 // Local variables with constant annotation or non-undef initializer are passed
17 // through unchanged for simplification or error diagnostics in later passes.
18 //
19 // To reduce the memory overhead variables that are only used by kernels are
20 // excluded from this transform. The analysis to determine whether a variable
21 // is only used by a kernel is cheap and conservative so this may allocate
22 // a variable in every kernel when it was not strictly necessary to do so.
23 //
24 // A possible future refinement is to specialise the structure per-kernel, so
25 // that fields can be elided based on more expensive analysis.
26 //
27 //===----------------------------------------------------------------------===//
28 
29 #include "AMDGPU.h"
30 #include "Utils/AMDGPUBaseInfo.h"
32 #include "llvm/ADT/BitVector.h"
33 #include "llvm/ADT/DenseMap.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/ADT/SetVector.h"
37 #include "llvm/IR/Constants.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/IRBuilder.h"
40 #include "llvm/IR/InlineAsm.h"
41 #include "llvm/IR/Instructions.h"
42 #include "llvm/IR/MDBuilder.h"
43 #include "llvm/InitializePasses.h"
44 #include "llvm/Pass.h"
46 #include "llvm/Support/Debug.h"
49 #include <tuple>
50 #include <vector>
51 
52 #define DEBUG_TYPE "amdgpu-lower-module-lds"
53 
54 using namespace llvm;
55 
57  "amdgpu-super-align-lds-globals",
58  cl::desc("Increase alignment of LDS if it is not on align boundary"),
59  cl::init(true), cl::Hidden);
60 
61 namespace {
62 class AMDGPULowerModuleLDS : public ModulePass {
63 
64  static void removeFromUsedList(Module &M, StringRef Name,
66  GlobalVariable *GV = M.getNamedGlobal(Name);
67  if (!GV || ToRemove.empty()) {
68  return;
69  }
70 
72  auto *CA = cast<ConstantArray>(GV->getInitializer());
73  for (auto &Op : CA->operands()) {
74  // ModuleUtils::appendToUsed only inserts Constants
75  Constant *C = cast<Constant>(Op);
76  if (!ToRemove.contains(C->stripPointerCasts())) {
77  Init.push_back(C);
78  }
79  }
80 
81  if (Init.size() == CA->getNumOperands()) {
82  return; // none to remove
83  }
84 
85  GV->eraseFromParent();
86 
87  for (Constant *C : ToRemove) {
88  C->removeDeadConstantUsers();
89  }
90 
91  if (!Init.empty()) {
92  ArrayType *ATy =
93  ArrayType::get(Type::getInt8PtrTy(M.getContext()), Init.size());
94  GV =
96  ConstantArray::get(ATy, Init), Name);
97  GV->setSection("llvm.metadata");
98  }
99  }
100 
101  static void
102  removeFromUsedLists(Module &M,
103  const std::vector<GlobalVariable *> &LocalVars) {
104  // The verifier rejects used lists containing an inttoptr of a constant
105  // so remove the variables from these lists before replaceAllUsesWith
106 
107  SmallPtrSet<Constant *, 32> LocalVarsSet;
108  for (GlobalVariable *LocalVar : LocalVars)
109  if (Constant *C = dyn_cast<Constant>(LocalVar->stripPointerCasts()))
110  LocalVarsSet.insert(C);
111  removeFromUsedList(M, "llvm.used", LocalVarsSet);
112  removeFromUsedList(M, "llvm.compiler.used", LocalVarsSet);
113  }
114 
115  static void markUsedByKernel(IRBuilder<> &Builder, Function *Func,
116  GlobalVariable *SGV) {
117  // The llvm.amdgcn.module.lds instance is implicitly used by all kernels
118  // that might call a function which accesses a field within it. This is
119  // presently approximated to 'all kernels' if there are any such functions
120  // in the module. This implicit use is redefined as an explicit use here so
121  // that later passes, specifically PromoteAlloca, account for the required
122  // memory without any knowledge of this transform.
123 
124  // An operand bundle on llvm.donothing works because the call instruction
125  // survives until after the last pass that needs to account for LDS. It is
126  // better than inline asm as the latter survives until the end of codegen. A
127  // totally robust solution would be a function with the same semantics as
128  // llvm.donothing that takes a pointer to the instance and is lowered to a
129  // no-op after LDS is allocated, but that is not presently necessary.
130 
131  LLVMContext &Ctx = Func->getContext();
132 
133  Builder.SetInsertPoint(Func->getEntryBlock().getFirstNonPHI());
134 
136 
137  Function *Decl =
138  Intrinsic::getDeclaration(Func->getParent(), Intrinsic::donothing, {});
139 
140  Value *UseInstance[1] = {Builder.CreateInBoundsGEP(
141  SGV->getValueType(), SGV, ConstantInt::get(Type::getInt32Ty(Ctx), 0))};
142 
143  Builder.CreateCall(FTy, Decl, {},
144  {OperandBundleDefT<Value *>("ExplicitUse", UseInstance)},
145  "");
146  }
147 
148  static bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M) {
149  // Constants are uniqued within LLVM. A ConstantExpr referring to a LDS
150  // global may have uses from multiple different functions as a result.
151  // This pass specialises LDS variables with respect to the kernel that
152  // allocates them.
153 
154  // This is semantically equivalent to:
155  // for (auto &F : M.functions())
156  // for (auto &BB : F)
157  // for (auto &I : BB)
158  // for (Use &Op : I.operands())
159  // if (constantExprUsesLDS(Op))
160  // replaceConstantExprInFunction(I, Op);
161 
162  bool Changed = false;
163 
164  // Find all ConstantExpr that are direct users of an LDS global
166  for (auto &GV : M.globals())
168  for (User *U : GV.users())
169  if (ConstantExpr *C = dyn_cast<ConstantExpr>(U))
170  Stack.push_back(C);
171 
172  // Expand to include constexpr users of direct users
173  SetVector<ConstantExpr *> ConstExprUsersOfLDS;
174  while (!Stack.empty()) {
175  ConstantExpr *V = Stack.pop_back_val();
176  if (ConstExprUsersOfLDS.contains(V))
177  continue;
178 
179  ConstExprUsersOfLDS.insert(V);
180 
181  for (auto *Nested : V->users())
182  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Nested))
183  Stack.push_back(CE);
184  }
185 
186  // Find all instructions that use any of the ConstExpr users of LDS
188  for (ConstantExpr *CE : ConstExprUsersOfLDS)
189  for (User *U : CE->users())
190  if (auto *I = dyn_cast<Instruction>(U))
191  InstructionWorklist.insert(I);
192 
193  // Replace those ConstExpr operands with instructions
194  while (!InstructionWorklist.empty()) {
195  Instruction *I = InstructionWorklist.pop_back_val();
196  for (Use &U : I->operands()) {
197 
198  auto *BI = I;
199  if (auto *Phi = dyn_cast<PHINode>(I)) {
200  BasicBlock *BB = Phi->getIncomingBlock(U);
201  BasicBlock::iterator It = BB->getFirstInsertionPt();
202  assert(It != BB->end() && "Unexpected empty basic block");
203  BI = &(*(It));
204  }
205 
206  if (ConstantExpr *C = dyn_cast<ConstantExpr>(U.get())) {
207  if (ConstExprUsersOfLDS.contains(C)) {
208  Changed = true;
209  Instruction *NI = C->getAsInstruction(BI);
210  InstructionWorklist.insert(NI);
211  U.set(NI);
212  C->removeDeadConstantUsers();
213  }
214  }
215  }
216  }
217 
218  return Changed;
219  }
220 
221 public:
222  static char ID;
223 
224  AMDGPULowerModuleLDS() : ModulePass(ID) {
226  }
227 
228  bool runOnModule(Module &M) override {
229  LLVMContext &Ctx = M.getContext();
230  CallGraph CG = CallGraph(M);
231  bool Changed = superAlignLDSGlobals(M);
232 
233  Changed |= eliminateConstantExprUsesOfLDSFromAllInstructions(M);
234 
235  // Move variables used by functions into amdgcn.module.lds
236  std::vector<GlobalVariable *> ModuleScopeVariables =
238  if (!ModuleScopeVariables.empty()) {
239  std::string VarName = "llvm.amdgcn.module.lds";
240 
241  GlobalVariable *SGV;
242  DenseMap<GlobalVariable *, Constant *> LDSVarsToConstantGEP;
243  std::tie(SGV, LDSVarsToConstantGEP) =
244  createLDSVariableReplacement(M, VarName, ModuleScopeVariables);
245 
247  M, {static_cast<GlobalValue *>(
249  cast<Constant>(SGV), Type::getInt8PtrTy(Ctx)))});
250 
251  removeFromUsedLists(M, ModuleScopeVariables);
252  replaceLDSVariablesWithStruct(M, ModuleScopeVariables, SGV,
253  LDSVarsToConstantGEP,
254  [](Use &) { return true; });
255 
256  // This ensures the variable is allocated when called functions access it.
257  // It also lets other passes, specifically PromoteAlloca, accurately
258  // calculate how much LDS will be used by the kernel after lowering.
259 
260  IRBuilder<> Builder(Ctx);
261  for (Function &Func : M.functions()) {
262  if (!Func.isDeclaration() && AMDGPU::isKernelCC(&Func)) {
263  const CallGraphNode *N = CG[&Func];
264  const bool CalleesRequireModuleLDS = N->size() > 0;
265 
266  if (CalleesRequireModuleLDS) {
267  // If a function this kernel might call requires module LDS,
268  // annotate the kernel to let later passes know it will allocate
269  // this structure, even if not apparent from the IR.
270  markUsedByKernel(Builder, &Func, SGV);
271  } else {
272  // However if we are certain this kernel cannot call a function that
273  // requires module LDS, annotate the kernel so the backend can elide
274  // the allocation without repeating callgraph walks.
275  Func.addFnAttr("amdgpu-elide-module-lds");
276  }
277  }
278  }
279 
280  Changed = true;
281  }
282 
283  // Move variables used by kernels into per-kernel instances
284  for (Function &F : M.functions()) {
285  if (F.isDeclaration())
286  continue;
287 
288  // Only lower compute kernels' LDS.
289  if (!AMDGPU::isKernel(F.getCallingConv()))
290  continue;
291 
292  std::vector<GlobalVariable *> KernelUsedVariables =
294 
295  if (!KernelUsedVariables.empty()) {
296  // The association between kernel function and LDS struct is done by
297  // symbol name, which only works if the function in question has a name
298  // This is not expected to be a problem in practice as kernels are
299  // called by name making anonymous ones (which are named by the backend)
300  // difficult to use. This does mean that llvm test cases need
301  // to name the kernels.
302  if (!F.hasName()) {
303  report_fatal_error("Anonymous kernels cannot use LDS variables");
304  }
305 
306  std::string VarName =
307  (Twine("llvm.amdgcn.kernel.") + F.getName() + ".lds").str();
308  GlobalVariable *SGV;
309  DenseMap<GlobalVariable *, Constant *> LDSVarsToConstantGEP;
310  std::tie(SGV, LDSVarsToConstantGEP) =
311  createLDSVariableReplacement(M, VarName, KernelUsedVariables);
312 
313  removeFromUsedLists(M, KernelUsedVariables);
314  replaceLDSVariablesWithStruct(
315  M, KernelUsedVariables, SGV, LDSVarsToConstantGEP, [&F](Use &U) {
316  Instruction *I = dyn_cast<Instruction>(U.getUser());
317  return I && I->getFunction() == &F;
318  });
319  Changed = true;
320  }
321  }
322 
323  for (auto &GV : make_early_inc_range(M.globals()))
326  if (GV.use_empty())
327  GV.eraseFromParent();
328  }
329 
330  return Changed;
331  }
332 
333 private:
334  // Increase the alignment of LDS globals if necessary to maximise the chance
335  // that we can use aligned LDS instructions to access them.
336  static bool superAlignLDSGlobals(Module &M) {
337  const DataLayout &DL = M.getDataLayout();
338  bool Changed = false;
339  if (!SuperAlignLDSGlobals) {
340  return Changed;
341  }
342 
343  for (auto &GV : M.globals()) {
345  // Only changing alignment of LDS variables
346  continue;
347  }
348  if (!GV.hasInitializer()) {
349  // cuda/hip extern __shared__ variable, leave alignment alone
350  continue;
351  }
352 
353  Align Alignment = AMDGPU::getAlign(DL, &GV);
354  TypeSize GVSize = DL.getTypeAllocSize(GV.getValueType());
355 
356  if (GVSize > 8) {
357  // We might want to use a b96 or b128 load/store
358  Alignment = std::max(Alignment, Align(16));
359  } else if (GVSize > 4) {
360  // We might want to use a b64 load/store
361  Alignment = std::max(Alignment, Align(8));
362  } else if (GVSize > 2) {
363  // We might want to use a b32 load/store
364  Alignment = std::max(Alignment, Align(4));
365  } else if (GVSize > 1) {
366  // We might want to use a b16 load/store
367  Alignment = std::max(Alignment, Align(2));
368  }
369 
370  if (Alignment != AMDGPU::getAlign(DL, &GV)) {
371  Changed = true;
372  GV.setAlignment(Alignment);
373  }
374  }
375  return Changed;
376  }
377 
378  std::tuple<GlobalVariable *, DenseMap<GlobalVariable *, Constant *>>
379  createLDSVariableReplacement(
380  Module &M, std::string VarName,
381  std::vector<GlobalVariable *> const &LDSVarsToTransform) {
382  // Create a struct instance containing LDSVarsToTransform and map from those
383  // variables to ConstantExprGEP
384  // Variables may be introduced to meet alignment requirements. No aliasing
385  // metadata is useful for these as they have no uses. Erased before return.
386 
387  LLVMContext &Ctx = M.getContext();
388  const DataLayout &DL = M.getDataLayout();
389  assert(!LDSVarsToTransform.empty());
390 
392  LayoutFields.reserve(LDSVarsToTransform.size());
393  {
394  // The order of fields in this struct depends on the order of
395  // varables in the argument which varies when changing how they
396  // are identified, leading to spurious test breakage.
397  std::vector<GlobalVariable *> Sorted(LDSVarsToTransform.begin(),
398  LDSVarsToTransform.end());
399  llvm::sort(Sorted.begin(), Sorted.end(),
400  [](const GlobalVariable *lhs, const GlobalVariable *rhs) {
401  return lhs->getName() < rhs->getName();
402  });
403  for (GlobalVariable *GV : Sorted) {
405  DL.getTypeAllocSize(GV->getValueType()),
406  AMDGPU::getAlign(DL, GV));
407  LayoutFields.emplace_back(F);
408  }
409  }
410 
411  performOptimizedStructLayout(LayoutFields);
412 
413  std::vector<GlobalVariable *> LocalVars;
414  BitVector IsPaddingField;
415  LocalVars.reserve(LDSVarsToTransform.size()); // will be at least this large
416  IsPaddingField.reserve(LDSVarsToTransform.size());
417  {
418  uint64_t CurrentOffset = 0;
419  for (size_t I = 0; I < LayoutFields.size(); I++) {
420  GlobalVariable *FGV = static_cast<GlobalVariable *>(
421  const_cast<void *>(LayoutFields[I].Id));
422  Align DataAlign = LayoutFields[I].Alignment;
423 
424  uint64_t DataAlignV = DataAlign.value();
425  if (uint64_t Rem = CurrentOffset % DataAlignV) {
426  uint64_t Padding = DataAlignV - Rem;
427 
428  // Append an array of padding bytes to meet alignment requested
429  // Note (o + (a - (o % a)) ) % a == 0
430  // (offset + Padding ) % align == 0
431 
432  Type *ATy = ArrayType::get(Type::getInt8Ty(Ctx), Padding);
433  LocalVars.push_back(new GlobalVariable(
434  M, ATy, false, GlobalValue::InternalLinkage, UndefValue::get(ATy),
436  false));
437  IsPaddingField.push_back(true);
438  CurrentOffset += Padding;
439  }
440 
441  LocalVars.push_back(FGV);
442  IsPaddingField.push_back(false);
443  CurrentOffset += LayoutFields[I].Size;
444  }
445  }
446 
447  std::vector<Type *> LocalVarTypes;
448  LocalVarTypes.reserve(LocalVars.size());
450  LocalVars.cbegin(), LocalVars.cend(), std::back_inserter(LocalVarTypes),
451  [](const GlobalVariable *V) -> Type * { return V->getValueType(); });
452 
453  StructType *LDSTy = StructType::create(Ctx, LocalVarTypes, VarName + ".t");
454 
455  Align StructAlign = AMDGPU::getAlign(DL, LocalVars[0]);
456 
457  GlobalVariable *SGV = new GlobalVariable(
458  M, LDSTy, false, GlobalValue::InternalLinkage, UndefValue::get(LDSTy),
460  false);
461  SGV->setAlignment(StructAlign);
462 
464  Type *I32 = Type::getInt32Ty(Ctx);
465  for (size_t I = 0; I < LocalVars.size(); I++) {
466  GlobalVariable *GV = LocalVars[I];
467  Constant *GEPIdx[] = {ConstantInt::get(I32, 0), ConstantInt::get(I32, I)};
468  Constant *GEP = ConstantExpr::getGetElementPtr(LDSTy, SGV, GEPIdx, true);
469  if (IsPaddingField[I]) {
470  assert(GV->use_empty());
471  GV->eraseFromParent();
472  } else {
473  Map[GV] = GEP;
474  }
475  }
476  assert(Map.size() == LDSVarsToTransform.size());
477  return std::make_tuple(SGV, std::move(Map));
478  }
479 
480  template <typename PredicateTy>
481  void replaceLDSVariablesWithStruct(
482  Module &M, std::vector<GlobalVariable *> const &LDSVarsToTransform,
483  GlobalVariable *SGV,
484  DenseMap<GlobalVariable *, Constant *> &LDSVarsToConstantGEP,
485  PredicateTy Predicate) {
486  LLVMContext &Ctx = M.getContext();
487  const DataLayout &DL = M.getDataLayout();
488 
489  // Create alias.scope and their lists. Each field in the new structure
490  // does not alias with all other fields.
491  SmallVector<MDNode *> AliasScopes;
492  SmallVector<Metadata *> NoAliasList;
493  const size_t NumberVars = LDSVarsToTransform.size();
494  if (NumberVars > 1) {
495  MDBuilder MDB(Ctx);
496  AliasScopes.reserve(NumberVars);
497  MDNode *Domain = MDB.createAnonymousAliasScopeDomain();
498  for (size_t I = 0; I < NumberVars; I++) {
499  MDNode *Scope = MDB.createAnonymousAliasScope(Domain);
500  AliasScopes.push_back(Scope);
501  }
502  NoAliasList.append(&AliasScopes[1], AliasScopes.end());
503  }
504 
505  // Replace uses of ith variable with a constantexpr to the corresponding
506  // field of the instance that will be allocated by AMDGPUMachineFunction
507  for (size_t I = 0; I < NumberVars; I++) {
508  GlobalVariable *GV = LDSVarsToTransform[I];
509  Constant *GEP = LDSVarsToConstantGEP[GV];
510 
512  if (GV->use_empty()) {
513  GV->eraseFromParent();
514  }
515 
516  APInt APOff(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
517  GEP->stripAndAccumulateInBoundsConstantOffsets(DL, APOff);
518  uint64_t Offset = APOff.getZExtValue();
519 
520  Align A = commonAlignment(SGV->getAlign().valueOrOne(), Offset);
521 
522  if (I)
523  NoAliasList[I - 1] = AliasScopes[I - 1];
524  MDNode *NoAlias =
525  NoAliasList.empty() ? nullptr : MDNode::get(Ctx, NoAliasList);
526  MDNode *AliasScope =
527  AliasScopes.empty() ? nullptr : MDNode::get(Ctx, {AliasScopes[I]});
528 
529  refineUsesAlignmentAndAA(GEP, A, DL, AliasScope, NoAlias);
530  }
531  }
532 
533  void refineUsesAlignmentAndAA(Value *Ptr, Align A, const DataLayout &DL,
534  MDNode *AliasScope, MDNode *NoAlias,
535  unsigned MaxDepth = 5) {
536  if (!MaxDepth || (A == 1 && !AliasScope))
537  return;
538 
539  for (User *U : Ptr->users()) {
540  if (auto *I = dyn_cast<Instruction>(U)) {
541  if (AliasScope && I->mayReadOrWriteMemory()) {
542  MDNode *AS = I->getMetadata(LLVMContext::MD_alias_scope);
543  AS = (AS ? MDNode::getMostGenericAliasScope(AS, AliasScope)
544  : AliasScope);
545  I->setMetadata(LLVMContext::MD_alias_scope, AS);
546 
547  MDNode *NA = I->getMetadata(LLVMContext::MD_noalias);
548  NA = (NA ? MDNode::intersect(NA, NoAlias) : NoAlias);
549  I->setMetadata(LLVMContext::MD_noalias, NA);
550  }
551  }
552 
553  if (auto *LI = dyn_cast<LoadInst>(U)) {
554  LI->setAlignment(std::max(A, LI->getAlign()));
555  continue;
556  }
557  if (auto *SI = dyn_cast<StoreInst>(U)) {
558  if (SI->getPointerOperand() == Ptr)
559  SI->setAlignment(std::max(A, SI->getAlign()));
560  continue;
561  }
562  if (auto *AI = dyn_cast<AtomicRMWInst>(U)) {
563  // None of atomicrmw operations can work on pointers, but let's
564  // check it anyway in case it will or we will process ConstantExpr.
565  if (AI->getPointerOperand() == Ptr)
566  AI->setAlignment(std::max(A, AI->getAlign()));
567  continue;
568  }
569  if (auto *AI = dyn_cast<AtomicCmpXchgInst>(U)) {
570  if (AI->getPointerOperand() == Ptr)
571  AI->setAlignment(std::max(A, AI->getAlign()));
572  continue;
573  }
574  if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
575  unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
576  APInt Off(BitWidth, 0);
577  if (GEP->getPointerOperand() == Ptr) {
578  Align GA;
579  if (GEP->accumulateConstantOffset(DL, Off))
580  GA = commonAlignment(A, Off.getLimitedValue());
581  refineUsesAlignmentAndAA(GEP, GA, DL, AliasScope, NoAlias,
582  MaxDepth - 1);
583  }
584  continue;
585  }
586  if (auto *I = dyn_cast<Instruction>(U)) {
587  if (I->getOpcode() == Instruction::BitCast ||
588  I->getOpcode() == Instruction::AddrSpaceCast)
589  refineUsesAlignmentAndAA(I, A, DL, AliasScope, NoAlias, MaxDepth - 1);
590  }
591  }
592  }
593 };
594 
595 } // namespace
596 char AMDGPULowerModuleLDS::ID = 0;
597 
599 
600 INITIALIZE_PASS(AMDGPULowerModuleLDS, DEBUG_TYPE,
601  "Lower uses of LDS variables from non-kernel functions", false,
602  false)
603 
605  return new AMDGPULowerModuleLDS();
606 }
607 
610  return AMDGPULowerModuleLDS().runOnModule(M) ? PreservedAnalyses::none()
612 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::GlobalVariable::eraseFromParent
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition: Globals.cpp:459
llvm::BitVector::push_back
void push_back(bool Val)
Definition: BitVector.h:459
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:376
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::wasm::ValType::I32
@ I32
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1481
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:291
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:248
llvm::Function
Definition: Function.h:60
llvm::BitVector::reserve
void reserve(unsigned N)
Definition: BitVector.h:341
Pass.h
llvm::lltok::LocalVar
@ LocalVar
Definition: LLToken.h:440
llvm::GlobalValue::NotThreadLocal
@ NotThreadLocal
Definition: GlobalValue.h:192
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::GlobalObject::getAlign
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:79
InlineAsm.h
llvm::AMDGPULowerModuleLDSID
char & AMDGPULowerModuleLDSID
Definition: AMDGPULowerModuleLDSPass.cpp:598
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:729
ToRemove
ReachingDefAnalysis InstSet & ToRemove
Definition: ARMLowOverheadLoops.cpp:547
llvm::IRBuilder<>
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::FunctionType::get
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:361
llvm::CallGraph
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:72
llvm::tgtok::VarName
@ VarName
Definition: TGLexer.h:72
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
DenseMap.h
llvm::MaybeAlign::valueOrOne
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:142
llvm::OperandBundleDefT
A container for an operand bundle being viewed as a set of values rather than a set of uses.
Definition: AutoUpgrade.h:32
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
STLExtras.h
llvm::ArrayType
Class to represent array types.
Definition: DerivedTypes.h:357
llvm::StructType::create
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Definition: Type.cpp:513
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:237
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1400
llvm::commonAlignment
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:213
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::GlobalVariable::hasInitializer
bool hasInitializer() const
Definitions have initializers, declarations don't.
Definition: GlobalVariable.h:91
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::AMDGPU::isKernel
LLVM_READNONE bool isKernel(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.h:1073
CommandLine.h
llvm::AMDGPU::findLDSVariablesToLower
std::vector< GlobalVariable * > findLDSVariablesToLower(Module &M, const Function *F)
Definition: AMDGPUMemoryUtils.cpp:103
AMDGPUMemoryUtils.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
Constants.h
llvm::msgpack::Type::Map
@ Map
llvm::GlobalObject::setSection
void setSection(StringRef S)
Change the section for this global.
Definition: Globals.cpp:243
llvm::User
Definition: User.h:44
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::createAMDGPULowerModuleLDSPass
ModulePass * createAMDGPULowerModuleLDSPass()
SI
@ SI
Definition: SIInstrInfo.cpp:7966
Domain
Domain
Definition: CorrelatedValuePropagation.cpp:709
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPULowerModuleLDSPass.cpp:52
llvm::Instruction
Definition: Instruction.h:42
MDBuilder.h
llvm::appendToCompilerUsed
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
Definition: ModuleUtils.cpp:111
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1713
BitVector.h
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
llvm::Use::getUser
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
llvm::CallGraphNode
A node in the call graph for a module.
Definition: CallGraph.h:166
llvm::BitVector
Definition: BitVector.h:75
llvm::SmallVectorImpl::append
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
INITIALIZE_PASS
INITIALIZE_PASS(AMDGPULowerModuleLDS, DEBUG_TYPE, "Lower uses of LDS variables from non-kernel functions", false, false) ModulePass *llvm
Definition: AMDGPULowerModuleLDSPass.cpp:600
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:344
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1657
llvm::cl::opt< bool >
llvm::PPC::Predicate
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::SetVector::contains
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:209
llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition: GlobalVariable.h:135
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::MDNode::intersect
static MDNode * intersect(MDNode *A, MDNode *B)
Definition: Metadata.cpp:1020
llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:416
uint64_t
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:716
llvm::pdb::PDB_MemoryType::Stack
@ Stack
transform
instcombine should handle this transform
Definition: README.txt:262
OptimizedStructLayout.h
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::AMDGPU::isLDSVariableToLower
bool isLDSVariableToLower(const GlobalVariable &GV)
Definition: AMDGPUMemoryUtils.cpp:78
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:60
llvm::WinEH::EncodingType::CE
@ CE
Windows NT (Windows on ARM)
llvm::ArrayType::get
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:638
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::MDNode
Metadata node.
Definition: Metadata.h:944
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::GlobalValue::AppendingLinkage
@ AppendingLinkage
Special purpose, only applies to global arrays.
Definition: GlobalValue.h:54
llvm::Constant::removeDeadConstantUsers
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Definition: Constants.cpp:702
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
AMDGPU.h
llvm::MDNode::getMostGenericAliasScope
static MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
Definition: Metadata.cpp:1033
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
SuperAlignLDSGlobals
static cl::opt< bool > SuperAlignLDSGlobals("amdgpu-super-align-lds-globals", cl::desc("Increase alignment of LDS if it is not on align boundary"), cl::init(true), cl::Hidden)
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::Init
Definition: Record.h:281
llvm::performOptimizedStructLayout
std::pair< uint64_t, Align > performOptimizedStructLayout(MutableArrayRef< OptimizedStructLayoutField > Fields)
Compute a layout for a struct containing the given fields, making a best-effort attempt to minimize t...
Definition: OptimizedStructLayout.cpp:42
MaxDepth
static const unsigned MaxDepth
Definition: InstCombineMulDivRem.cpp:1082
llvm::initializeAMDGPULowerModuleLDSPass
void initializeAMDGPULowerModuleLDSPass(PassRegistry &)
llvm::AMDGPU::getAlign
Align getAlign(DataLayout const &DL, const GlobalVariable *GV)
Definition: AMDGPUMemoryUtils.cpp:29
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:972
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::TypeSize
Definition: TypeSize.h:435
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::ConstantArray::get
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1241
llvm::InstructionWorklist
InstructionWorklist - This is the worklist management logic for InstCombine and other simplification ...
Definition: InstructionWorklist.h:25
llvm::ConstantExpr::getGetElementPtr
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, bool InBounds=false, Optional< unsigned > InRangeIndex=None, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition: Constants.h:1218
llvm::pdb::PDB_ColorItem::Padding
@ Padding
llvm::MDBuilder
Definition: MDBuilder.h:36
CallGraph.h
llvm::AMDGPULowerModuleLDSPass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: AMDGPULowerModuleLDSPass.cpp:608
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:222
Instructions.h
llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast
static Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
Definition: Constants.cpp:2029
ModuleUtils.h
N
#define N
llvm::GlobalValue::getType
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:290
DerivedTypes.h
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:292
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171
llvm::Value::replaceUsesWithIf
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
Definition: Value.cpp:540
llvm::OptimizedStructLayoutField
A field in a structure.
Definition: OptimizedStructLayout.h:45
llvm::cl::desc
Definition: CommandLine.h:413
llvm::GlobalObject::setAlignment
void setAlignment(MaybeAlign Align)
Definition: Globals.cpp:121
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:667
llvm::AMDGPU::isKernelCC
bool isKernelCC(const Function *Func)
Definition: AMDGPUBaseInfo.cpp:1857
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
SetVector.h
llvm::FunctionType
Class to represent function types.
Definition: DerivedTypes.h:103
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
AMDGPUBaseInfo.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365