LLVM  16.0.0git
AMDGPUReplaceLDSUseWithPointer.cpp
Go to the documentation of this file.
1 //===-- AMDGPUReplaceLDSUseWithPointer.cpp --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass replaces all the uses of LDS within non-kernel functions by
10 // corresponding pointer counter-parts.
11 //
12 // The main motivation behind this pass is - to *avoid* subsequent LDS lowering
13 // pass from directly packing LDS (assume large LDS) into a struct type which
14 // would otherwise cause allocating huge memory for struct instance within every
15 // kernel.
16 //
17 // Brief sketch of the algorithm implemented in this pass is as below:
18 //
19 // 1. Collect all the LDS defined in the module which qualify for pointer
20 // replacement, say it is, LDSGlobals set.
21 //
22 // 2. Collect all the reachable callees for each kernel defined in the module,
23 // say it is, KernelToCallees map.
24 //
25 // 3. FOR (each global GV from LDSGlobals set) DO
26 // LDSUsedNonKernels = Collect all non-kernel functions which use GV.
27 // FOR (each kernel K in KernelToCallees map) DO
28 // ReachableCallees = KernelToCallees[K]
29 // ReachableAndLDSUsedCallees =
30 // SetIntersect(LDSUsedNonKernels, ReachableCallees)
31 // IF (ReachableAndLDSUsedCallees is not empty) THEN
32 // Pointer = Create a pointer to point-to GV if not created.
33 // Initialize Pointer to point-to GV within kernel K.
34 // ENDIF
35 // ENDFOR
36 // Replace all uses of GV within non kernel functions by Pointer.
37 // ENDFOR
38 //
39 // LLVM IR example:
40 //
41 // Input IR:
42 //
43 // @lds = internal addrspace(3) global [4 x i32] undef, align 16
44 //
45 // define internal void @f0() {
46 // entry:
47 // %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @lds,
48 // i32 0, i32 0
49 // ret void
50 // }
51 //
52 // define protected amdgpu_kernel void @k0() {
53 // entry:
54 // call void @f0()
55 // ret void
56 // }
57 //
58 // Output IR:
59 //
60 // @lds = internal addrspace(3) global [4 x i32] undef, align 16
61 // @lds.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2
62 //
63 // define internal void @f0() {
64 // entry:
65 // %0 = load i16, i16 addrspace(3)* @lds.ptr, align 2
66 // %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
67 // %2 = bitcast i8 addrspace(3)* %1 to [4 x i32] addrspace(3)*
68 // %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* %2,
69 // i32 0, i32 0
70 // ret void
71 // }
72 //
73 // define protected amdgpu_kernel void @k0() {
74 // entry:
75 // store i16 ptrtoint ([4 x i32] addrspace(3)* @lds to i16),
76 // i16 addrspace(3)* @lds.ptr, align 2
77 // call void @f0()
78 // ret void
79 // }
80 //
81 //===----------------------------------------------------------------------===//
82 
83 #include "AMDGPU.h"
84 #include "GCNSubtarget.h"
85 #include "Utils/AMDGPUBaseInfo.h"
87 #include "llvm/ADT/DenseMap.h"
88 #include "llvm/ADT/STLExtras.h"
89 #include "llvm/ADT/SetOperations.h"
92 #include "llvm/IR/Constants.h"
93 #include "llvm/IR/DerivedTypes.h"
94 #include "llvm/IR/IRBuilder.h"
95 #include "llvm/IR/InlineAsm.h"
96 #include "llvm/IR/Instructions.h"
97 #include "llvm/IR/IntrinsicsAMDGPU.h"
99 #include "llvm/InitializePasses.h"
100 #include "llvm/Pass.h"
101 #include "llvm/Support/Debug.h"
105 #include <algorithm>
106 #include <vector>
107 
108 #define DEBUG_TYPE "amdgpu-replace-lds-use-with-pointer"
109 
110 using namespace llvm;
111 
112 namespace {
113 
// Forward declarations of the helpers that ReplaceLDSUseImpl calls; they are
// defined further down in this file.
114 namespace AMDGPU {
115 /// Collect all the instructions where user \p U belongs to. \p U could be
116 /// instruction itself or it could be a constant expression which is used within
117 /// an instruction. If \p CollectKernelInsts is true, collect instructions only
118 /// from kernels, otherwise collect instructions only from non-kernel functions.
// NOTE(review): listing line 119, which should carry the return type of this
// declaration, is absent from this view.
120 getFunctionToInstsMap(User *U, bool CollectKernelInsts);
121 
/// Collect all the non-kernel functions within which the LDS global \p GV is
/// used, and return them as a set.
122 SmallPtrSet<Function *, 8> collectNonKernelAccessorsOfLDS(GlobalVariable *GV);
123 
124 } // namespace AMDGPU
125 
126 class ReplaceLDSUseImpl {
127  Module &M;
128  LLVMContext &Ctx;
129  const DataLayout &DL;
130  Constant *LDSMemBaseAddr;
131 
136  DenseMap<Function *, BasicBlock *> KernelToInitBB;
138  FunctionToLDSToReplaceInst;
139 
140  // Collect LDS which requires their uses to be replaced by pointer.
// Returns the candidate LDS globals with every global rejected by
// shouldIgnorePointerReplacement() removed. Because that predicate is run on
// each candidate, LDSToNonKernels is populated for the candidates that pass
// its size check.
141  std::vector<GlobalVariable *> collectLDSRequiringPointerReplace() {
142  // Collect LDS which requires module lowering.
// NOTE(review): listing line 144, which holds the initializer expression of
// LDSGlobals, is absent from this view.
143  std::vector<GlobalVariable *> LDSGlobals =
145 
146  // Remove LDS which don't qualify for replacement.
147  llvm::erase_if(LDSGlobals, [&](GlobalVariable *GV) {
148  return shouldIgnorePointerReplacement(GV);
149  });
150 
151  return LDSGlobals;
152  }
153 
154  // Returns true if uses of given LDS global within non-kernel functions should
155  // be keep as it is without pointer replacement.
156  bool shouldIgnorePointerReplacement(GlobalVariable *GV) {
157  // LDS whose size is very small and doesn't exceed pointer size is not worth
158  // replacing.
159  if (DL.getTypeAllocSize(GV->getValueType()) <= 2)
160  return true;
161 
162  // LDS which is not used from non-kernel function scope or it is used from
163  // global scope does not qualify for replacement.
164  LDSToNonKernels[GV] = AMDGPU::collectNonKernelAccessorsOfLDS(GV);
165  return LDSToNonKernels[GV].empty();
166 
167  // FIXME: When GV is used within all (or within most of the kernels), then
168  // it does not make sense to create a pointer for it.
169  }
170 
171  // Insert new global LDS pointer which points to LDS.
// Returns the i16 global variable associated with GV, creating it (named
// "<GV name>.ptr", internal linkage, undef initializer) on first request and
// returning the one recorded in LDSToPointer on every later request.
172  GlobalVariable *createLDSPointer(GlobalVariable *GV) {
173  // LDS pointer which points to LDS is already created? Return it.
174  auto PointerEntry = LDSToPointer.insert(std::make_pair(GV, nullptr));
175  if (!PointerEntry.second)
176  return PointerEntry.first->second;
177 
178  // We need to create new LDS pointer which points to LDS.
179  //
180  // Each CU owns at max 64K of LDS memory, so LDS address ranges from 0 to
181  // 2^16 - 1. Hence 16 bit pointer is enough to hold the LDS address.
182  auto *I16Ty = Type::getInt16Ty(Ctx);
183  GlobalVariable *LDSPointer = new GlobalVariable(
184  M, I16Ty, false, GlobalValue::InternalLinkage, UndefValue::get(I16Ty),
185  GV->getName() + Twine(".ptr"), nullptr, GlobalVariable::NotThreadLocal,
// NOTE(review): listing lines 186 and 188 (the end of the constructor call
// and one further statement) are absent from this view.
187 
189  LDSPointer->setAlignment(llvm::AMDGPU::getAlign(DL, LDSPointer));
190 
191  // Mark that an associated LDS pointer is created for LDS.
// This overwrites the nullptr placeholder inserted at the top of the function.
192  LDSToPointer[GV] = LDSPointer;
193 
194  return LDSPointer;
195  }
196 
197  // Split entry basic block in such a way that only lane 0 of each wave does
198  // the LDS pointer initialization, and return newly created basic block.
199  BasicBlock *activateLaneZero(Function *K) {
200  // If the entry basic block of kernel K is already split, then return
201  // newly created basic block.
202  auto BasicBlockEntry = KernelToInitBB.insert(std::make_pair(K, nullptr));
203  if (!BasicBlockEntry.second)
204  return BasicBlockEntry.first->second;
205 
206  // Split entry basic block of kernel K.
207  auto *EI = &(*(K->getEntryBlock().getFirstInsertionPt()));
208  IRBuilder<> Builder(EI);
209 
210  Value *Mbcnt =
211  Builder.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {},
212  {Builder.getInt32(-1), Builder.getInt32(0)});
213  Value *Cond = Builder.CreateICmpEQ(Mbcnt, Builder.getInt32(0));
214  Instruction *WB = cast<Instruction>(
215  Builder.CreateIntrinsic(Intrinsic::amdgcn_wave_barrier, {}, {}));
216 
217  BasicBlock *NBB = SplitBlockAndInsertIfThen(Cond, WB, false)->getParent();
218 
219  // Mark that the entry basic block of kernel K is split.
220  KernelToInitBB[K] = NBB;
221 
222  return NBB;
223  }
224 
225  // Within given kernel, initialize given LDS pointer to point to given LDS.
226  void initializeLDSPointer(Function *K, GlobalVariable *GV,
227  GlobalVariable *LDSPointer) {
228  // If LDS pointer is already initialized within K, then nothing to do.
229  auto PointerEntry = KernelToLDSPointers.insert(
230  std::make_pair(K, SmallPtrSet<GlobalVariable *, 8>()));
231  if (!PointerEntry.second)
232  if (PointerEntry.first->second.contains(LDSPointer))
233  return;
234 
235  // Insert instructions at EI which initialize LDS pointer to point-to LDS
236  // within kernel K.
237  //
238  // That is, convert pointer type of GV to i16, and then store this converted
239  // i16 value within LDSPointer which is of type i16*.
240  auto *EI = &(*(activateLaneZero(K)->getFirstInsertionPt()));
241  IRBuilder<> Builder(EI);
242  Builder.CreateStore(Builder.CreatePtrToInt(GV, Type::getInt16Ty(Ctx)),
243  LDSPointer);
244 
245  // Mark that LDS pointer is initialized within kernel K.
246  KernelToLDSPointers[K].insert(LDSPointer);
247  }
248 
249  // We have created an LDS pointer for LDS, and initialized it to point-to LDS
250  // within all relevant kernels. Now replace all the uses of LDS within
251  // non-kernel functions by LDS pointer.
// GV is the LDS global whose uses are rewritten; LDSPointer is the pointer
// global that getReplacementInst() loads from to rebuild the address of GV.
252  void replaceLDSUseByPointer(GlobalVariable *GV, GlobalVariable *LDSPointer) {
// The users are copied into a local vector before the loop; presumably this
// is because the loop body rewrites uses of GV while iterating.
253  SmallVector<User *, 8> LDSUsers(GV->users());
254  for (auto *U : LDSUsers) {
255  // When `U` is a constant expression, it is possible that same constant
256  // expression exists within multiple instructions, and within multiple
257  // non-kernel functions. Collect all those non-kernel functions and all
258  // those instructions within which `U` exist.
259  auto FunctionToInsts =
260  AMDGPU::getFunctionToInstsMap(U, false /*=CollectKernelInsts*/);
261 
262  for (const auto &FunctionToInst : FunctionToInsts) {
263  Function *F = FunctionToInst.first;
264  auto &Insts = FunctionToInst.second;
265  for (auto *I : Insts) {
266  // If `U` is a constant expression, then we need to break the
267  // associated instruction into a set of separate instructions by
268  // converting constant expressions into instructions.
// NOTE(review): listing line 269, which presumably declares UserInsts (used
// below), is absent from this view.
270 
271  if (U == I) {
272  // `U` is an instruction, conversion from constant expression to
273  // set of instructions is *not* required.
274  UserInsts.insert(I);
275  } else {
276  // `U` is a constant expression, convert it into corresponding set
277  // of instructions.
278  auto *CE = cast<ConstantExpr>(U);
279  convertConstantExprsToInstructions(I, CE, &UserInsts);
280  }
281 
282  // Go through all the user instructions, if LDS exist within them as
283  // an operand, then replace it by replace instruction.
284  for (auto *II : UserInsts) {
// getReplacementInst() caches per (F, GV), so repeated calls for the same
// function return the same replacement value.
285  auto *ReplaceInst = getReplacementInst(F, GV, LDSPointer);
286  II->replaceUsesOfWith(GV, ReplaceInst);
287  }
288  }
289  }
290  }
291  }
292 
293  // Create a set of replacement instructions which together replace LDS within
294  // non-kernel function F by accessing LDS indirectly using LDS pointer.
295  Value *getReplacementInst(Function *F, GlobalVariable *GV,
296  GlobalVariable *LDSPointer) {
297  // If the instruction which replaces LDS within F is already created, then
298  // return it.
299  auto LDSEntry = FunctionToLDSToReplaceInst.insert(
300  std::make_pair(F, DenseMap<GlobalVariable *, Value *>()));
301  if (!LDSEntry.second) {
302  auto ReplaceInstEntry =
303  LDSEntry.first->second.insert(std::make_pair(GV, nullptr));
304  if (!ReplaceInstEntry.second)
305  return ReplaceInstEntry.first->second;
306  }
307 
308  // Get the instruction insertion point within the beginning of the entry
309  // block of current non-kernel function.
310  auto *EI = &(*(F->getEntryBlock().getFirstInsertionPt()));
311  IRBuilder<> Builder(EI);
312 
313  // Insert required set of instructions which replace LDS within F.
314  auto *V = Builder.CreateBitCast(
315  Builder.CreateGEP(
316  Builder.getInt8Ty(), LDSMemBaseAddr,
317  Builder.CreateLoad(LDSPointer->getValueType(), LDSPointer)),
318  GV->getType());
319 
320  // Mark that the replacement instruction which replace LDS within F is
321  // created.
322  FunctionToLDSToReplaceInst[F][GV] = V;
323 
324  return V;
325  }
326 
327 public:
// Bind the implementation to module M, caching its context and data layout,
// and build LDSMemBaseAddr: the integer value 0 expressed as a constant of
// i8 pointer type. getReplacementInst() uses it as the base of its GEP.
328  ReplaceLDSUseImpl(Module &M)
329  : M(M), Ctx(M.getContext()), DL(M.getDataLayout()) {
330  LDSMemBaseAddr = Constant::getIntegerValue(
331  PointerType::get(Type::getInt8Ty(M.getContext()),
// NOTE(review): listing line 332, which supplies the address-space argument
// of PointerType::get, is absent from this view.
333  APInt(32, 0));
334  }
335 
336  // Entry-point function which interfaces ReplaceLDSUseImpl with the outside
337  // of the class.
338  bool replaceLDSUse();
339 
340 private:
341  // For a given LDS from collected LDS globals set, replace its non-kernel
342  // function scope uses by pointer.
343  bool replaceLDSUse(GlobalVariable *GV);
344 };
345 
346 // For given LDS from collected LDS globals set, replace its non-kernel function
347 // scope uses by pointer.
348 bool ReplaceLDSUseImpl::replaceLDSUse(GlobalVariable *GV) {
349  // Holds all those non-kernel functions within which LDS is being accessed.
350  SmallPtrSet<Function *, 8> &LDSAccessors = LDSToNonKernels[GV];
351 
352  // The LDS pointer which points to LDS and replaces all the uses of LDS.
353  GlobalVariable *LDSPointer = nullptr;
354 
355  // Traverse through each kernel K, check and if required, initialize the
356  // LDS pointer to point to LDS within K.
357  for (const auto &KernelToCallee : KernelToCallees) {
358  Function *K = KernelToCallee.first;
359  SmallPtrSet<Function *, 8> Callees = KernelToCallee.second;
360 
361  // Compute reachable and LDS used callees for kernel K.
362  set_intersect(Callees, LDSAccessors);
363 
364  // None of the LDS accessing non-kernel functions are reachable from
365  // kernel K. Hence, no need to initialize LDS pointer within kernel K.
366  if (Callees.empty())
367  continue;
368 
369  // We have found reachable and LDS used callees for kernel K, and we need to
370  // initialize LDS pointer within kernel K, and we need to replace LDS use
371  // within those callees by LDS pointer.
372  //
373  // But, first check if LDS pointer is already created, if not create one.
374  LDSPointer = createLDSPointer(GV);
375 
376  // Initialize LDS pointer to point to LDS within kernel K.
377  initializeLDSPointer(K, GV, LDSPointer);
378  }
379 
380  // We have not found reachable and LDS used callees for any of the kernels,
381  // and hence we have not created LDS pointer.
382  if (!LDSPointer)
383  return false;
384 
385  // We have created an LDS pointer for LDS, and initialized it to point-to LDS
386  // within all relevant kernels. Now replace all the uses of LDS within
387  // non-kernel functions by LDS pointer.
388  replaceLDSUseByPointer(GV, LDSPointer);
389 
390  return true;
391 }
392 
393 namespace AMDGPU {
394 
395 // A helper class for collecting all reachable callees for each kernel defined
396 // within the module.
397 class CollectReachableCallees {
398  Module &M;
399  CallGraph CG;
400  SmallPtrSet<CallGraphNode *, 8> AddressTakenFunctions;
401 
402  // Collect all address taken functions within the module.
403  void collectAddressTakenFunctions() {
404  auto *ECNode = CG.getExternalCallingNode();
405 
406  for (const auto &GI : *ECNode) {
407  auto *CGN = GI.second;
408  auto *F = CGN->getFunction();
409  if (!F || F->isDeclaration() || llvm::AMDGPU::isKernelCC(F))
410  continue;
411  AddressTakenFunctions.insert(CGN);
412  }
413  }
414 
415  // For given kernel, collect all its reachable non-kernel functions.
// Returns the set of functions found while walking the call graph starting at
// K. A call edge whose target node has a function contributes that function;
// an indirect call site contributes every collected address-taken function
// whose function type equals the function type of the call site.
416  SmallPtrSet<Function *, 8> collectReachableCallees(Function *K) {
417  SmallPtrSet<Function *, 8> ReachableCallees;
418 
419  // Call graph node which represents this kernel.
420  auto *KCGN = CG[K];
421 
422  // Go through all call graph nodes reachable from the node representing this
423  // kernel, visit all their call sites, if the call site is direct, add
424  // corresponding callee to reachable callee set, if it is indirect, resolve
425  // the indirect call site to potential reachable callees, add them to
426  // reachable callee set, and repeat the process for the newly added
427  // potential callee nodes.
428  //
429  // FIXME: Need to handle bit-casted function pointers.
430  //
// NOTE(review): listing line 431, which presumably declares and seeds the
// CGNStack work list (used below), is absent from this view.
432  SmallPtrSet<CallGraphNode *, 8> VisitedCGNodes;
433  while (!CGNStack.empty()) {
434  auto *CGN = CGNStack.pop_back_val();
435 
// Each call graph node is processed at most once.
436  if (!VisitedCGNodes.insert(CGN).second)
437  continue;
438 
439  // Ignore call graph node which does not have associated function or
440  // associated function is not a definition.
441  if (!CGN->getFunction() || CGN->getFunction()->isDeclaration())
442  continue;
443 
444  for (const auto &GI : *CGN) {
445  auto *RCB = cast<CallBase>(*GI.first);
446  auto *RCGN = GI.second;
447 
448  if (auto *DCallee = RCGN->getFunction()) {
449  ReachableCallees.insert(DCallee);
450  } else if (RCB->isIndirectCall()) {
451  auto *RCBFTy = RCB->getFunctionType();
452  for (auto *ACGN : AddressTakenFunctions) {
453  auto *ACallee = ACGN->getFunction();
454  if (ACallee->getFunctionType() == RCBFTy) {
455  ReachableCallees.insert(ACallee);
// Queue every node reachable (depth first) from the matched callee so that
// its own call sites are visited as well.
456  CGNStack.append(df_begin(ACGN), df_end(ACGN));
457  }
458  }
459  }
460  }
461  }
462 
463  return ReachableCallees;
464  }
465 
466 public:
467  explicit CollectReachableCallees(Module &M) : M(M), CG(CallGraph(M)) {
468  // Collect address taken functions.
469  collectAddressTakenFunctions();
470  }
471 
// Fill KernelToCallees with one entry per selected function of the module,
// mapping it to the set returned by collectReachableCallees(Function *).
472  void collectReachableCallees(
473  DenseMap<Function *, SmallPtrSet<Function *, 8>> &KernelToCallees) {
474  // Collect reachable callee set for each kernel defined in the module.
475  for (Function &F : M.functions()) {
// NOTE(review): listing line 476, which holds the condition guarding this
// `continue` (presumably skipping functions that are not kernels), is absent
// from this view.
477  continue;
478  Function *K = &F;
479  KernelToCallees[K] = collectReachableCallees(K);
480  }
481  }
482 };
483 
484 /// Collect reachable callees for each kernel defined in the module \p M and
485 /// return collected callees at \p KernelToCallees.
486 void collectReachableCallees(
487  Module &M,
488  DenseMap<Function *, SmallPtrSet<Function *, 8>> &KernelToCallees) {
489  CollectReachableCallees CRC{M};
490  CRC.collectReachableCallees(KernelToCallees);
491 }
492 
493 /// For the given LDS global \p GV, visit all its users and collect all
494 /// non-kernel functions within which \p GV is used and return collected list of
495 /// such non-kernel functions.
///
/// The users are walked with an explicit work list: constant users are
/// expanded into their own users, so instructions that reach \p GV only
/// through constant expressions are found as well.
496 SmallPtrSet<Function *, 8> collectNonKernelAccessorsOfLDS(GlobalVariable *GV) {
497  SmallPtrSet<Function *, 8> LDSAccessors;
498  SmallVector<User *, 8> UserStack(GV->users());
499  SmallPtrSet<User *, 8> VisitedUsers;
500 
501  while (!UserStack.empty()) {
502  auto *U = UserStack.pop_back_val();
503 
504  // `U` is already visited? continue to next one.
505  if (!VisitedUsers.insert(U).second)
506  continue;
507 
508  // `U` is a global variable which is initialized with LDS. Ignore LDS.
// NOTE(review): listing line 510, the statement controlled by this `if`, is
// absent from this view.
509  if (isa<GlobalValue>(U))
511 
512  // Recursively explore constant users.
513  if (isa<Constant>(U)) {
514  append_range(UserStack, U->users());
515  continue;
516  }
517 
518  // `U` should be an instruction, if it belongs to a non-kernel function F,
519  // then collect F.
520  Function *F = cast<Instruction>(U)->getFunction();
// NOTE(review): listing line 521, presumably the non-kernel check guarding
// the insertion below, is absent from this view.
522  LDSAccessors.insert(F);
523  }
524 
525  return LDSAccessors;
526 }
527 
// Starting from user U, walk through constant users down to the instructions
// that contain U, and group those instructions by their parent function.
// CollectKernelInsts selects which functions are kept: only kernels when it
// is true, the other branch otherwise.
// NOTE(review): listing line 528 (the return type) and listing line 530
// (presumably the declaration of FunctionToInsts) are absent from this view.
529 getFunctionToInstsMap(User *U, bool CollectKernelInsts) {
531  SmallVector<User *, 8> UserStack;
532  SmallPtrSet<User *, 8> VisitedUsers;
533 
534  UserStack.push_back(U);
535 
536  while (!UserStack.empty()) {
537  auto *UU = UserStack.pop_back_val();
538 
// Each user is processed at most once.
539  if (!VisitedUsers.insert(UU).second)
540  continue;
541 
// Global values that use U are not followed.
542  if (isa<GlobalValue>(UU))
543  continue;
544 
// Any other constant user is expanded into its own users.
545  if (isa<Constant>(UU)) {
546  append_range(UserStack, UU->users());
547  continue;
548  }
549 
// Everything that reaches this point is cast to an instruction.
550  auto *I = cast<Instruction>(UU);
551  Function *F = I->getFunction();
552  if (CollectKernelInsts) {
553  if (!llvm::AMDGPU::isKernelCC(F)) {
554  continue;
555  }
556  } else {
// NOTE(review): listing line 557, the condition of this branch (presumably
// the kernel check that skips kernels), is absent from this view.
558  continue;
559  }
560  }
561 
// The insert below creates the entry for F when it is missing; the indexing
// on the next line then adds I to the set of F.
562  FunctionToInsts.insert(std::make_pair(F, SmallPtrSet<Instruction *, 8>()));
563  FunctionToInsts[F].insert(I);
564  }
565 
566  return FunctionToInsts;
567 }
568 
569 } // namespace AMDGPU
570 
571 // Entry-point function which interface ReplaceLDSUseImpl with outside of the
572 // class.
573 bool ReplaceLDSUseImpl::replaceLDSUse() {
574  // Collect LDS which requires their uses to be replaced by pointer.
575  std::vector<GlobalVariable *> LDSGlobals =
576  collectLDSRequiringPointerReplace();
577 
578  // No LDS to pointer-replace. Nothing to do.
579  if (LDSGlobals.empty())
580  return false;
581 
582  // Collect reachable callee set for each kernel defined in the module.
583  AMDGPU::collectReachableCallees(M, KernelToCallees);
584 
585  if (KernelToCallees.empty()) {
586  // Either module does not have any kernel definitions, or none of the kernel
587  // has a call to non-kernel functions, or we could not resolve any of the
588  // call sites to proper non-kernel functions, because of the situations like
589  // inline asm calls. Nothing to replace.
590  return false;
591  }
592 
593  // For every LDS from collected LDS globals set, replace its non-kernel
594  // function scope use by pointer.
595  bool Changed = false;
596  for (auto *GV : LDSGlobals)
597  Changed |= replaceLDSUse(GV);
598 
599  return Changed;
600 }
601 
// Module pass wrapper deriving from ModulePass; runOnModule() (defined after
// the class) forwards to ReplaceLDSUseImpl.
602 class AMDGPUReplaceLDSUseWithPointer : public ModulePass {
603 public:
// Handed to the ModulePass base class constructor below.
604  static char ID;
605 
// NOTE(review): listing lines 607-608, the body of this constructor, are
// absent from this view.
606  AMDGPUReplaceLDSUseWithPointer() : ModulePass(ID) {
609  }
610 
611  bool runOnModule(Module &M) override;
612 
// NOTE(review): listing line 614, the body of getAnalysisUsage, is absent
// from this view.
613  void getAnalysisUsage(AnalysisUsage &AU) const override {
615  }
616 };
617 
618 } // namespace
619 
623 
625  AMDGPUReplaceLDSUseWithPointer, DEBUG_TYPE,
626  "Replace within non-kernel function use of LDS with pointer",
627  false /*only look at the cfg*/, false /*analysis pass*/)
630  AMDGPUReplaceLDSUseWithPointer, DEBUG_TYPE,
631  "Replace within non-kernel function use of LDS with pointer",
632  false /*only look at the cfg*/, false /*analysis pass*/)
633 
634 bool AMDGPUReplaceLDSUseWithPointer::runOnModule(Module &M) {
635  ReplaceLDSUseImpl LDSUseReplacer{M};
636  return LDSUseReplacer.replaceLDSUse();
637 }
638 
// NOTE(review): the header of this function (listing line 639) is absent from
// this view. The body returns a newly allocated AMDGPUReplaceLDSUseWithPointer.
640  return new AMDGPUReplaceLDSUseWithPointer();
641 }
642 
645  ReplaceLDSUseImpl LDSUseReplacer{M};
646  LDSUseReplacer.replaceLDSUse();
647  return PreservedAnalyses::all();
648 }
use
Move duplicate certain instructions close to their use
Definition: Localizer.cpp:32
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:376
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::CallGraph::getExternalCallingNode
CallGraphNode * getExternalCallingNode() const
Returns the CallGraphNode which is used to represent undetermined calls into the callgraph.
Definition: CallGraph.h:127
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:248
SetOperations.h
llvm::Function
Definition: Function.h:60
pointer
Replace within non kernel function use of LDS with pointer
Definition: AMDGPUReplaceLDSUseWithPointer.cpp:631
Pass.h
llvm::PointerType::get
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Definition: Type.cpp:727
llvm::GlobalValue::NotThreadLocal
@ NotThreadLocal
Definition: GlobalValue.h:192
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::AMDGPUReplaceLDSUseWithPointerID
char & AMDGPUReplaceLDSUseWithPointerID
Definition: AMDGPUReplaceLDSUseWithPointer.cpp:621
InlineAsm.h
llvm::Function::getEntryBlock
const BasicBlock & getEntryBlock() const
Definition: Function.h:691
llvm::IRBuilder<>
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1972
llvm::df_end
df_iterator< T > df_end(const T &G)
Definition: DepthFirstIterator.h:224
llvm::CallGraph
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:72
DenseMap.h
llvm::GlobalValue::UnnamedAddr::Global
@ Global
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUReplaceLDSUseWithPointer, DEBUG_TYPE, "Replace within non-kernel function use of LDS with pointer", false, false) INITIALIZE_PASS_END(AMDGPUReplaceLDSUseWithPointer
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
STLExtras.h
llvm::GlobalValue::setUnnamedAddr
void setUnnamedAddr(UnnamedAddr Val)
Definition: GlobalValue.h:227
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:237
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::AMDGPU::findLDSVariablesToLower
std::vector< GlobalVariable * > findLDSVariablesToLower(Module &M, const Function *F)
Definition: AMDGPUMemoryUtils.cpp:103
AMDGPUMemoryUtils.h
TargetMachine.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
Constants.h
GCNSubtarget.h
llvm::User
Definition: User.h:44
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::convertConstantExprsToInstructions
void convertConstantExprsToInstructions(Instruction *I, ConstantExpr *CE, SmallPtrSetImpl< Instruction * > *Insts=nullptr)
The given instruction I contains given constant expression CE as one of its operands,...
Definition: ReplaceConstant.cpp:22
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:246
false
Definition: StackSlotColoring.cpp:141
AMDGPU
Definition: AMDGPUReplaceLDSUseWithPointer.cpp:114
llvm::Instruction
Definition: Instruction.h:42
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1713
llvm::initializeAMDGPUReplaceLDSUseWithPointerPass
void initializeAMDGPUReplaceLDSUseWithPointerPass(PassRegistry &)
llvm::AMDGPUReplaceLDSUseWithPointerPass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: AMDGPUReplaceLDSUseWithPointer.cpp:644
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPUReplaceLDSUseWithPointer.cpp:108
llvm::SmallPtrSetImplBase::empty
bool empty() const
Definition: SmallPtrSet.h:92
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:58
TargetPassConfig.h
llvm::df_begin
df_iterator< T > df_begin(const T &G)
Definition: DepthFirstIterator.h:219
IRBuilder.h
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:82
llvm::WinEH::EncodingType::CE
@ CE
Windows NT (Windows on ARM)
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::empty
bool empty() const
Definition: DenseMap.h:98
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:138
AMDGPU.h
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1988
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::depth_first
iterator_range< df_iterator< T > > depth_first(const T &G)
Definition: DepthFirstIterator.h:230
llvm::AMDGPU::getAlign
Align getAlign(DataLayout const &DL, const GlobalVariable *GV)
Definition: AMDGPUMemoryUtils.cpp:29
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::createAMDGPUReplaceLDSUseWithPointerPass
ModulePass * createAMDGPUReplaceLDSUseWithPointerPass()
Definition: AMDGPUReplaceLDSUseWithPointer.cpp:639
CallGraph.h
Instructions.h
llvm::Constant::getIntegerValue
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:378
ModuleUtils.h
ReplaceConstant.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition: SmallVector.h:677
llvm::set_intersect
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:40
llvm::Type::getInt16Ty
static IntegerType * getInt16Ty(LLVMContext &C)
Definition: Type.cpp:238
llvm::GlobalValue::getType
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:290
DerivedTypes.h
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:292
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::SplitBlockAndInsertIfThen
Instruction * SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, bool Unreachable, MDNode *BranchWeights, DominatorTree *DT, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
Definition: BasicBlockUtils.cpp:1525
llvm::GlobalObject::setAlignment
void setAlignment(MaybeAlign Align)
Definition: Globals.cpp:121
llvm::AMDGPU::isKernelCC
bool isKernelCC(const Function *Func)
Definition: AMDGPUBaseInfo.cpp:1829
BasicBlockUtils.h
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
of
Add support for conditional and other related patterns Instead of
Definition: README.txt:134
AMDGPUBaseInfo.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365