AMDGPUAttributor.cpp
1 //===- AMDGPUAttributor.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "GCNSubtarget.h"
15 #include "Utils/AMDGPUBaseInfo.h"
17 #include "llvm/IR/IntrinsicsAMDGPU.h"
18 #include "llvm/IR/IntrinsicsR600.h"
21 
22 #define DEBUG_TYPE "amdgpu-attributor"
23 
24 using namespace llvm;
25 
26 #define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
27 
28 enum ImplicitArgumentPositions {
29  #include "AMDGPUAttributes.def"
30  LAST_ARG_POS
31 };
32 
33 #define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
34 
35 enum ImplicitArgumentMask {
36  NOT_IMPLICIT_INPUT = 0,
37  #include "AMDGPUAttributes.def"
38  ALL_ARGUMENT_MASK = ((1 << LAST_ARG_POS) - 1)
39 };
40 
41 #define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
42 static constexpr std::pair<ImplicitArgumentMask,
43  StringLiteral> ImplicitAttrs[] = {
44  #include "AMDGPUAttributes.def"
45 };
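// Illustration (an assumption about AMDGPUAttributes.def, which is not shown
// here): an entry such as
//   AMDGPU_ATTRIBUTE(DISPATCH_PTR, "amdgpu-no-dispatch-ptr")
// is expanded three times by the X-macro above, yielding the position
// enumerator DISPATCH_PTR_POS, the mask bit DISPATCH_PTR = 1 << DISPATCH_PTR_POS,
// and the table row {DISPATCH_PTR, "amdgpu-no-dispatch-ptr"} in ImplicitAttrs.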
46 
47 // We do not need to note the x workitem or workgroup id because they are always
48 // initialized.
49 //
50 // TODO: We should not add the attributes if the known compile time workgroup
51 // size is 1 for y/z.
52 static ImplicitArgumentMask
53 intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
54  bool HasApertureRegs, bool SupportsGetDoorBellID) {
55  unsigned CodeObjectVersion = AMDGPU::getAmdhsaCodeObjectVersion();
56  switch (ID) {
57  case Intrinsic::amdgcn_workitem_id_x:
58  NonKernelOnly = true;
59  return WORKITEM_ID_X;
60  case Intrinsic::amdgcn_workgroup_id_x:
61  NonKernelOnly = true;
62  return WORKGROUP_ID_X;
63  case Intrinsic::amdgcn_workitem_id_y:
64  case Intrinsic::r600_read_tidig_y:
65  return WORKITEM_ID_Y;
66  case Intrinsic::amdgcn_workitem_id_z:
67  case Intrinsic::r600_read_tidig_z:
68  return WORKITEM_ID_Z;
69  case Intrinsic::amdgcn_workgroup_id_y:
70  case Intrinsic::r600_read_tgid_y:
71  return WORKGROUP_ID_Y;
72  case Intrinsic::amdgcn_workgroup_id_z:
73  case Intrinsic::r600_read_tgid_z:
74  return WORKGROUP_ID_Z;
75  case Intrinsic::amdgcn_lds_kernel_id:
76  return LDS_KERNEL_ID;
77  case Intrinsic::amdgcn_dispatch_ptr:
78  return DISPATCH_PTR;
79  case Intrinsic::amdgcn_dispatch_id:
80  return DISPATCH_ID;
81  case Intrinsic::amdgcn_implicitarg_ptr:
82  return IMPLICIT_ARG_PTR;
83  // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
84  // queue_ptr.
85  case Intrinsic::amdgcn_queue_ptr:
86  NeedsImplicit = (CodeObjectVersion == 5);
87  return QUEUE_PTR;
88  case Intrinsic::amdgcn_is_shared:
89  case Intrinsic::amdgcn_is_private:
90  if (HasApertureRegs)
91  return NOT_IMPLICIT_INPUT;
92  // Under V5, we need implicitarg_ptr + offsets to access private_base or
93  // shared_base. For pre-V5, however, we need to access them through queue_ptr +
94  // offsets.
95  return CodeObjectVersion == 5 ? IMPLICIT_ARG_PTR : QUEUE_PTR;
96  case Intrinsic::trap:
97  if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
98  return CodeObjectVersion >= 4 ? NOT_IMPLICIT_INPUT : QUEUE_PTR;
99  NeedsImplicit = (CodeObjectVersion == 5); // Need implicitarg_ptr under V5.
100  return QUEUE_PTR;
101  default:
102  return NOT_IMPLICIT_INPUT;
103  }
104 }
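// Example of how the result is consumed by AAAMDAttributesFunction::updateImpl
// below: for Intrinsic::amdgcn_queue_ptr under code object V5 this returns
// QUEUE_PTR and also sets NeedsImplicit, so the caller clears both the
// QUEUE_PTR and the IMPLICIT_ARG_PTR assumed bits.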
105 
106 static bool castRequiresQueuePtr(unsigned SrcAS) {
107  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
108 }
109 
110 static bool isDSAddress(const Constant *C) {
111  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
112  if (!GV)
113  return false;
114  unsigned AS = GV->getAddressSpace();
115  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
116 }
117 
118 /// Returns true if the function requires the implicit argument be passed
119 /// regardless of the function contents.
120 static bool funcRequiresHostcallPtr(const Function &F) {
121  // Sanitizers require the hostcall buffer passed in the implicit arguments.
122  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
123  F.hasFnAttribute(Attribute::SanitizeThread) ||
124  F.hasFnAttribute(Attribute::SanitizeMemory) ||
125  F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
126  F.hasFnAttribute(Attribute::SanitizeMemTag);
127 }
128 
129 namespace {
130 class AMDGPUInformationCache : public InformationCache {
131 public:
132  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
133  BumpPtrAllocator &Allocator,
134  SetVector<Function *> *CGSCC, TargetMachine &TM)
135  : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
136  TargetMachine &TM;
137 
138  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };
139 
140  /// Check if the subtarget has aperture regs.
141  bool hasApertureRegs(Function &F) {
142  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
143  return ST.hasApertureRegs();
144  }
145 
146  /// Check if the subtarget supports GetDoorbellID.
147  bool supportsGetDoorbellID(Function &F) {
148  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
149  return ST.supportsGetDoorbellID();
150  }
151 
152  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
153  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
154  return ST.getFlatWorkGroupSizes(F);
155  }
156 
157  std::pair<unsigned, unsigned>
158  getMaximumFlatWorkGroupRange(const Function &F) {
159  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
160  return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
161  }
162 
163 private:
164  /// Check if the ConstantExpr \p CE requires the queue pointer.
165  static bool visitConstExpr(const ConstantExpr *CE) {
166  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
167  unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
168  return castRequiresQueuePtr(SrcAS);
169  }
170  return false;
171  }
172 
173  /// Get the constant access bitmap for \p C.
174  uint8_t getConstantAccess(const Constant *C) {
175  auto It = ConstantStatus.find(C);
176  if (It != ConstantStatus.end())
177  return It->second;
178 
179  uint8_t Result = 0;
180  if (isDSAddress(C))
181  Result = DS_GLOBAL;
182 
183  if (const auto *CE = dyn_cast<ConstantExpr>(C))
184  if (visitConstExpr(CE))
185  Result |= ADDR_SPACE_CAST;
186 
187  for (const Use &U : C->operands()) {
188  const auto *OpC = dyn_cast<Constant>(U);
189  if (!OpC)
190  continue;
191 
192  Result |= getConstantAccess(OpC);
193  }
194  return Result;
195  }
196 
197 public:
198  /// Returns true if \p Fn needs the queue pointer because of \p C.
199  bool needsQueuePtr(const Constant *C, Function &Fn) {
200  bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
201  bool HasAperture = hasApertureRegs(Fn);
202 
203  // No need to explore the constants.
204  if (!IsNonEntryFunc && HasAperture)
205  return false;
206 
207  uint8_t Access = getConstantAccess(C);
208 
209  // We need to trap on DS globals in non-entry functions.
210  if (IsNonEntryFunc && (Access & DS_GLOBAL))
211  return true;
212 
213  return !HasAperture && (Access & ADDR_SPACE_CAST);
214  }
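// Example: a ConstantExpr addrspacecast whose source is LOCAL_ADDRESS or
// PRIVATE_ADDRESS sets ADDR_SPACE_CAST, so needsQueuePtr() returns true on
// subtargets without aperture registers; a DS (LDS) global reached from a
// non-entry function returns true regardless of aperture register support.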
215 
216 private:
217  /// Used to determine if the Constant needs the queue pointer.
218  DenseMap<const Constant *, uint8_t> ConstantStatus;
219 };
220 
221 struct AAAMDAttributes : public StateWrapper<
222  BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>, AbstractAttribute> {
223  using Base = StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
224  AbstractAttribute>;
225 
226  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
227 
228  /// Create an abstract attribute view for the position \p IRP.
229  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
230  Attributor &A);
231 
232  /// See AbstractAttribute::getName().
233  const std::string getName() const override { return "AAAMDAttributes"; }
234 
235  /// See AbstractAttribute::getIdAddr().
236  const char *getIdAddr() const override { return &ID; }
237 
238  /// This function should return true if the type of the \p AA is
239  /// AAAMDAttributes.
240  static bool classof(const AbstractAttribute *AA) {
241  return (AA->getIdAddr() == &ID);
242  }
243 
244  /// Unique ID (due to the unique address)
245  static const char ID;
246 };
247 const char AAAMDAttributes::ID = 0;
248 
249 struct AAUniformWorkGroupSize
250  : public StateWrapper<BooleanState, AbstractAttribute> {
251  using Base = StateWrapper<BooleanState, AbstractAttribute>;
252  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
253 
254  /// Create an abstract attribute view for the position \p IRP.
255  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
256  Attributor &A);
257 
258  /// See AbstractAttribute::getName().
259  const std::string getName() const override {
260  return "AAUniformWorkGroupSize";
261  }
262 
263  /// See AbstractAttribute::getIdAddr().
264  const char *getIdAddr() const override { return &ID; }
265 
266  /// This function should return true if the type of the \p AA is
267  /// AAUniformWorkGroupSize.
268  static bool classof(const AbstractAttribute *AA) {
269  return (AA->getIdAddr() == &ID);
270  }
271 
272  /// Unique ID (due to the unique address)
273  static const char ID;
274 };
275 const char AAUniformWorkGroupSize::ID = 0;
276 
277 struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
278  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
279  : AAUniformWorkGroupSize(IRP, A) {}
280 
281  void initialize(Attributor &A) override {
282  Function *F = getAssociatedFunction();
283  CallingConv::ID CC = F->getCallingConv();
284 
285  if (CC != CallingConv::AMDGPU_KERNEL)
286  return;
287 
288  bool InitialValue = false;
289  if (F->hasFnAttribute("uniform-work-group-size"))
290  InitialValue = F->getFnAttribute("uniform-work-group-size")
291  .getValueAsString()
292  .equals("true");
293 
294  if (InitialValue)
295  indicateOptimisticFixpoint();
296  else
297  indicatePessimisticFixpoint();
298  }
299 
300  ChangeStatus updateImpl(Attributor &A) override {
301  ChangeStatus Change = ChangeStatus::UNCHANGED;
302 
303  auto CheckCallSite = [&](AbstractCallSite CS) {
304  Function *Caller = CS.getInstruction()->getFunction();
305  LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
306  << "->" << getAssociatedFunction()->getName() << "\n");
307 
308  const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
309  *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
310 
311  Change = Change | clampStateAndIndicateChange(this->getState(),
312  CallerInfo.getState());
313 
314  return true;
315  };
316 
317  bool AllCallSitesKnown = true;
318  if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
319  return indicatePessimisticFixpoint();
320 
321  return Change;
322  }
323 
324  ChangeStatus manifest(Attributor &A) override {
325  SmallVector<Attribute, 8> AttrList;
326  LLVMContext &Ctx = getAssociatedFunction()->getContext();
327 
328  AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
329  getAssumed() ? "true" : "false"));
330  return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
331  /* ForceReplace */ true);
332  }
333 
334  bool isValidState() const override {
335  // This state is always valid, even when the state is false.
336  return true;
337  }
338 
339  const std::string getAsStr() const override {
340  return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
341  }
342 
343  /// See AbstractAttribute::trackStatistics()
344  void trackStatistics() const override {}
345 };
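// Propagation example: the callee's state is clamped by every caller's state,
// so a helper reachable only from kernels carrying
// "uniform-work-group-size"="true" manifests "true", while a single unknown or
// "false" call site forces the pessimistic "false" value.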
346 
347 AAUniformWorkGroupSize &
348 AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
349  Attributor &A) {
350  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
351  return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
352  llvm_unreachable(
353  "AAUniformWorkGroupSize is only valid for function position");
354 }
355 
356 struct AAAMDAttributesFunction : public AAAMDAttributes {
357  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
358  : AAAMDAttributes(IRP, A) {}
359 
360  void initialize(Attributor &A) override {
361  Function *F = getAssociatedFunction();
362 
363  // If the function requires the implicit arg pointer due to sanitizers,
364  // assume it's needed even if explicitly marked as not requiring it.
365  const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
366  if (NeedsHostcall) {
367  removeAssumedBits(IMPLICIT_ARG_PTR);
368  removeAssumedBits(HOSTCALL_PTR);
369  }
370 
371  for (auto Attr : ImplicitAttrs) {
372  if (NeedsHostcall &&
373  (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
374  continue;
375 
376  if (F->hasFnAttribute(Attr.second))
377  addKnownBits(Attr.first);
378  }
379 
380  if (F->isDeclaration())
381  return;
382 
383  // Ignore functions with graphics calling conventions; these are currently
384  // not allowed to have kernel arguments.
385  if (AMDGPU::isGraphics(F->getCallingConv())) {
386  indicatePessimisticFixpoint();
387  return;
388  }
389  }
390 
391  ChangeStatus updateImpl(Attributor &A) override {
392  Function *F = getAssociatedFunction();
393  // The current assumed state used to determine a change.
394  auto OrigAssumed = getAssumed();
395 
396  // Check for Intrinsics and propagate attributes.
397  const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
398  *this, this->getIRPosition(), DepClassTy::REQUIRED);
399  if (AAEdges.hasNonAsmUnknownCallee())
400  return indicatePessimisticFixpoint();
401 
402  bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
403 
404  bool NeedsImplicit = false;
405  auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
406  bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
407  bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
408 
409  for (Function *Callee : AAEdges.getOptimisticEdges()) {
410  Intrinsic::ID IID = Callee->getIntrinsicID();
411  if (IID == Intrinsic::not_intrinsic) {
412  const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
413  *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
414  *this &= AAAMD;
415  continue;
416  }
417 
418  bool NonKernelOnly = false;
419  ImplicitArgumentMask AttrMask =
420  intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
421  HasApertureRegs, SupportsGetDoorbellID);
422  if (AttrMask != NOT_IMPLICIT_INPUT) {
423  if ((IsNonEntryFunc || !NonKernelOnly))
424  removeAssumedBits(AttrMask);
425  }
426  }
427 
428  // Need implicitarg_ptr to access queue_ptr, private_base, and shared_base.
429  if (NeedsImplicit)
430  removeAssumedBits(IMPLICIT_ARG_PTR);
431 
432  if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
433  // Under V5, we need implicitarg_ptr + offsets to access private_base or
434  // shared_base. We do not actually need queue_ptr.
435  if (AMDGPU::getAmdhsaCodeObjectVersion() == 5)
436  removeAssumedBits(IMPLICIT_ARG_PTR);
437  else
438  removeAssumedBits(QUEUE_PTR);
439  }
440 
441  if (funcRetrievesMultigridSyncArg(A)) {
442  assert(!isAssumed(IMPLICIT_ARG_PTR) &&
443  "multigrid_sync_arg needs implicitarg_ptr");
444  removeAssumedBits(MULTIGRID_SYNC_ARG);
445  }
446 
447  if (funcRetrievesHostcallPtr(A)) {
448  assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
449  removeAssumedBits(HOSTCALL_PTR);
450  }
451 
452  if (funcRetrievesHeapPtr(A)) {
453  assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
454  removeAssumedBits(HEAP_PTR);
455  }
456 
457  if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A)) {
458  assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
459  removeAssumedBits(QUEUE_PTR);
460  }
461 
462  if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
463  removeAssumedBits(LDS_KERNEL_ID);
464  }
465 
466  return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
467  : ChangeStatus::UNCHANGED;
468  }
469 
470  ChangeStatus manifest(Attributor &A) override {
471  SmallVector<Attribute, 8> AttrList;
472  LLVMContext &Ctx = getAssociatedFunction()->getContext();
473 
474  for (auto Attr : ImplicitAttrs) {
475  if (isKnown(Attr.first))
476  AttrList.push_back(Attribute::get(Ctx, Attr.second));
477  }
478 
479  return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
480  /* ForceReplace */ true);
481  }
482 
483  const std::string getAsStr() const override {
484  std::string Str;
485  raw_string_ostream OS(Str);
486  OS << "AMDInfo[";
487  for (auto Attr : ImplicitAttrs)
488  OS << ' ' << Attr.second;
489  OS << " ]";
490  return OS.str();
491  }
492 
493  /// See AbstractAttribute::trackStatistics()
494  void trackStatistics() const override {}
495 
496 private:
497  bool checkForQueuePtr(Attributor &A) {
498  Function *F = getAssociatedFunction();
499  bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
500 
501  auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
502 
503  bool NeedsQueuePtr = false;
504 
505  auto CheckAddrSpaceCasts = [&](Instruction &I) {
506  unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
507  if (castRequiresQueuePtr(SrcAS)) {
508  NeedsQueuePtr = true;
509  return false;
510  }
511  return true;
512  };
513 
514  bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
515 
516  // `checkForAllInstructions` is much cheaper than going through all
517  // instructions; try it first.
518 
519  // The queue pointer is not needed if aperture regs are present.
520  if (!HasApertureRegs) {
521  bool UsedAssumedInformation = false;
522  A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
523  {Instruction::AddrSpaceCast},
524  UsedAssumedInformation);
525  }
526 
527  // If we found that we need the queue pointer, nothing else to do.
528  if (NeedsQueuePtr)
529  return true;
530 
531  if (!IsNonEntryFunc && HasApertureRegs)
532  return false;
533 
534  for (BasicBlock &BB : *F) {
535  for (Instruction &I : BB) {
536  for (const Use &U : I.operands()) {
537  if (const auto *C = dyn_cast<Constant>(U)) {
538  if (InfoCache.needsQueuePtr(C, *F))
539  return true;
540  }
541  }
542  }
543  }
544 
545  return false;
546  }
547 
548  bool funcRetrievesMultigridSyncArg(Attributor &A) {
549  auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition();
550  AA::OffsetAndSize OAS(Pos, 8);
551  return funcRetrievesImplicitKernelArg(A, OAS);
552  }
553 
554  bool funcRetrievesHostcallPtr(Attributor &A) {
555  auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition();
556  AA::OffsetAndSize OAS(Pos, 8);
557  return funcRetrievesImplicitKernelArg(A, OAS);
558  }
559 
560  bool funcRetrievesHeapPtr(Attributor &A) {
561  if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
562  return false;
563  AA::OffsetAndSize OAS(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
564  return funcRetrievesImplicitKernelArg(A, OAS);
565  }
566 
567  bool funcRetrievesQueuePtr(Attributor &A) {
568  if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
569  return false;
570  AA::OffsetAndSize OAS(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
571  return funcRetrievesImplicitKernelArg(A, OAS);
572  }
573 
574  bool funcRetrievesImplicitKernelArg(Attributor &A,
575  AA::OffsetAndSize OAS) {
576  // Check if this is a call to the implicitarg_ptr builtin and it
577  // is used to retrieve the hostcall pointer. The implicit arg for
578  // hostcall is not used only if every use of the implicitarg_ptr
579  // is a load that clearly does not retrieve any byte of the
580  // hostcall pointer. We check this by tracing all the uses of the
581  // initial call to the implicitarg_ptr intrinsic.
582  auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
583  auto &Call = cast<CallBase>(I);
584  if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
585  return true;
586 
587  const auto &PointerInfoAA = A.getAAFor<AAPointerInfo>(
588  *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
589 
590  return PointerInfoAA.forallInterferingAccesses(
591  OAS, [](const AAPointerInfo::Access &Acc, bool IsExact) {
592  return Acc.getRemoteInst()->isDroppable();
593  });
594  };
595 
596  bool UsedAssumedInformation = false;
597  return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
598  UsedAssumedInformation);
599  }
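// Example: funcRetrievesHostcallPtr() passes an OffsetAndSize covering the
// 8-byte hostcall slot; if some call to amdgcn.implicitarg.ptr has a
// non-droppable access overlapping that slot, the predicate above fails and
// this helper reports that the implicit argument is (potentially) retrieved.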
600 
601  bool funcRetrievesLDSKernelId(Attributor &A) {
602  auto DoesNotRetrieve = [&](Instruction &I) {
603  auto &Call = cast<CallBase>(I);
604  return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
605  };
606  bool UsedAssumedInformation = false;
607  return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
608  UsedAssumedInformation);
609  }
610 };
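// Net effect: mask bits still set (known) when the Attributor reaches its
// fixpoint are manifested as the attribute strings from ImplicitAttrs
// (presumably the "amdgpu-no-*" names defined in AMDGPUAttributes.def); e.g. a
// function with no reachable use of llvm.amdgcn.workitem.id.y keeps the
// WORKITEM_ID_Y bit and receives the matching attribute, while a use of that
// intrinsic clears it.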
611 
612 AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
613  Attributor &A) {
614  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
615  return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
616  llvm_unreachable("AAAMDAttributes is only valid for function position");
617 }
618 
619 /// Propagate amdgpu-flat-work-group-size attribute.
620 struct AAAMDFlatWorkGroupSize
621  : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
622  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
623  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
624  : Base(IRP, 32) {}
625 
626  /// See AbstractAttribute::getState(...).
627  IntegerRangeState &getState() override { return *this; }
628  const IntegerRangeState &getState() const override { return *this; }
629 
630  void initialize(Attributor &A) override {
631  Function *F = getAssociatedFunction();
632  auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
633  unsigned MinGroupSize, MaxGroupSize;
634  std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
635  intersectKnown(
636  ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));
637 
638  if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
639  indicatePessimisticFixpoint();
640  }
641 
642  ChangeStatus updateImpl(Attributor &A) override {
643  ChangeStatus Change = ChangeStatus::UNCHANGED;
644 
645  auto CheckCallSite = [&](AbstractCallSite CS) {
646  Function *Caller = CS.getInstruction()->getFunction();
647  LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
648  << "->" << getAssociatedFunction()->getName() << '\n');
649 
650  const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
651  *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
652 
653  Change |=
654  clampStateAndIndicateChange(this->getState(), CallerInfo.getState());
655 
656  return true;
657  };
658 
659  bool AllCallSitesKnown = true;
660  if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
661  return indicatePessimisticFixpoint();
662 
663  return Change;
664  }
665 
666  ChangeStatus manifest(Attributor &A) override {
667  SmallVector<Attribute, 8> AttrList;
668  Function *F = getAssociatedFunction();
669  LLVMContext &Ctx = F->getContext();
670 
671  auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
672  unsigned Min, Max;
673  std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);
674 
675  // Don't add the attribute if it's the implied default.
676  if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
677  return ChangeStatus::UNCHANGED;
678 
679  SmallString<10> Buffer;
680  raw_svector_ostream OS(Buffer);
681  OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
682 
683  AttrList.push_back(
684  Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
685  return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
686  /* ForceReplace */ true);
687  }
688 
689  const std::string getAsStr() const override {
690  std::string Str;
691  raw_string_ostream OS(Str);
692  OS << "AMDFlatWorkGroupSize[";
693  OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
694  OS << ']';
695  return OS.str();
696  }
697 
698  /// See AbstractAttribute::trackStatistics()
699  void trackStatistics() const override {}
700 
701  /// Create an abstract attribute view for the position \p IRP.
702  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
703  Attributor &A);
704 
705  /// See AbstractAttribute::getName()
706  const std::string getName() const override {
707  return "AAAMDFlatWorkGroupSize";
708  }
709 
710  /// See AbstractAttribute::getIdAddr()
711  const char *getIdAddr() const override { return &ID; }
712 
713  /// This function should return true if the type of the \p AA is
714  /// AAAMDFlatWorkGroupSize
715  static bool classof(const AbstractAttribute *AA) {
716  return (AA->getIdAddr() == &ID);
717  }
718 
719  /// Unique ID (due to the unique address)
720  static const char ID;
721 };
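// Manifest example: an assumed range of [128, 257) is emitted as
// "amdgpu-flat-work-group-size"="128,256" (the upper bound is exclusive in the
// IntegerRangeState); the attribute is omitted when the range equals the
// subtarget default reported by getMaximumFlatWorkGroupRange().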
722 
723 const char AAAMDFlatWorkGroupSize::ID = 0;
724 
725 AAAMDFlatWorkGroupSize &
726 AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
727  Attributor &A) {
728  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
729  return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
730  llvm_unreachable(
731  "AAAMDFlatWorkGroupSize is only valid for function position");
732 }
733 
734 class AMDGPUAttributor : public ModulePass {
735 public:
736  AMDGPUAttributor() : ModulePass(ID) {}
737 
738  /// doInitialization - Virtual method overridden by subclasses to do
739  /// any necessary initialization before any pass is run.
740  bool doInitialization(Module &) override {
741  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
742  if (!TPC)
743  report_fatal_error("TargetMachine is required");
744 
745  TM = &TPC->getTM<TargetMachine>();
746  return false;
747  }
748 
749  bool runOnModule(Module &M) override {
750  SetVector<Function *> Functions;
751  AnalysisGetter AG;
752  for (Function &F : M) {
753  if (!F.isIntrinsic())
754  Functions.insert(&F);
755  }
756 
757  CallGraphUpdater CGUpdater;
758  BumpPtrAllocator Allocator;
759  AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
760  DenseSet<const char *> Allowed(
761  {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
762  &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID,
763  &AAPointerInfo::ID});
764 
765  AttributorConfig AC(CGUpdater);
766  AC.Allowed = &Allowed;
767  AC.IsModulePass = true;
768  AC.DefaultInitializeLiveInternals = false;
769 
770  Attributor A(Functions, InfoCache, AC);
771 
772  for (Function &F : M) {
773  if (!F.isIntrinsic()) {
774  A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
775  A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
776  if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
777  A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
778  }
779  }
780  }
781 
782  ChangeStatus Change = A.run();
783  return Change == ChangeStatus::CHANGED;
784  }
785 
786  StringRef getPassName() const override { return "AMDGPU Attributor"; }
787  TargetMachine *TM;
788  static char ID;
789 };
790 } // namespace
791 
792 char AMDGPUAttributor::ID = 0;
793 
794 Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
795 INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)
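// Usage note: INITIALIZE_PASS registers the legacy pass under the
// "amdgpu-attributor" name (DEBUG_TYPE), and doInitialization() requires a
// TargetPassConfig, so the pass is intended to run inside the AMDGPU codegen
// pipeline. A minimal scheduling sketch (an assumption; the exact call site in
// AMDGPUTargetMachine is not shown here):
//   addPass(createAMDGPUAttributorPass());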