AMDGPUAttributor.cpp

//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

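// AMDGPUAttributes.def is an X-macro table: each entry has the form
// AMDGPU_ATTRIBUTE(Name, Str), and the .def expands under whatever definition
// of AMDGPU_ATTRIBUTE is currently in scope, so each of the three includes
// below defines the macro first. An illustrative expansion, assuming an entry
// of the form AMDGPU_ATTRIBUTE(DISPATCH_PTR, "amdgpu-no-dispatch-ptr"):
//   position enum:  DISPATCH_PTR_POS,
//   mask enum:      DISPATCH_PTR = 1 << DISPATCH_PTR_POS,
//   table entry:    {DISPATCH_PTR, "amdgpu-no-dispatch-ptr"},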
#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,

enum ImplicitArgumentPositions {
  #include "AMDGPUAttributes.def"
  LAST_ARG_POS
};

#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,

enum ImplicitArgumentMask {
  NOT_IMPLICIT_INPUT = 0,
  #include "AMDGPUAttributes.def"
  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
};

#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask,
                           StringLiteral> ImplicitAttrs[] = {
 #include "AMDGPUAttributes.def"
};

// We do not need to note the x workitem or workgroup id because they are
// always initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
                    bool HasApertureRegs, bool SupportsGetDoorBellID) {
  unsigned CodeObjectVersion = AMDGPU::getAmdhsaCodeObjectVersion();
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to
  // access queue_ptr.
  case Intrinsic::amdgcn_queue_ptr:
    NeedsImplicit = (CodeObjectVersion == 5);
    return QUEUE_PTR;
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    if (HasApertureRegs)
      return NOT_IMPLICIT_INPUT;
    // Under V5, we need implicitarg_ptr + offsets to access private_base or
    // shared_base. For pre-V5, however, we need to access them through
    // queue_ptr + offsets.
    return CodeObjectVersion == 5 ? IMPLICIT_ARG_PTR : QUEUE_PTR;
  case Intrinsic::trap:
    if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
      return CodeObjectVersion >= 4 ? NOT_IMPLICIT_INPUT : QUEUE_PTR;
    NeedsImplicit = (CodeObjectVersion == 5); // Need implicitarg_ptr under V5.
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}
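
// Note on the mask semantics (see updateImpl/manifest below): each mask bit
// corresponds to one "amdgpu-no-*" function attribute from ImplicitAttrs.
// A bit starts out assumed; removeAssumedBits() clears it once the matching
// implicit input is known to be needed, and only still-known bits are
// manifested as attributes at the end.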

static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}
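
// Rationale (inferred from how the deduced bits are used in this file): a
// cast from the local or private address space to the flat address space
// needs the corresponding aperture base address, which subtargets without
// aperture registers read from the queue pointer (or, under code object V5,
// from implicitarg_ptr).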

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

/// Returns true if the function requires the implicit argument be passed
/// regardless of the function contents.
static bool funcRequiresHostcallPtr(const Function &F) {
  // Sanitizers require the hostcall buffer passed in the implicit arguments.
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}

namespace {
class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
  TargetMachine &TM;

  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

  /// Check if the subtarget supports GetDoorbellID.
  bool supportsGetDoorbellID(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.supportsGetDoorbellID();
  }

  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getFlatWorkGroupSizes(F);
  }

  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

private:
  /// Check if the ConstantExpr \p CE requires the queue pointer.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }
    return Result;
  }

public:
  /// Returns true if \p Fn needs the queue pointer because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }
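
  // Illustrative cases (assumed from the logic above): a non-entry function
  // referring to an LDS (addrspace(3)) global returns true, since trapping on
  // DS globals requires the queue pointer; an entry function on a subtarget
  // with aperture registers always returns false.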

private:
  /// Used to determine if the Constant needs the queue pointer.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
};

struct AAAMDAttributes
    : public StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
                          AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override {
    return "AAUniformWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAUniformWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;

struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }
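
  // Illustrative manifestation (IR shape assumed for illustration; the value
  // depends on the deduced boolean state):
  //   define void @callee() #0 { ... }
  //   attributes #0 = { "uniform-work-group-size"="true" }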

  bool isValidState() const override {
    // This state is always valid, even when the state is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}

struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();

    // If the function requires the implicit arg pointer due to sanitizers,
    // assume it's needed even if explicitly marked as not requiring it.
    const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
    if (NeedsHostcall) {
      removeAssumedBits(IMPLICIT_ARG_PTR);
      removeAssumedBits(HOSTCALL_PTR);
    }

    for (auto Attr : ImplicitAttrs) {
      if (NeedsHostcall &&
          (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
        continue;

      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state used to determine a change.
    auto OrigAssumed = getAssumed();

    // Check for Intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (AAEdges.hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    bool NeedsImplicit = false;
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
    bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);

    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
            *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        *this &= AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
                              HasApertureRegs, SupportsGetDoorbellID);
      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if ((IsNonEntryFunc || !NonKernelOnly))
          removeAssumedBits(AttrMask);
      }
    }

    // Need implicitarg_ptr to access queue_ptr, private_base, and shared_base.
    if (NeedsImplicit)
      removeAssumedBits(IMPLICIT_ARG_PTR);

    if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
      // Under V5, we need implicitarg_ptr + offsets to access private_base or
      // shared_base. We do not actually need queue_ptr.
      if (AMDGPU::getAmdhsaCodeObjectVersion() == 5)
        removeAssumedBits(IMPLICIT_ARG_PTR);
      else
        removeAssumedBits(QUEUE_PTR);
    }

    if (funcRetrievesMultigridSyncArg(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) &&
             "multigrid_sync_arg needs implicitarg_ptr");
      removeAssumedBits(MULTIGRID_SYNC_ARG);
    }

    if (funcRetrievesHostcallPtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
      removeAssumedBits(HOSTCALL_PTR);
    }

    if (funcRetrievesHeapPtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
      removeAssumedBits(HEAP_PTR);
    }

    if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
      removeAssumedBits(QUEUE_PTR);
    }

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }
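
  // Illustrative result (attribute strings come from AMDGPUAttributes.def;
  // the exact set shown here is assumed for illustration): a function with no
  // deduced implicit-input needs might end up with
  //   attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr"
  //                     "amdgpu-no-queue-ptr" ... }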

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

private:
  bool checkForQueuePtr(Attributor &A) {
    Function *F = getAssociatedFunction();
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than going through all
    // instructions, so try it first.

    // The queue pointer is not needed if aperture regs are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need the queue pointer, nothing else to do.
    if (NeedsQueuePtr)
      return true;

    if (!IsNonEntryFunc && HasApertureRegs)
      return false;

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F))
              return true;
          }
        }
      }
    }

    return false;
  }

  bool funcRetrievesMultigridSyncArg(Attributor &A) {
    auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition();
    AAPointerInfo::OffsetAndSize OAS(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

  bool funcRetrievesHostcallPtr(Attributor &A) {
    auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition();
    AAPointerInfo::OffsetAndSize OAS(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

  bool funcRetrievesHeapPtr(Attributor &A) {
    if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
      return false;
    AAPointerInfo::OffsetAndSize OAS(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

  bool funcRetrievesQueuePtr(Attributor &A) {
    if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
      return false;
    AAPointerInfo::OffsetAndSize OAS(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

  bool funcRetrievesImplicitKernelArg(Attributor &A,
                                      AAPointerInfo::OffsetAndSize OAS) {
    // Check if this is a call to the implicitarg_ptr builtin and it
    // is used to retrieve the hostcall pointer. The implicit arg for
    // hostcall is not used only if every use of the implicitarg_ptr
    // is a load that clearly does not retrieve any byte of the
    // hostcall pointer. We check this by tracing all the uses of the
    // initial call to the implicitarg_ptr intrinsic.
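    //
    // For example (illustrative): if the implicit argument of interest
    // occupies bytes [Pos, Pos+8) of the implicit-arg block, a load that may
    // overlap that window keeps the argument needed, while loads that
    // provably stay outside it do not.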
    auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
        return true;

      const auto &PointerInfoAA = A.getAAFor<AAPointerInfo>(
          *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);

      return PointerInfoAA.forallInterferingAccesses(
          OAS, [](const AAPointerInfo::Access &Acc, bool IsExact) {
            return Acc.getRemoteInst()->isDroppable();
          });
    };

    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
                                              UsedAssumedInformation);
  }
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

/// Propagate amdgpu-flat-work-group-size attribute.
struct AAAMDFlatWorkGroupSize
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : Base(IRP, 32) {}

  /// See AbstractAttribute::getState(...).
  IntegerRangeState &getState() override { return *this; }
  const IntegerRangeState &getState() const override { return *this; }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned MinGroupSize, MaxGroupSize;
    std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
    intersectKnown(
        ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned Min, Max;
    std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);

    // Don't add the attribute if it's the implied default.
    if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
      return ChangeStatus::UNCHANGED;

    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;

    AttrList.push_back(
        Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }
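
  // Illustrative manifestation (values assumed): a deduced inclusive range of
  // 128..256 becomes the function attribute
  //   "amdgpu-flat-work-group-size"="128,256"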

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDFlatWorkGroupSize[";
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override {
    return "AAAMDFlatWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDFlatWorkGroupSize
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}

class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
    DenseSet<const char *> Allowed(
        {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
         &AAPotentialConstantValues::ID, &AAAMDFlatWorkGroupSize::ID,
         &AACallEdges::ID, &AAPointerInfo::ID});

    AttributorConfig AC(CGUpdater);
    AC.Allowed = &Allowed;
    AC.IsModulePass = true;
    AC.DefaultInitializeLiveInternals = false;

    Attributor A(Functions, InfoCache, AC);

    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
        if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
          A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
        }
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM;
  static char ID;
};
} // namespace

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)
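
// Usage note: the legacy pass above registers under the name
// "amdgpu-attributor" (DEBUG_TYPE) and is created through
// createAMDGPUAttributorPass(), which the AMDGPU target's codegen pipeline is
// assumed to schedule before implicit kernel arguments are lowered. An
// illustrative test invocation (flag shape assumed, not verified against this
// LLVM revision):
//   opt -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor -S < kernels.ll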
unsigned ID
Definition: TargetTransformInfo.h:37