#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,

enum ImplicitArgumentPositions {
  #include "AMDGPUAttributes.def"
  LAST_ARG_POS
};

#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,

enum ImplicitArgumentMask {
  NOT_IMPLICIT_INPUT = 0,
  #include "AMDGPUAttributes.def"
  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
};

#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask, StringLiteral>
    ImplicitAttrs[] = {
  #include "AMDGPUAttributes.def"
};
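// Returns the implicit kernel input an intrinsic requires, as an
// ImplicitArgumentMask bit. NonKernelOnly is set for inputs that entry
// functions always receive (so only non-kernel callers need the attribute);
// NeedsImplicit is set when the lowering additionally needs implicitarg_ptr.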
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
                    bool HasApertureRegs, bool SupportsGetDoorBellID) {
  unsigned CodeObjectVersion = AMDGPU::getAmdhsaCodeObjectVersion();
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  // Need queue_ptr anyway; under code object V5 it is reached through
  // implicitarg_ptr, so that is needed as well.
  case Intrinsic::amdgcn_queue_ptr:
    NeedsImplicit = (CodeObjectVersion == 5);
    return QUEUE_PTR;
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    if (HasApertureRegs)
      return NOT_IMPLICIT_INPUT;
    // Under V5 the aperture bases are read via implicitarg_ptr; before V5 they
    // come from the queue pointer.
    return CodeObjectVersion == 5 ? IMPLICIT_ARG_PTR : QUEUE_PTR;
  case Intrinsic::trap:
    if (SupportsGetDoorBellID) // GetDoorbellID is supported from V4 onwards.
      return CodeObjectVersion >= 4 ? NOT_IMPLICIT_INPUT : QUEUE_PTR;
    NeedsImplicit = (CodeObjectVersion == 5);
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}
static bool funcRequiresHostcallPtr(const Function &F) {
  // Sanitizers need the hostcall buffer passed through the implicit arguments.
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}
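// The declarations below are members of AMDGPUInformationCache, an
// InformationCache subclass that holds the TargetMachine, answers subtarget
// queries (aperture registers, doorbell support, flat work-group sizes), and
// caches per-constant address-space-cast usage for the abstract attributes in
// this file.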
  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };
  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

  /// Check if the subtarget supports GetDoorbellID.
  bool supportsGetDoorbellID(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.supportsGetDoorbellID();
  }
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getFlatWorkGroupSizes(F);
  }

  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }
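  // The helpers below classify constants: visitConstExpr flags address space
  // casts from private/local, getConstantAccess memoizes a per-constant bitmap
  // of such uses, and needsQueuePtr decides whether a function touching the
  // constant must keep the queue pointer.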
  /// Check if an addrspacecast constant expression needs the queue pointer.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      // Casts from private or local (LDS) memory to flat need the apertures.
      return SrcAS == AMDGPUAS::PRIVATE_ADDRESS ||
             SrcAS == AMDGPUAS::LOCAL_ADDRESS;
    }
    return false;
  }
  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    // isDSAddress() checks for globals in the LDS/GDS address spaces.
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }

    ConstantStatus[C] = Result;
    return Result;
  }
  /// Returns true if \p Fn needs the queue pointer because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants for an entry function with apertures.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // Non-entry functions touching an LDS/GDS global need the queue pointer.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

  /// Cached access bitmap per constant.
  SmallDenseMap<const Constant *, uint8_t> ConstantStatus;
};
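// Abstract attribute tracking which implicit kernel inputs a function needs.
// A set bit means the input is assumed *not* needed; bits are removed as uses
// are discovered, and bits that survive as known manifest as the corresponding
// "amdgpu-no-*" function attributes.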
struct AAAMDAttributes
    : public StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
                          AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// Return true if the AA is of type AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address).
  static const char ID;
};
const char AAAMDAttributes::ID = 0;
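// Abstract attribute propagating the "uniform-work-group-size" attribute from
// callers to their callees.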
struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override {
    return "AAUniformWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// Return true if the AA is of type AAUniformWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address).
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;
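// Function-position implementation: kernels seed their state from an existing
// "uniform-work-group-size" attribute and reach a fixpoint immediately; other
// functions take the meet of their callers' states.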
struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the assumed value is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics().
  void trackStatistics() const override {}
};
AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}
struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();

    // Sanitizers need the hostcall buffer, so implicitarg_ptr and hostcall_ptr
    // stay required even if attributes claim otherwise.
    const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
    if (NeedsHostcall) {
      removeAssumedBits(IMPLICIT_ARG_PTR);
      removeAssumedBits(HOSTCALL_PTR);
    }

    for (auto Attr : ImplicitAttrs) {
      if (NeedsHostcall &&
          (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
        continue;

      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Graphics calling conventions are not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }
  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state used to determine a change.
    auto OrigAssumed = getAssumed();

    // Check for intrinsics and propagate attributes from callees.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (AAEdges.hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    bool NeedsImplicit = false;
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
    bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);

    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
            *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        *this &= AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
                              HasApertureRegs, SupportsGetDoorbellID);
      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if ((IsNonEntryFunc || !NonKernelOnly))
          removeAssumedBits(AttrMask);
      }
    }

    // Need implicitarg_ptr to access queue_ptr, private_base, and shared_base.
    if (NeedsImplicit)
      removeAssumedBits(IMPLICIT_ARG_PTR);

    if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
      // Under V5 the aperture bases come from implicitarg_ptr, so queue_ptr
      // itself is not needed.
      if (AMDGPU::getAmdhsaCodeObjectVersion() == 5)
        removeAssumedBits(IMPLICIT_ARG_PTR);
      else
        removeAssumedBits(QUEUE_PTR);
    }

    if (funcRetrievesMultigridSyncArg(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) &&
             "multigrid_sync_arg needs implicitarg_ptr");
      removeAssumedBits(MULTIGRID_SYNC_ARG);
    }

    if (funcRetrievesHostcallPtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
      removeAssumedBits(HOSTCALL_PTR);
    }

    if (funcRetrievesHeapPtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
      removeAssumedBits(HEAP_PTR);
    }

    if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
      removeAssumedBits(QUEUE_PTR);
    }

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;
  }
  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }
  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }
  void trackStatistics() const override {}
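  // Returns true if the function, or a constant expression it uses, casts a
  // private or local pointer to the flat address space and therefore needs the
  // queue pointer (or, with code object V5, implicitarg_ptr).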
  bool checkForQueuePtr(Attributor &A) {
    Function *F = getAssociatedFunction();
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (SrcAS == AMDGPUAS::PRIVATE_ADDRESS ||
          SrcAS == AMDGPUAS::LOCAL_ADDRESS) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // The queue pointer is not needed if aperture regs are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    if (NeedsQueuePtr)
      return true;

    if (!IsNonEntryFunc && HasApertureRegs)
      return false;

    // Also look through constants used by the instructions.
    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F))
              return true;
          }
        }
      }
    }

    return false;
  }
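  // Each helper below reports whether the function loads a particular entry of
  // the implicit kernel-argument buffer (multigrid sync arg, hostcall buffer,
  // heap pointer, queue pointer) through amdgcn_implicitarg_ptr.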
  bool funcRetrievesMultigridSyncArg(Attributor &A) {
    auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition();
    AAPointerInfo::OffsetAndSize OAS(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

  bool funcRetrievesHostcallPtr(Attributor &A) {
    auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition();
    AAPointerInfo::OffsetAndSize OAS(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

  bool funcRetrievesHeapPtr(Attributor &A) {
    if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
      return false;
    AAPointerInfo::OffsetAndSize OAS(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

  bool funcRetrievesQueuePtr(Attributor &A) {
    if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
      return false;
    AAPointerInfo::OffsetAndSize OAS(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }
  bool funcRetrievesImplicitKernelArg(Attributor &A,
                                      AAPointerInfo::OffsetAndSize OAS) {
    // The implicit kernel argument is only considered unused if no load
    // reachable from a call to amdgcn_implicitarg_ptr can touch a byte in the
    // range described by OAS.
    auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
        return true;

      const auto &PointerInfoAA = A.getAAFor<AAPointerInfo>(
          *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);

      return PointerInfoAA.forallInterferingAccesses(
          OAS, [](const AAPointerInfo::Access &Acc, bool IsExact) {
            return Acc.getRemoteInst()->isDroppable();
          });
    };

    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
                                              UsedAssumedInformation);
  }
};
AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}
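// Abstract attribute propagating the amdgpu-flat-work-group-size range from
// callers into callees, clamped to what the subtarget supports.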
struct AAAMDFlatWorkGroupSize
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : Base(IRP, 32) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned MinGroupSize, MaxGroupSize;
    std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
    intersectKnown(
        ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }
  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned Min, Max;
    std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);

    // Don't add the attribute if it's the implied default.
    if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
      return ChangeStatus::UNCHANGED;

    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;

    AttrList.push_back(
        Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }
  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDFlatWorkGroupSize[";
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }
  void trackStatistics() const override {}
  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override {
    return "AAAMDFlatWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// Return true if the AA is of type AAAMDFlatWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address).
  static const char ID;
};
const char AAAMDFlatWorkGroupSize::ID = 0;
AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");
    TM = &TPC->getTM<TargetMachine>();
    return false;
  }
  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
    AttributorConfig AC(CGUpdater);
    AC.IsModulePass = true;
    AC.DefaultInitializeLiveInternals = false;
    Attributor A(Functions, InfoCache, AC);

    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
        if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
          A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }
  StringRef getPassName() const override { return "AMDGPU Attributor"; }

  TargetMachine *TM;
  static char ID;
};