18#include "llvm/IR/IntrinsicsAMDGPU.h"
19#include "llvm/IR/IntrinsicsR600.h"
23#define DEBUG_TYPE "amdgpu-attributor"
31#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
34 #include "AMDGPUAttributes.def"
38#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
42 #include "AMDGPUAttributes.def"
46#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
49 #include "AMDGPUAttributes.def"
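// The same AMDGPUAttributes.def table is expanded three times through the
// AMDGPU_ATTRIBUTE X-macro: once for bit positions, once for one-hot mask
// values, and once for the mask-to-attribute-string table. As a rough
// illustration, assuming a representative .def entry of the form
//   AMDGPU_ATTRIBUTE(QUEUE_PTR, "amdgpu-no-queue-ptr")
// the three expansions would yield:
//   QUEUE_PTR_POS,                        // ImplicitArgumentPositions
//   QUEUE_PTR = 1 << QUEUE_PTR_POS,       // ImplicitArgumentMask
//   {QUEUE_PTR, "amdgpu-no-queue-ptr"},   // ImplicitAttrs entry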
/// Map an intrinsic to the implicit kernel input it requires; the out
/// parameters report non-kernel-only requirements and implicitarg_ptr needs.
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
                    bool HasApertureRegs, bool SupportsGetDoorBellID,
                    unsigned CodeObjectVersion) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_lds_kernel_id:
    return LDS_KERNEL_ID;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  case Intrinsic::amdgcn_queue_ptr:
    // Under code object V5 the queue pointer is reached through
    // implicitarg_ptr, so both implicit inputs are required.
    NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
    return QUEUE_PTR;
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    if (HasApertureRegs)
      return NOT_IMPLICIT_INPUT;
    // Under V5, private_base and shared_base are read via implicitarg_ptr +
    // offset; earlier versions read them from the queue pointer.
    return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR :
                                                      QUEUE_PTR;
  case Intrinsic::trap:
    if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
      return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT :
                                                        QUEUE_PTR;
    NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}
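// Usage sketch (hedged; this mirrors the call made from updateImpl() further
// down): a call to llvm.amdgcn.workitem.id.y maps to WORKITEM_ID_Y, so the
// caller drops that assumed bit and refrains from adding the matching "no"
// attribute:
//
//   bool NonKernelOnly = false, NeedsImplicit = false;
//   ImplicitArgumentMask Mask = intrinsicToAttrMask(
//       Intrinsic::amdgcn_workitem_id_y, NonKernelOnly, NeedsImplicit,
//       /*HasApertureRegs=*/false, /*SupportsGetDoorBellID=*/false,
//       AMDGPU::AMDHSA_COV5);
//   // Mask == WORKITEM_ID_Y, NonKernelOnly == false, NeedsImplicit == false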
static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

/// Returns true if the function requires the implicit argument be passed
/// regardless of the function contents.
static bool funcRequiresHostcallPtr(const Function &F) {
  // Sanitizers require the hostcall buffer passed in the implicit arguments.
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}
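// Editorial note (assumption, not stated in this file): the device-side
// sanitizer runtimes report diagnostics through the hostcall mechanism, so
// the hostcall buffer, and the implicitarg_ptr used to reach it, must stay
// live even when the function body shows no direct hostcall user at this
// point in the pipeline.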
class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM),
        CodeObjectVersion(AMDGPU::getCodeObjectVersion(M)) {}

  TargetMachine &TM;

  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

  /// Check if the subtarget supports GetDoorbellID.
  bool supportsGetDoorbellID(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.supportsGetDoorbellID();
  }

  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getFlatWorkGroupSizes(F);
  }

  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

  /// Get code object version.
  unsigned getCodeObjectVersion() const { return CodeObjectVersion; }
private:
  /// Check if the ConstantExpr \p CE requires the queue pointer.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }
    return Result;
  }

public:
  /// Returns true if \p Fn needs the queue pointer because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constant.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Used to determine if the Constant needs the queue pointer.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
  const unsigned CodeObjectVersion;
};
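// Illustration (hedged): on a subtarget without aperture registers, a
// constant such as
//   addrspacecast ([4 x i32] addrspace(3)* @lds to [4 x i32]*)
// walks to an ADDR_SPACE_CAST bit, so needsQueuePtr() returns true: the
// cast from local to flat needs the shared-segment aperture, which pre-V5
// is read out of the queue pointer.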
struct AAAMDAttributes
    : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
                          AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;
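// State convention (summary of the deduction below): an *assumed* bit means
// "this implicit input is still believed unused", so the matching attribute
// string from ImplicitAttrs (an "amdgpu-no-*" name) may eventually be
// manifested; removeAssumedBits() records that the input is, or may be,
// used and blocks the attribute.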
struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;

  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override {
    return "AAUniformWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;
struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the boolean value is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};
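// Net effect (hedged illustration): a device function reachable only from
// kernels carrying "uniform-work-group-size"="true" keeps the optimistic
// boolean state and is manifested with the same attribute; any caller
// lacking it clamps the state, and the function receives
// "uniform-work-group-size"="false" instead.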
AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}
struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();

    // If the function requires the implicit arg pointer due to sanitizers,
    // assume it is needed even if it is not used.
    const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
    if (NeedsHostcall) {
      removeAssumedBits(IMPLICIT_ARG_PTR);
      removeAssumedBits(HOSTCALL_PTR);
    }

    for (auto Attr : ImplicitAttrs) {
      if (NeedsHostcall &&
          (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
        continue;

      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Ignore functions with graphics calling conventions, these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }
  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state used to determine a change.
    auto OrigAssumed = getAssumed();

    // Check for Intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (AAEdges.hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    bool NeedsImplicit = false;
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
    bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
    unsigned COV = InfoCache.getCodeObjectVersion();

    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
            *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        *this &= AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
                              HasApertureRegs, SupportsGetDoorbellID, COV);
      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if (IsNonEntryFunc || !NonKernelOnly)
          removeAssumedBits(AttrMask);
      }
    }

    // Need implicitarg_ptr to access queue_ptr, private_base, and shared_base.
    if (NeedsImplicit)
      removeAssumedBits(IMPLICIT_ARG_PTR);

    if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
      // Under V5, we need implicitarg_ptr + offsets to access private_base or
      // shared_base. We do not actually need queue_ptr.
      if (COV >= AMDGPU::AMDHSA_COV5)
        removeAssumedBits(IMPLICIT_ARG_PTR);
      else
        removeAssumedBits(QUEUE_PTR);
    }

    if (funcRetrievesMultigridSyncArg(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) &&
             "multigrid_sync_arg needs implicitarg_ptr");
      removeAssumedBits(MULTIGRID_SYNC_ARG);
    }

    if (funcRetrievesHostcallPtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
      removeAssumedBits(HOSTCALL_PTR);
    }

    if (funcRetrievesHeapPtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
      removeAssumedBits(HEAP_PTR);
    }

    if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
      removeAssumedBits(QUEUE_PTR);
    }

    if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A))
      removeAssumedBits(LDS_KERNEL_ID);

    if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
      removeAssumedBits(DEFAULT_QUEUE);

    if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
      removeAssumedBits(COMPLETION_ACTION);

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;
  }
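// Worked example (hedged): a kernel whose only interesting call is
// llvm.amdgcn.workitem.id.y loses the WORKITEM_ID_Y assumed bit in the
// callee loop above; every other bit survives to the fixpoint, so
// manifest() below adds the "no" attributes for all unused implicit inputs
// while leaving the y workitem id marked as required.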
  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  bool checkForQueuePtr(Attributor &A) {
    Function *F = getAssociatedFunction();
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than walking every
    // instruction by hand, so try it first. The queue pointer is not needed
    // if aperture registers are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need the queue pointer, nothing else to do.
    if (NeedsQueuePtr)
      return true;

    if (!IsNonEntryFunc && HasApertureRegs)
      return false;

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F))
              return true;
          }
        }
      }
    }

    return false;
  }
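  // The two-phase structure above is deliberate: addrspacecast
  // *instructions* are visible to the Attributor's instruction walk, but
  // casts folded into *constant expressions* are not, so a second pass over
  // every operand feeds them through AMDGPUInformationCache::needsQueuePtr().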
  bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
    if (COV < AMDGPU::AMDHSA_COV5)
      return false;
    AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
    if (COV < AMDGPU::AMDHSA_COV5)
      return false;
    AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }
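  // Each helper asks the same question for a different slot: can any load
  // reach these 8 bytes of the implicit kernarg segment? The offsets come
  // from AMDGPUBaseInfo and shift between code object versions, which is why
  // every query threads COV through; the fixed size of 8 reflects that each
  // slot holds a 64-bit pointer or handle.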
  bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
    // Check if this is a call to the implicitarg_ptr builtin that is used to
    // retrieve the implicit kernel argument. The argument is only considered
    // unused if every use of the implicitarg_ptr is a load that clearly does
    // not touch any byte in Range.
    auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
        return true;

      const auto &PointerInfoAA = A.getAAFor<AAPointerInfo>(
          *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);

      return PointerInfoAA.forallInterferingAccesses(
          Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
            return Acc.getRemoteInst()->isDroppable();
          });
    };

    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
                                              UsedAssumedInformation);
  }

  bool funcRetrievesLDSKernelId(Attributor &A) {
    auto DoesNotRetrieve = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
    };
    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
                                              UsedAssumedInformation);
  }
};
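// Note the inverted predicate in both helpers: checkForAllCallLikeInstructions
// returns true only when *every* call satisfies "does not lead to the kernarg
// location", so negating it yields "some call may retrieve the argument",
// which is exactly the conservative answer updateImpl() needs.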
AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}
/// Propagate amdgpu-flat-work-group-size attribute.
struct AAAMDFlatWorkGroupSize
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : Base(IRP, 32) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned MinGroupSize, MaxGroupSize;
    std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
    intersectKnown(
        ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned Min, Max;
    std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);

    // Don't add the attribute if it's the implied default.
    if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
      return ChangeStatus::UNCHANGED;

    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;

    AttrList.push_back(
        Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDFlatWorkGroupSize[";
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override {
    return "AAAMDFlatWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}
class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");
    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M)
      if (!F.isIntrinsic())
        Functions.insert(&F);

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
    DenseSet<const char *> Allowed(
        {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
         &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID,
         &AAPointerInfo::ID, &AAPotentialConstantValues::ID});

    AttributorConfig AC(CGUpdater);
    AC.Allowed = &Allowed;
    AC.IsModulePass = true;
    AC.DefaultInitializeLiveInternals = false;

    Attributor A(Functions, InfoCache, AC);

    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
        if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
          A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  TargetMachine *TM;
  static char ID;
};

char AMDGPUAttributor::ID = 0;
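// The legacy pass is constructed via createAMDGPUAttributorPass() (declared
// in AMDGPU.h) and registered with the usual INITIALIZE_PASS_BEGIN /
// INITIALIZE_PASS_DEPENDENCY(CycleInfoWrapperPass) / INITIALIZE_PASS_END
// boilerplate, which this excerpt omits.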