34 ArgAlign =
DL.getABITypeAlign(Ty);
36 return std::pair(Ty, *ArgAlign);
42 "amdgpu-dump-hsa-metadata",
43 cl::desc(
"Dump AMDGPU HSA Metadata"));
45 "amdgpu-verify-hsa-metadata",
46 cl::desc(
"Verify AMDGPU HSA Metadata"));
56 errs() <<
"AMDGPU HSA Metadata:\n" << HSAMetadataString <<
'\n';
60 errs() <<
"AMDGPU HSA Metadata Parser Test: ";
64 if (!FromHSAMetadataString.
fromYAML(HSAMetadataString)) {
69 std::string ToHSAMetadataString;
71 FromHSAMetadataString.
toYAML(StrOS);
73 errs() << (HSAMetadataString == StrOS.
str() ?
"PASS" :
"FAIL") <<
'\n';
74 if (HSAMetadataString != ToHSAMetadataString) {
75 errs() <<
"Original input: " << HSAMetadataString <<
'\n'
76 <<
"Produced output: " << StrOS.
str() <<
'\n';
80std::optional<StringRef>
83 .Case(
"read_only",
StringRef(
"read_only"))
84 .Case(
"write_only",
StringRef(
"write_only"))
85 .Case(
"read_write",
StringRef(
"read_write"))
116 .
Case(
"image1d_t",
"image")
117 .
Case(
"image1d_array_t",
"image")
118 .
Case(
"image1d_buffer_t",
"image")
119 .
Case(
"image2d_t",
"image")
120 .
Case(
"image2d_array_t",
"image")
121 .
Case(
"image2d_array_depth_t",
"image")
122 .
Case(
"image2d_array_msaa_t",
"image")
123 .
Case(
"image2d_array_msaa_depth_t",
"image")
124 .
Case(
"image2d_depth_t",
"image")
125 .
Case(
"image2d_msaa_t",
"image")
126 .
Case(
"image2d_msaa_depth_t",
"image")
127 .
Case(
"image3d_t",
"image")
128 .
Case(
"sampler_t",
"sampler")
129 .
Case(
"queue_t",
"queue")
132 ?
"dynamic_shared_pointer"
165 auto VecTy = cast<FixedVectorType>(Ty);
166 auto ElTy = VecTy->getElementType();
167 auto NumElements = VecTy->getNumElements();
178 if (Node->getNumOperands() != 3)
181 for (
auto &
Op : Node->operands())
182 Dims.push_back(Dims.getDocument()->getNode(
183 uint64_t(mdconst::extract<ConstantInt>(
Op)->getZExtValue())));
189 Version.push_back(Version.getDocument()->getNode(
VersionMajorV3));
190 Version.push_back(Version.getDocument()->getNode(
VersionMinorV3));
195 auto Node =
Mod.getNamedMetadata(
"llvm.printf.fmts");
200 for (
auto *
Op : Node->operands())
202 Printf.push_back(Printf.getDocument()->getNode(
203 cast<MDString>(
Op->getOperand(0))->getString(),
true));
210 auto Node = Func.getParent()->getNamedMetadata(
"opencl.ocl.version");
211 if (!Node || !Node->getNumOperands())
213 auto Op0 = Node->getOperand(0);
214 if (Op0->getNumOperands() <= 1)
220 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
222 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
229 if (
auto Node = Func.getMetadata(
"reqd_work_group_size"))
231 if (
auto Node = Func.getMetadata(
"work_group_size_hint"))
233 if (
auto Node = Func.getMetadata(
"vec_type_hint")) {
236 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
237 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
240 if (Func.hasFnAttribute(
"runtime-handle")) {
242 Func.getFnAttribute(
"runtime-handle").getValueAsString().str(),
245 if (Func.hasFnAttribute(
"device-init"))
247 else if (Func.hasFnAttribute(
"device-fini"))
256 for (
auto &Arg : Func.args())
261 Kern[
".args"] = Args;
272 Node = Func->getMetadata(
"kernel_arg_name");
273 if (Node && ArgNo < Node->getNumOperands())
274 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
279 Node = Func->getMetadata(
"kernel_arg_type");
280 if (Node && ArgNo < Node->getNumOperands())
281 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
284 Node = Func->getMetadata(
"kernel_arg_base_type");
285 if (Node && ArgNo < Node->getNumOperands())
286 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
292 ActAccQual =
"read_only";
294 ActAccQual =
"write_only";
298 Node = Func->getMetadata(
"kernel_arg_access_qual");
299 if (Node && ArgNo < Node->getNumOperands())
300 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
303 Node = Func->getMetadata(
"kernel_arg_type_qual");
304 if (Node && ArgNo < Node->getNumOperands())
305 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
307 const DataLayout &
DL = Func->getParent()->getDataLayout();
313 if (
auto PtrTy = dyn_cast<PointerType>(Ty)) {
325 PointeeAlign,
Name, TypeName, BaseTypeName, ActAccQual,
334 auto Arg = Args.getDocument()->getMapNode();
337 Arg[
".name"] = Arg.getDocument()->getNode(
Name,
true);
338 if (!TypeName.empty())
339 Arg[
".type_name"] = Arg.getDocument()->getNode(TypeName,
true);
340 auto Size =
DL.getTypeAllocSize(Ty);
341 Arg[
".size"] = Arg.getDocument()->getNode(
Size);
343 Arg[
".offset"] = Arg.getDocument()->getNode(
Offset);
345 Arg[
".value_kind"] = Arg.getDocument()->getNode(
ValueKind,
true);
347 Arg[
".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
349 if (
auto PtrTy = dyn_cast<PointerType>(Ty))
353 Arg[
".address_space"] = Arg.getDocument()->getNode(*Qualifier,
357 Arg[
".access"] = Arg.getDocument()->getNode(*AQ,
true);
360 Arg[
".actual_access"] = Arg.getDocument()->getNode(*AAQ,
true);
363 TypeQual.
split(SplitTypeQuals,
" ", -1,
false);
366 Arg[
".is_const"] = Arg.getDocument()->getNode(
true);
367 else if (Key ==
"restrict")
368 Arg[
".is_restrict"] = Arg.getDocument()->getNode(
true);
369 else if (Key ==
"volatile")
370 Arg[
".is_volatile"] = Arg.getDocument()->getNode(
true);
371 else if (Key ==
"pipe")
372 Arg[
".is_pipe"] = Arg.getDocument()->getNode(
true);
383 unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
384 if (!HiddenArgNumBytes)
387 const Module *M = Func.getParent();
388 auto &
DL = M->getDataLayout();
393 if (HiddenArgNumBytes >= 8)
396 if (HiddenArgNumBytes >= 16)
399 if (HiddenArgNumBytes >= 24)
406 if (HiddenArgNumBytes >= 32) {
410 if (M->getNamedMetadata(
"llvm.printf.fmts"))
413 else if (!Func.hasFnAttribute(
"amdgpu-no-hostcall-ptr"))
422 if (HiddenArgNumBytes >= 40) {
423 if (!Func.hasFnAttribute(
"amdgpu-no-default-queue")) {
431 if (HiddenArgNumBytes >= 48) {
432 if (!Func.hasFnAttribute(
"amdgpu-no-completion-action")) {
441 if (HiddenArgNumBytes >= 56) {
442 if (!Func.hasFnAttribute(
"amdgpu-no-multigrid-sync-arg")) {
453 unsigned CodeObjectVersion)
const {
460 Align MaxKernArgAlign;
461 Kern[
".kernarg_segment_size"] = Kern.getDocument()->getNode(
463 Kern[
".group_segment_fixed_size"] =
464 Kern.getDocument()->getNode(ProgramInfo.
LDSSize);
465 Kern[
".private_segment_fixed_size"] =
466 Kern.getDocument()->getNode(ProgramInfo.
ScratchSize);
468 Kern[
".uses_dynamic_stack"] =
472 Kern[
".workgroup_processor_mode"] =
473 Kern.getDocument()->getNode(ProgramInfo.
WgpMode);
476 Kern[
".kernarg_segment_align"] =
477 Kern.getDocument()->getNode(std::max(
Align(4), MaxKernArgAlign).
value());
478 Kern[
".wavefront_size"] =
480 Kern[
".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.
NumSGPR);
481 Kern[
".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.
NumVGPR);
485 Kern[
".agpr_count"] = Kern.getDocument()->getNode(ProgramInfo.
NumAccVGPR);
488 Kern[
".max_flat_workgroup_size"] =
489 Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
490 Kern[
".sgpr_spill_count"] =
491 Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
492 Kern[
".vgpr_spill_count"] =
493 Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
510 std::string HSAMetadataString;
536 (
Twine(Func.getName()) +
Twine(
".kd")).str(),
true);
542 Kernels.push_back(Kern);
551 Version.push_back(Version.getDocument()->getNode(
VersionMajorV4));
552 Version.push_back(Version.getDocument()->getNode(
VersionMinorV4));
576 Version.push_back(Version.getDocument()->getNode(
VersionMajorV5));
577 Version.push_back(Version.getDocument()->getNode(
VersionMinorV5));
587 if (ST.getImplicitArgNumBytes(Func) == 0)
590 const Module *M = Func.getParent();
591 auto &
DL = M->getDataLayout();
626 if (M->getNamedMetadata(
"llvm.printf.fmts")) {
633 if (!Func.hasFnAttribute(
"amdgpu-no-hostcall-ptr")) {
640 if (!Func.hasFnAttribute(
"amdgpu-no-multigrid-sync-arg")) {
647 if (!Func.hasFnAttribute(
"amdgpu-no-heap-ptr"))
652 if (!Func.hasFnAttribute(
"amdgpu-no-default-queue")) {
659 if (!Func.hasFnAttribute(
"amdgpu-no-completion-action")) {
670 if (!ST.hasApertureRegs()) {
685 if (Func.getFnAttribute(
"uniform-work-group-size").getValueAsBool())
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Given that RA is a live value
AMD GCN specific subclass of TargetSubtarget.
Module.h This file contains the declarations for the Module class.
Defines struct to track resource usage and hardware flags for kernels and entry functions.
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
unsigned getWavefrontSize() const
virtual bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict)
Emit HSA Metadata.
std::string toString() const
This class represents an incoming formal argument to a Function.
Type * getParamByRefType() const
If this is a byref argument, return its type.
bool hasNoAliasAttr() const
Return true if this argument has the noalias attribute.
bool hasByRefAttr() const
Return true if this argument has the byref attribute.
bool onlyReadsMemory() const
Return true if this argument has the readonly or readnone attribute.
bool hasAttribute(Attribute::AttrKind Kind) const
Check if an argument has a given attribute.
const Function * getParent() const
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
MaybeAlign getParamAlign() const
If this is a byval or inalloca argument, return its alignment.
This class represents an Operation in the Expression.
uint64_t getNumOperands() const
A parsed version of the target data layout string in and methods for querying it.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
A Module instance is used to store all the information related to an LLVM module.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
GCNUserSGPRUsageInfo & getUserSGPRInfo()
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
@ HalfTyID
16-bit floating point type
@ FloatTyID
32-bit floating point type
@ IntegerTyID
Arbitrary bit width integers.
@ FixedVectorTyID
Fixed width SIMD vector type.
@ DoubleTyID
64-bit floating point type
static IntegerType * getInt16Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
TypeID getTypeID() const
Return the type id for the type.
Type * getType() const
All values are typed, get the type of this value.
StringRef getName() const
Return a constant reference to the value's name.
A DocNode that is an array.
ArrayDocNode & getArray(bool Convert=false)
Get an ArrayDocNode for an array node.
Document * getDocument() const
Simple in-memory representation of a document of msgpack objects with ability to find and create arra...
DocNode getNode()
Create a nil node associated with this Document.
ArrayDocNode getArrayNode()
Create an empty Array node associated with this Document.
void toYAML(raw_ostream &OS)
Convert MsgPack Document to YAML text.
bool fromYAML(StringRef S)
Read YAML text into the MsgPack document. Returns false on failure.
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
unsigned LanguageVersion(SourceLanguage L)
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr uint32_t VersionMajorV5
HSA metadata major version for code object V5.
constexpr uint32_t VersionMinorV4
HSA metadata minor version for code object V4.
constexpr uint32_t VersionMajorV3
HSA metadata major version for code object V3.
constexpr uint32_t VersionMinorV5
HSA metadata minor version for code object V5.
constexpr uint32_t VersionMinorV3
HSA metadata minor version for code object V3.
constexpr uint32_t VersionMajorV4
HSA metadata major version for code object V4.
unsigned getCodeObjectVersion(const Module &M)
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
This is an optimization pass for GlobalISel generic memory operations.
static cl::opt< bool > VerifyHSAMetadata("amdgpu-verify-hsa-metadata", cl::desc("Verify AMDGPU HSA Metadata"))
static cl::opt< bool > DumpHSAMetadata("amdgpu-dump-hsa-metadata", cl::desc("Dump AMDGPU HSA Metadata"))
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
StringRef getTypeName()
We provide a function which tries to compute the (demangled) name of a type statically.
@ Mod
The access may modify the value stored in memory.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
constexpr unsigned BitWidth
This struct is a compact representation of a valid (non-zero power of two) alignment.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Track resource usage for kernels / entry functions.