  if (Arg.hasByRefAttr()) {
    Ty = Arg.getParamByRefType();
    ArgAlign = Arg.getParamAlign();
  }

  if (!ArgAlign)
    ArgAlign = DL.getABITypeAlign(Ty);

  return std::make_pair(Ty, *ArgAlign);
}
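// Command-line flags that let developers dump the emitted HSA metadata and
// round-trip it through the parser as a self-check.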
42 "amdgpu-dump-hsa-metadata",
43 cl::desc(
"Dump AMDGPU HSA Metadata"));
45 "amdgpu-verify-hsa-metadata",
46 cl::desc(
"Verify AMDGPU HSA Metadata"));
void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
  errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
}
void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
  errs() << "AMDGPU HSA Metadata Parser Test: ";

  HSAMD::Metadata FromHSAMetadataString;
  if (fromString(HSAMetadataString, FromHSAMetadataString)) {
    errs() << "FAIL\n";
    return;
  }

  std::string ToHSAMetadataString;
  if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
    errs() << "FAIL\n";
    return;
  }

  errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
         << '\n';
  if (HSAMetadataString != ToHSAMetadataString) {
    errs() << "Original input: " << HSAMetadataString << '\n'
           << "Produced output: " << ToHSAMetadataString << '\n';
  }
}
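// getAddressSpaceQualifier() maps an LLVM address space number to the HSA
// qualifier; the value-kind classification below defaults pointers to global
// buffers (dynamic shared pointers for local memory) and everything else to
// by-value arguments.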
AddressSpaceQualifier
MetadataStreamerV2::getAddressSpaceQualifier(unsigned AddressSpace) const {
      .Default(isa<PointerType>(Ty) ?
std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
  if (!Signed)
    return (Twine('u') + getTypeName(Ty, true)).str();
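// Vector types are named as the element type followed by the element count,
// e.g. "float4".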
  auto VecTy = cast<FixedVectorType>(Ty);
  auto ElTy = VecTy->getElementType();
  auto NumElements = VecTy->getNumElements();
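// getWorkGroupDimensions() reads a three-operand MDNode such as
// !{i32 4, i32 4, i32 1}; any other operand count yields an empty vector.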
std::vector<uint32_t>
MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
  std::vector<uint32_t> Dims;
  if (Node->getNumOperands() != 3)
    return Dims;

  for (auto &Op : Node->operands())
    Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
  return Dims;
}
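// getHSACodeProps() gathers per-kernel code properties (segment sizes,
// register counts, spill counts) from the program info and machine function
// info.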
Kernel::CodeProps::Metadata
MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
                                    const SIProgramInfo &ProgramInfo) const {
  HSAMD::Kernel::CodeProps::Metadata HSACodeProps;

  Align MaxKernArgAlign;
  HSACodeProps.mKernargSegmentAlign =
      std::max(MaxKernArgAlign, Align(4)).value();

  HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
  HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
  HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
  HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
  HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
  HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
  HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
Kernel::DebugProps::Metadata
MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
                                     const SIProgramInfo &ProgramInfo) const {
  return HSAMD::Kernel::DebugProps::Metadata();
}
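// emitPrintf() records the format strings registered under the module's
// "llvm.printf.fmts" named metadata so the runtime can decode device printf
// output.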
void MetadataStreamerV2::emitPrintf(const Module &Mod) {
  auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
  if (!Node)
    return;

  for (auto Op : Node->operands())
    if (Op->getNumOperands())
      HSAMetadata.mPrintf.push_back(
          std::string(cast<MDString>(Op->getOperand(0))->getString()));
}
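// emitKernelLanguage() records the OpenCL version for the current kernel from
// the module-level "opencl.ocl.version" metadata.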
void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
  auto &Kernel = HSAMetadata.mKernels.back();

  auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
  if (!Node || !Node->getNumOperands())
    return;
  auto Op0 = Node->getOperand(0);
  if (Op0->getNumOperands() <= 1)
    return;

  Kernel.mLanguage = "OpenCL C";
  Kernel.mLanguageVersion.push_back(
      mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
  Kernel.mLanguageVersion.push_back(
      mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
}
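// emitKernelAttrs() translates the "reqd_work_group_size",
// "work_group_size_hint" and "vec_type_hint" metadata and the
// "runtime-handle" attribute into kernel attribute metadata.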
void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
  auto &Attrs = HSAMetadata.mKernels.back().mAttrs;

  if (auto Node = Func.getMetadata("reqd_work_group_size"))
    Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
  if (auto Node = Func.getMetadata("work_group_size_hint"))
    Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
  if (auto Node = Func.getMetadata("vec_type_hint")) {
    Attrs.mVecTypeHint = getTypeName(
        cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
        mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
  }
  if (Func.hasFnAttribute("runtime-handle")) {
    Attrs.mRuntimeHandle =
        Func.getFnAttribute("runtime-handle").getValueAsString().str();
  }
}
void MetadataStreamerV2::emitKernelArgs(const Function &Func,
                                        const GCNSubtarget &ST) {
  for (auto &Arg : Func.args())
    emitKernelArg(Arg);

  emitHiddenKernelArgs(Func, ST);
}
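// Explicit-argument emission: name and type strings come from the
// "kernel_arg_*" function metadata when present, otherwise from the IR
// argument itself.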
void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
  auto Func = Arg.getParent();
  auto ArgNo = Arg.getArgNo();

  Node = Func->getMetadata("kernel_arg_name");
  if (Node && ArgNo < Node->getNumOperands())
    Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
  else if (Arg.hasName())
    Name = Arg.getName();

  Node = Func->getMetadata("kernel_arg_type");
  if (Node && ArgNo < Node->getNumOperands())
    TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();

  Node = Func->getMetadata("kernel_arg_base_type");
  if (Node && ArgNo < Node->getNumOperands())
    BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();

  if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
      Arg.hasNoAliasAttr()) {
    AccQual = "read_only";
  } else {
    Node = Func->getMetadata("kernel_arg_access_qual");
    if (Node && ArgNo < Node->getNumOperands())
      AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
  }

  Node = Func->getMetadata("kernel_arg_type_qual");
  if (Node && ArgNo < Node->getNumOperands())
    TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
  if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) {

  emitKernelArg(DL, ArgTy, ArgAlign,
  HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
  auto &Arg = HSAMetadata.mKernels.back().mArgs.back();

  Arg.mSize = DL.getTypeAllocSize(Ty);

  if (auto PtrTy = dyn_cast<PointerType>(Ty))
    Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());

  TypeQual.split(SplitTypeQuals, " ", -1, false);

      .Case("restrict", &Arg.mIsRestrict)
      .Case("volatile", &Arg.mIsVolatile)
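// Hidden (implicit) kernel arguments for the V2 metadata; which ones are
// emitted depends on how many implicit-arg bytes the subtarget reports and on
// per-function attributes.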
void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func,
                                              const GCNSubtarget &ST) {
  unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
  if (!HiddenArgNumBytes)
    return;

  auto &DL = Func.getParent()->getDataLayout();

  if (HiddenArgNumBytes >= 8)
  if (HiddenArgNumBytes >= 16)
  if (HiddenArgNumBytes >= 24)

  if (HiddenArgNumBytes >= 32) {
    if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
    else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))

  if (HiddenArgNumBytes >= 48) {
    if (Func.hasFnAttribute("calls-enqueue-kernel")) {

  if (HiddenArgNumBytes >= 56) {
    if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg"))
  std::string HSAMetadataString;
  if (toString(HSAMetadata, HSAMetadataString))
    return;

  if (DumpHSAMetadata)
    dump(HSAMetadataString);
  if (VerifyHSAMetadata)
    verify(HSAMetadataString);
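// Per-kernel emission: collect code/debug properties, then record the kernel
// name, its "<name>@kd" symbol, language, attributes and arguments.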
  auto CodeProps = getHSACodeProps(MF, ProgramInfo);
  auto DebugProps = getHSADebugProps(MF, ProgramInfo);

  HSAMetadata.mKernels.push_back(Kernel::Metadata());
  auto &Kernel = HSAMetadata.mKernels.back();

  Kernel.mName = std::string(Func.getName());
  Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
  emitKernelLanguage(Func);
  emitKernelAttrs(Func);
  emitKernelArgs(Func, ST);
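// The MsgPack-based streamer (code object V3 and later) has equivalent
// dump/verify helpers; verify() round-trips the document through YAML.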
  errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';

  errs() << "AMDGPU HSA Metadata Parser Test: ";

  if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
    errs() << "FAIL\n";
    return;
  }

  std::string ToHSAMetadataString;
  raw_string_ostream StrOS(ToHSAMetadataString);
  FromHSAMetadataString.toYAML(StrOS);

  errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
  if (HSAMetadataString != ToHSAMetadataString) {
    errs() << "Original input: " << HSAMetadataString << '\n'
           << "Produced output: " << StrOS.str() << '\n';
  }
}
      .Case("read_only",  StringRef("read_only"))
      .Case("write_only", StringRef("write_only"))
      .Case("read_write", StringRef("read_write"))
      .Case("image1d_t", "image")
      .Case("image1d_array_t", "image")
      .Case("image1d_buffer_t", "image")
      .Case("image2d_t", "image")
      .Case("image2d_array_t", "image")
      .Case("image2d_array_depth_t", "image")
      .Case("image2d_array_msaa_t", "image")
      .Case("image2d_array_msaa_depth_t", "image")
      .Case("image2d_depth_t", "image")
      .Case("image2d_msaa_t", "image")
      .Case("image2d_msaa_depth_t", "image")
      .Case("image3d_t", "image")
      .Case("sampler_t", "sampler")
      .Case("queue_t", "queue")

          ? "dynamic_shared_pointer"
  auto VecTy = cast<FixedVectorType>(Ty);
  auto ElTy = VecTy->getElementType();
  auto NumElements = VecTy->getNumElements();
  if (Node->getNumOperands() != 3)
    return Dims;

  for (auto &Op : Node->operands())
    Dims.push_back(Dims.getDocument()->getNode(
        uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
  return Dims;
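// The MsgPack printf handling mirrors the V2 streamer, storing each format
// string from "llvm.printf.fmts" as a node in the metadata document.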
  for (auto Op : Node->operands())
    if (Op->getNumOperands())
          cast<MDString>(Op->getOperand(0))->getString(), true));
  auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
  if (!Node || !Node->getNumOperands())
    return;
  auto Op0 = Node->getOperand(0);
  if (Op0->getNumOperands() <= 1)
    return;

      mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
      mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
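// The MsgPack kernel attributes cover the same IR metadata as V2 and
// additionally check the "device-init"/"device-fini" function attributes.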
  if (auto Node = Func.getMetadata("reqd_work_group_size"))
  if (auto Node = Func.getMetadata("work_group_size_hint"))
  if (auto Node = Func.getMetadata("vec_type_hint")) {
      cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
      mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),

  if (Func.hasFnAttribute("runtime-handle")) {
    Func.getFnAttribute("runtime-handle").getValueAsString().str(),

  if (Func.hasFnAttribute("device-init"))
  else if (Func.hasFnAttribute("device-fini"))
  for (auto &Arg : Func.args())
    emitKernelArg(Arg, Offset, Args);

  Kern[".args"] = Args;
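// MsgPack argument emission mirrors V2, but each argument becomes a map node
// (".name", ".size", ".offset", ...) and the running Offset tracks the kernarg
// segment layout.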
  auto Func = Arg.getParent();
  auto ArgNo = Arg.getArgNo();

  Node = Func->getMetadata("kernel_arg_name");
  if (Node && ArgNo < Node->getNumOperands())
    Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
  else if (Arg.hasName())
    Name = Arg.getName();

  Node = Func->getMetadata("kernel_arg_type");
  if (Node && ArgNo < Node->getNumOperands())
    TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();

  Node = Func->getMetadata("kernel_arg_base_type");
  if (Node && ArgNo < Node->getNumOperands())
    BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();

  if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
      Arg.hasNoAliasAttr()) {
    AccQual = "read_only";
  } else {
    Node = Func->getMetadata("kernel_arg_access_qual");
    if (Node && ArgNo < Node->getNumOperands())
      AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
  }

  Node = Func->getMetadata("kernel_arg_type_qual");
  if (Node && ArgNo < Node->getNumOperands())
    TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();

  const DataLayout &DL = Func->getParent()->getDataLayout();

  Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
  if (auto PtrTy = dyn_cast<PointerType>(Ty)) {

  auto Arg = Args.getDocument()->getMapNode();

  Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);

  auto Size = DL.getTypeAllocSize(Ty);
  Arg[".size"] = Arg.getDocument()->getNode(Size);
  Offset = alignTo(Offset, Alignment);
  Arg[".offset"] = Arg.getDocument()->getNode(Offset);

  if (auto PtrTy = dyn_cast<PointerType>(Ty))
    if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
      Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true);

  if (auto AQ = getAccessQualifier(AccQual))
    Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);

  TypeQual.split(SplitTypeQuals, " ", -1, false);
  for (StringRef Key : SplitTypeQuals) {
    if (Key == "const")
      Arg[".is_const"] = Arg.getDocument()->getNode(true);
    else if (Key == "restrict")
      Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
    else if (Key == "volatile")
      Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
    else if (Key == "pipe")
      Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
  }
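// MsgPack hidden kernel arguments: the layout grows with the number of
// implicit-arg bytes the subtarget reports, starting at the implicit-arg
// pointer alignment.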
  unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
  if (!HiddenArgNumBytes)
    return;

  const Module *M = Func.getParent();
  auto &DL = M->getDataLayout();

  Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());

  if (HiddenArgNumBytes >= 8)
  if (HiddenArgNumBytes >= 16)
  if (HiddenArgNumBytes >= 24)

  if (HiddenArgNumBytes >= 32) {
    if (M->getNamedMetadata("llvm.printf.fmts"))
    else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))

  if (HiddenArgNumBytes >= 48) {
    if (Func.hasFnAttribute("calls-enqueue-kernel")) {

  if (HiddenArgNumBytes >= 56) {
    if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
  Align MaxKernArgAlign;
  Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
  Kern[".group_segment_fixed_size"] =
      Kern.getDocument()->getNode(ProgramInfo.LDSSize);
  Kern[".private_segment_fixed_size"] =
      Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
  Kern[".kernarg_segment_align"] =
      Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
  Kern[".wavefront_size"] =
  Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
  Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
  Kern[".agpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumAccVGPR);
  Kern[".max_flat_workgroup_size"] =
      Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
  Kern[".sgpr_spill_count"] =
      Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
  Kern[".vgpr_spill_count"] =
      Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
  std::string HSAMetadataString;

  Kern[".symbol"] = Kern.getDocument()->getNode(
      (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
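// A later hidden-argument emitter (presumably the code object V5 layout in
// upstream LLVM) gates individual arguments on "amdgpu-no-*" function
// attributes rather than byte-count thresholds, and also handles targets
// without aperture registers.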
  if (ST.getImplicitArgNumBytes(Func) == 0)
    return;

  const Module *M = Func.getParent();
  auto &DL = M->getDataLayout();

  Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());

  if (M->getNamedMetadata("llvm.printf.fmts")) {

  if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {

  if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {

  if (!Func.hasFnAttribute("amdgpu-no-heap-ptr"))

  if (Func.hasFnAttribute("calls-enqueue-kernel")) {

  if (!ST.hasApertureRegs()) {