LLVM 18.0.0git
AMDGPUHSAMetadataStreamer.cpp
Go to the documentation of this file.
1//===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// AMDGPU HSA Metadata Streamer.
11///
12//
13//===----------------------------------------------------------------------===//
14
16#include "AMDGPU.h"
17#include "GCNSubtarget.h"
20#include "SIProgramInfo.h"
21#include "llvm/IR/Module.h"
22using namespace llvm;
23
24static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
25 const DataLayout &DL) {
26 Type *Ty = Arg.getType();
27 MaybeAlign ArgAlign;
28 if (Arg.hasByRefAttr()) {
29 Ty = Arg.getParamByRefType();
30 ArgAlign = Arg.getParamAlign();
31 }
32
33 if (!ArgAlign)
34 ArgAlign = DL.getABITypeAlign(Ty);
35
36 return std::pair(Ty, *ArgAlign);
37}
38
39namespace llvm {
40
42 "amdgpu-dump-hsa-metadata",
43 cl::desc("Dump AMDGPU HSA Metadata"));
45 "amdgpu-verify-hsa-metadata",
46 cl::desc("Verify AMDGPU HSA Metadata"));
47
48namespace AMDGPU {
49namespace HSAMD {
50
51//===----------------------------------------------------------------------===//
52// HSAMetadataStreamerV3
53//===----------------------------------------------------------------------===//
54
55void MetadataStreamerMsgPackV3::dump(StringRef HSAMetadataString) const {
56 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
57}
58
59void MetadataStreamerMsgPackV3::verify(StringRef HSAMetadataString) const {
60 errs() << "AMDGPU HSA Metadata Parser Test: ";
61
62 msgpack::Document FromHSAMetadataString;
63
64 if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
65 errs() << "FAIL\n";
66 return;
67 }
68
69 std::string ToHSAMetadataString;
70 raw_string_ostream StrOS(ToHSAMetadataString);
71 FromHSAMetadataString.toYAML(StrOS);
72
73 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
74 if (HSAMetadataString != ToHSAMetadataString) {
75 errs() << "Original input: " << HSAMetadataString << '\n'
76 << "Produced output: " << StrOS.str() << '\n';
77 }
78}
79
80std::optional<StringRef>
83 .Case("read_only", StringRef("read_only"))
84 .Case("write_only", StringRef("write_only"))
85 .Case("read_write", StringRef("read_write"))
86 .Default(std::nullopt);
87}
88
90 unsigned AddressSpace) const {
91 switch (AddressSpace) {
93 return StringRef("private");
95 return StringRef("global");
97 return StringRef("constant");
99 return StringRef("local");
101 return StringRef("generic");
103 return StringRef("region");
104 default:
105 return std::nullopt;
106 }
107}
108
111 StringRef BaseTypeName) const {
112 if (TypeQual.contains("pipe"))
113 return "pipe";
114
115 return StringSwitch<StringRef>(BaseTypeName)
116 .Case("image1d_t", "image")
117 .Case("image1d_array_t", "image")
118 .Case("image1d_buffer_t", "image")
119 .Case("image2d_t", "image")
120 .Case("image2d_array_t", "image")
121 .Case("image2d_array_depth_t", "image")
122 .Case("image2d_array_msaa_t", "image")
123 .Case("image2d_array_msaa_depth_t", "image")
124 .Case("image2d_depth_t", "image")
125 .Case("image2d_msaa_t", "image")
126 .Case("image2d_msaa_depth_t", "image")
127 .Case("image3d_t", "image")
128 .Case("sampler_t", "sampler")
129 .Case("queue_t", "queue")
130 .Default(isa<PointerType>(Ty)
132 ? "dynamic_shared_pointer"
133 : "global_buffer")
134 : "by_value");
135}
136
138 bool Signed) const {
139 switch (Ty->getTypeID()) {
140 case Type::IntegerTyID: {
141 if (!Signed)
142 return (Twine('u') + getTypeName(Ty, true)).str();
143
144 auto BitWidth = Ty->getIntegerBitWidth();
145 switch (BitWidth) {
146 case 8:
147 return "char";
148 case 16:
149 return "short";
150 case 32:
151 return "int";
152 case 64:
153 return "long";
154 default:
155 return (Twine('i') + Twine(BitWidth)).str();
156 }
157 }
158 case Type::HalfTyID:
159 return "half";
160 case Type::FloatTyID:
161 return "float";
162 case Type::DoubleTyID:
163 return "double";
165 auto VecTy = cast<FixedVectorType>(Ty);
166 auto ElTy = VecTy->getElementType();
167 auto NumElements = VecTy->getNumElements();
168 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
169 }
170 default:
171 return "unknown";
172 }
173}
174
177 auto Dims = HSAMetadataDoc->getArrayNode();
178 if (Node->getNumOperands() != 3)
179 return Dims;
180
181 for (auto &Op : Node->operands())
182 Dims.push_back(Dims.getDocument()->getNode(
183 uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
184 return Dims;
185}
186
188 auto Version = HSAMetadataDoc->getArrayNode();
189 Version.push_back(Version.getDocument()->getNode(VersionMajorV3));
190 Version.push_back(Version.getDocument()->getNode(VersionMinorV3));
191 getRootMetadata("amdhsa.version") = Version;
192}
193
195 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
196 if (!Node)
197 return;
198
199 auto Printf = HSAMetadataDoc->getArrayNode();
200 for (auto *Op : Node->operands())
201 if (Op->getNumOperands())
202 Printf.push_back(Printf.getDocument()->getNode(
203 cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
204 getRootMetadata("amdhsa.printf") = Printf;
205}
206
208 msgpack::MapDocNode Kern) {
209 // TODO: What about other languages?
210 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
211 if (!Node || !Node->getNumOperands())
212 return;
213 auto Op0 = Node->getOperand(0);
214 if (Op0->getNumOperands() <= 1)
215 return;
216
217 Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
219 LanguageVersion.push_back(Kern.getDocument()->getNode(
220 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
221 LanguageVersion.push_back(Kern.getDocument()->getNode(
222 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
223 Kern[".language_version"] = LanguageVersion;
224}
225
227 msgpack::MapDocNode Kern) {
228
229 if (auto Node = Func.getMetadata("reqd_work_group_size"))
230 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
231 if (auto Node = Func.getMetadata("work_group_size_hint"))
232 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
233 if (auto Node = Func.getMetadata("vec_type_hint")) {
234 Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
236 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
237 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
238 /*Copy=*/true);
239 }
240 if (Func.hasFnAttribute("runtime-handle")) {
241 Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
242 Func.getFnAttribute("runtime-handle").getValueAsString().str(),
243 /*Copy=*/true);
244 }
245 if (Func.hasFnAttribute("device-init"))
246 Kern[".kind"] = Kern.getDocument()->getNode("init");
247 else if (Func.hasFnAttribute("device-fini"))
248 Kern[".kind"] = Kern.getDocument()->getNode("fini");
249}
250
252 msgpack::MapDocNode Kern) {
253 auto &Func = MF.getFunction();
254 unsigned Offset = 0;
255 auto Args = HSAMetadataDoc->getArrayNode();
256 for (auto &Arg : Func.args())
257 emitKernelArg(Arg, Offset, Args);
258
259 emitHiddenKernelArgs(MF, Offset, Args);
260
261 Kern[".args"] = Args;
262}
263
265 unsigned &Offset,
267 auto Func = Arg.getParent();
268 auto ArgNo = Arg.getArgNo();
269 const MDNode *Node;
270
272 Node = Func->getMetadata("kernel_arg_name");
273 if (Node && ArgNo < Node->getNumOperands())
274 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
275 else if (Arg.hasName())
276 Name = Arg.getName();
277
278 StringRef TypeName;
279 Node = Func->getMetadata("kernel_arg_type");
280 if (Node && ArgNo < Node->getNumOperands())
281 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
282
283 StringRef BaseTypeName;
284 Node = Func->getMetadata("kernel_arg_base_type");
285 if (Node && ArgNo < Node->getNumOperands())
286 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
287
288 StringRef ActAccQual;
289 // Do we really need NoAlias check here?
290 if (Arg.getType()->isPointerTy() && Arg.hasNoAliasAttr()) {
291 if (Arg.onlyReadsMemory())
292 ActAccQual = "read_only";
293 else if (Arg.hasAttribute(Attribute::WriteOnly))
294 ActAccQual = "write_only";
295 }
296
297 StringRef AccQual;
298 Node = Func->getMetadata("kernel_arg_access_qual");
299 if (Node && ArgNo < Node->getNumOperands())
300 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
301
302 StringRef TypeQual;
303 Node = Func->getMetadata("kernel_arg_type_qual");
304 if (Node && ArgNo < Node->getNumOperands())
305 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
306
307 const DataLayout &DL = Func->getParent()->getDataLayout();
308
309 MaybeAlign PointeeAlign;
310 Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
311
312 // FIXME: Need to distinguish in memory alignment from pointer alignment.
313 if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
314 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
315 PointeeAlign = Arg.getParamAlign().valueOrOne();
316 }
317
318 // There's no distinction between byval aggregates and raw aggregates.
319 Type *ArgTy;
320 Align ArgAlign;
321 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
322
323 emitKernelArg(DL, ArgTy, ArgAlign,
324 getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
325 PointeeAlign, Name, TypeName, BaseTypeName, ActAccQual,
326 AccQual, TypeQual);
327}
328
330 const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
331 unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
332 StringRef Name, StringRef TypeName, StringRef BaseTypeName,
333 StringRef ActAccQual, StringRef AccQual, StringRef TypeQual) {
334 auto Arg = Args.getDocument()->getMapNode();
335
336 if (!Name.empty())
337 Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
338 if (!TypeName.empty())
339 Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
340 auto Size = DL.getTypeAllocSize(Ty);
341 Arg[".size"] = Arg.getDocument()->getNode(Size);
342 Offset = alignTo(Offset, Alignment);
343 Arg[".offset"] = Arg.getDocument()->getNode(Offset);
344 Offset += Size;
345 Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
346 if (PointeeAlign)
347 Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
348
349 if (auto PtrTy = dyn_cast<PointerType>(Ty))
350 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
351 // Limiting address space to emit only for a certain ValueKind.
352 if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer")
353 Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier,
354 /*Copy=*/true);
355
356 if (auto AQ = getAccessQualifier(AccQual))
357 Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
358
359 if (auto AAQ = getAccessQualifier(ActAccQual))
360 Arg[".actual_access"] = Arg.getDocument()->getNode(*AAQ, /*Copy=*/true);
361
362 SmallVector<StringRef, 1> SplitTypeQuals;
363 TypeQual.split(SplitTypeQuals, " ", -1, false);
364 for (StringRef Key : SplitTypeQuals) {
365 if (Key == "const")
366 Arg[".is_const"] = Arg.getDocument()->getNode(true);
367 else if (Key == "restrict")
368 Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
369 else if (Key == "volatile")
370 Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
371 else if (Key == "pipe")
372 Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
373 }
374
375 Args.push_back(Arg);
376}
377
379 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
380 auto &Func = MF.getFunction();
381 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
382
383 unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
384 if (!HiddenArgNumBytes)
385 return;
386
387 const Module *M = Func.getParent();
388 auto &DL = M->getDataLayout();
389 auto Int64Ty = Type::getInt64Ty(Func.getContext());
390
391 Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
392
393 if (HiddenArgNumBytes >= 8)
394 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
395 Args);
396 if (HiddenArgNumBytes >= 16)
397 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
398 Args);
399 if (HiddenArgNumBytes >= 24)
400 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
401 Args);
402
403 auto Int8PtrTy =
404 PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
405
406 if (HiddenArgNumBytes >= 32) {
407 // We forbid the use of features requiring hostcall when compiling OpenCL
408 // before code object V5, which makes the mutual exclusion between the
409 // "printf buffer" and "hostcall buffer" here sound.
410 if (M->getNamedMetadata("llvm.printf.fmts"))
411 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
412 Args);
413 else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
414 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
415 Args);
416 else
417 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
418 }
419
420 // Emit "default queue" and "completion action" arguments if enqueue kernel is
421 // used, otherwise emit dummy "none" arguments.
422 if (HiddenArgNumBytes >= 40) {
423 if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
424 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
425 Args);
426 } else {
427 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
428 }
429 }
430
431 if (HiddenArgNumBytes >= 48) {
432 if (!Func.hasFnAttribute("amdgpu-no-completion-action")) {
433 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
434 Args);
435 } else {
436 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
437 }
438 }
439
440 // Emit the pointer argument for multi-grid object.
441 if (HiddenArgNumBytes >= 56) {
442 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
443 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
444 Args);
445 } else {
446 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
447 }
448 }
449}
450
452 const MachineFunction &MF, const SIProgramInfo &ProgramInfo,
453 unsigned CodeObjectVersion) const {
454 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
456 const Function &F = MF.getFunction();
457
458 auto Kern = HSAMetadataDoc->getMapNode();
459
460 Align MaxKernArgAlign;
461 Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
462 STM.getKernArgSegmentSize(F, MaxKernArgAlign));
463 Kern[".group_segment_fixed_size"] =
464 Kern.getDocument()->getNode(ProgramInfo.LDSSize);
465 Kern[".private_segment_fixed_size"] =
466 Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
467 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
468 Kern[".uses_dynamic_stack"] =
469 Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack);
470
471 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5 && STM.supportsWGP())
472 Kern[".workgroup_processor_mode"] =
473 Kern.getDocument()->getNode(ProgramInfo.WgpMode);
474
475 // FIXME: The metadata treats the minimum as 16?
476 Kern[".kernarg_segment_align"] =
477 Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
478 Kern[".wavefront_size"] =
479 Kern.getDocument()->getNode(STM.getWavefrontSize());
480 Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
481 Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
482
483 // Only add AGPR count to metadata for supported devices
484 if (STM.hasMAIInsts()) {
485 Kern[".agpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumAccVGPR);
486 }
487
488 Kern[".max_flat_workgroup_size"] =
489 Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
490 Kern[".sgpr_spill_count"] =
491 Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
492 Kern[".vgpr_spill_count"] =
493 Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
494
495 return Kern;
496}
497
499 return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
500}
501
503 const IsaInfo::AMDGPUTargetID &TargetID) {
504 emitVersion();
506 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
507}
508
510 std::string HSAMetadataString;
511 raw_string_ostream StrOS(HSAMetadataString);
512 HSAMetadataDoc->toYAML(StrOS);
513
514 if (DumpHSAMetadata)
515 dump(StrOS.str());
517 verify(StrOS.str());
518}
519
521 const SIProgramInfo &ProgramInfo) {
522 auto &Func = MF.getFunction();
523 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL &&
524 Func.getCallingConv() != CallingConv::SPIR_KERNEL)
525 return;
526
527 auto CodeObjectVersion = AMDGPU::getCodeObjectVersion(*Func.getParent());
528 auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion);
529
530 auto Kernels =
531 getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
532
533 {
534 Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
535 Kern[".symbol"] = Kern.getDocument()->getNode(
536 (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
537 emitKernelLanguage(Func, Kern);
538 emitKernelAttrs(Func, Kern);
539 emitKernelArgs(MF, Kern);
540 }
541
542 Kernels.push_back(Kern);
543}
544
545//===----------------------------------------------------------------------===//
546// HSAMetadataStreamerV4
547//===----------------------------------------------------------------------===//
548
550 auto Version = HSAMetadataDoc->getArrayNode();
551 Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
552 Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
553 getRootMetadata("amdhsa.version") = Version;
554}
555
557 const IsaInfo::AMDGPUTargetID &TargetID) {
558 getRootMetadata("amdhsa.target") =
559 HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
560}
561
563 const IsaInfo::AMDGPUTargetID &TargetID) {
564 emitVersion();
565 emitTargetID(TargetID);
567 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
568}
569
570//===----------------------------------------------------------------------===//
571// HSAMetadataStreamerV5
572//===----------------------------------------------------------------------===//
573
575 auto Version = HSAMetadataDoc->getArrayNode();
576 Version.push_back(Version.getDocument()->getNode(VersionMajorV5));
577 Version.push_back(Version.getDocument()->getNode(VersionMinorV5));
578 getRootMetadata("amdhsa.version") = Version;
579}
580
582 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
583 auto &Func = MF.getFunction();
584 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
585
586 // No implicit kernel argument is used.
587 if (ST.getImplicitArgNumBytes(Func) == 0)
588 return;
589
590 const Module *M = Func.getParent();
591 auto &DL = M->getDataLayout();
593
594 auto Int64Ty = Type::getInt64Ty(Func.getContext());
595 auto Int32Ty = Type::getInt32Ty(Func.getContext());
596 auto Int16Ty = Type::getInt16Ty(Func.getContext());
597
598 Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
599 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args);
600 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args);
601 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args);
602
603 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args);
604 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args);
605 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args);
606
607 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args);
608 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args);
609 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args);
610
611 // Reserved for hidden_tool_correlation_id.
612 Offset += 8;
613
614 Offset += 8; // Reserved.
615
616 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args);
617 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args);
618 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args);
619
620 emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args);
621
622 Offset += 6; // Reserved.
623 auto Int8PtrTy =
624 PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
625
626 if (M->getNamedMetadata("llvm.printf.fmts")) {
627 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
628 Args);
629 } else {
630 Offset += 8; // Skipped.
631 }
632
633 if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
634 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
635 Args);
636 } else {
637 Offset += 8; // Skipped.
638 }
639
640 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
641 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
642 Args);
643 } else {
644 Offset += 8; // Skipped.
645 }
646
647 if (!Func.hasFnAttribute("amdgpu-no-heap-ptr"))
648 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);
649 else
650 Offset += 8; // Skipped.
651
652 if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
653 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
654 Args);
655 } else {
656 Offset += 8; // Skipped.
657 }
658
659 if (!Func.hasFnAttribute("amdgpu-no-completion-action")) {
660 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
661 Args);
662 } else {
663 Offset += 8; // Skipped.
664 }
665
666 Offset += 72; // Reserved.
667
668 // hidden_private_base and hidden_shared_base are only when the subtarget has
669 // ApertureRegs.
670 if (!ST.hasApertureRegs()) {
671 emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args);
672 emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args);
673 } else {
674 Offset += 8; // Skipped.
675 }
676
677 if (MFI.getUserSGPRInfo().hasQueuePtr())
678 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
679}
680
682 msgpack::MapDocNode Kern) {
684
685 if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool())
686 Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);
687}
688
689
690} // end namespace HSAMD
691} // end namespace AMDGPU
692} // end namespace llvm
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static std::pair< Type *, Align > getArgumentTypeAlign(const Argument &Arg, const DataLayout &DL)
AMDGPU HSA Metadata Streamer.
Given that RA is a live value
std::string Name
uint64_t Size
AMD GCN specific subclass of TargetSubtarget.
#define F(x, y, z)
Definition: MD5.cpp:55
Module.h This file contains the declarations for the Module class.
IntegerType * Int32Ty
ppc ctr loops verify
Defines struct to track resource usage and hardware flags for kernels and entry functions.
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
unsigned getWavefrontSize() const
virtual bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict)
Emit HSA Metadata.
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) override
void emitKernelArgs(const MachineFunction &MF, msgpack::MapDocNode Kern)
std::string getTypeName(Type *Ty, bool Signed) const
void begin(const Module &Mod, const IsaInfo::AMDGPUTargetID &TargetID) override
bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override
void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) override
void dump(StringRef HSAMetadataString) const
std::unique_ptr< msgpack::Document > HSAMetadataDoc
void emitKernelAttrs(const Function &Func, msgpack::MapDocNode Kern) override
StringRef getValueKind(Type *Ty, StringRef TypeQual, StringRef BaseTypeName) const
std::optional< StringRef > getAccessQualifier(StringRef AccQual) const
void verify(StringRef HSAMetadataString) const
void emitKernelLanguage(const Function &Func, msgpack::MapDocNode Kern)
void emitKernelArg(const Argument &Arg, unsigned &Offset, msgpack::ArrayDocNode Args)
msgpack::ArrayDocNode getWorkGroupDimensions(MDNode *Node) const
std::optional< StringRef > getAddressSpaceQualifier(unsigned AddressSpace) const
msgpack::MapDocNode getHSAKernelProps(const MachineFunction &MF, const SIProgramInfo &ProgramInfo, unsigned CodeObjectVersion) const
void begin(const Module &Mod, const IsaInfo::AMDGPUTargetID &TargetID) override
void emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID)
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) override
void emitKernelAttrs(const Function &Func, msgpack::MapDocNode Kern) override
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
Type * getParamByRefType() const
If this is a byref argument, return its type.
Definition: Function.cpp:211
bool hasNoAliasAttr() const
Return true if this argument has the noalias attribute.
Definition: Function.cpp:242
bool hasByRefAttr() const
Return true if this argument has the byref attribute.
Definition: Function.cpp:115
bool onlyReadsMemory() const
Return true if this argument has the readonly or readnone attribute.
Definition: Function.cpp:278
bool hasAttribute(Attribute::AttrKind Kind) const
Check if an argument has a given attribute.
Definition: Function.cpp:308
const Function * getParent() const
Definition: Argument.h:40
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
Definition: Argument.h:46
MaybeAlign getParamAlign() const
If this is a byval or inalloca argument, return its alignment.
Definition: Function.cpp:192
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool hasMAIInsts() const
Definition: GCNSubtarget.h:755
bool supportsWGP() const
Definition: GCNSubtarget.h:318
Metadata node.
Definition: Metadata.h:950
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
GCNUserSGPRUsageInfo & getUserSGPRInfo()
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:704
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition: StringRef.h:428
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
@ HalfTyID
16-bit floating point type
Definition: Type.h:56
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ IntegerTyID
Arbitrary bit width integers.
Definition: Type.h:71
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition: Type.h:76
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
static IntegerType * getInt16Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:137
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
A DocNode that is an array.
ArrayDocNode & getArray(bool Convert=false)
Get an ArrayDocNode for an array node.
Document * getDocument() const
Simple in-memory representation of a document of msgpack objects with ability to find and create arra...
DocNode getNode()
Create a nil node associated with this Document.
ArrayDocNode getArrayNode()
Create an empty Array node associated with this Document.
void toYAML(raw_ostream &OS)
Convert MsgPack Document to YAML text.
bool fromYAML(StringRef S)
Read YAML text into the MsgPack document. Returns false on failure.
A DocNode that is a map.
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:642
std::string & str()
Returns the string's reference.
Definition: raw_ostream.h:660
unsigned LanguageVersion(SourceLanguage L)
Definition: Dwarf.cpp:380
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPU.h:392
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:395
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:394
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:390
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:391
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:396
constexpr uint32_t VersionMajorV5
HSA metadata major version for code object V5.
constexpr uint32_t VersionMinorV4
HSA metadata minor version for code object V4.
ValueKind
Value kinds.
constexpr uint32_t VersionMajorV3
HSA metadata major version for code object V3.
constexpr uint32_t VersionMinorV5
HSA metadata minor version for code object V5.
constexpr uint32_t VersionMinorV3
HSA metadata minor version for code object V3.
constexpr uint32_t VersionMajorV4
HSA metadata major version for code object V4.
unsigned getCodeObjectVersion(const Module &M)
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:197
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:141
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:440
static cl::opt< bool > VerifyHSAMetadata("amdgpu-verify-hsa-metadata", cl::desc("Verify AMDGPU HSA Metadata"))
AddressSpace
Definition: NVPTXBaseInfo.h:21
static cl::opt< bool > DumpHSAMetadata("amdgpu-dump-hsa-metadata", cl::desc("Dump AMDGPU HSA Metadata"))
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
StringRef getTypeName()
We provide a function which tries to compute the (demangled) name of a type statically.
Definition: TypeName.h:27
@ Mod
The access may modify the value stored in memory.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:184
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
Track resource usage for kernels / entry functions.
Definition: SIProgramInfo.h:25