LLVM  15.0.0git
AMDGPUHSAMetadataStreamer.cpp
Go to the documentation of this file.
1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU HSA Metadata Streamer.
11 ///
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "AMDGPU.h"
17 #include "GCNSubtarget.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "llvm/IR/Module.h"
22 using namespace llvm;
23 
24 static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
25  const DataLayout &DL) {
26  Type *Ty = Arg.getType();
27  MaybeAlign ArgAlign;
28  if (Arg.hasByRefAttr()) {
29  Ty = Arg.getParamByRefType();
30  ArgAlign = Arg.getParamAlign();
31  }
32 
33  if (!ArgAlign)
34  ArgAlign = DL.getABITypeAlign(Ty);
35 
36  return std::make_pair(Ty, *ArgAlign);
37 }
38 
39 namespace llvm {
40 
42  "amdgpu-dump-hsa-metadata",
43  cl::desc("Dump AMDGPU HSA Metadata"));
45  "amdgpu-verify-hsa-metadata",
46  cl::desc("Verify AMDGPU HSA Metadata"));
47 
48 namespace AMDGPU {
49 namespace HSAMD {
50 
51 //===----------------------------------------------------------------------===//
52 // HSAMetadataStreamerV2
53 //===----------------------------------------------------------------------===//
54 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
55  errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
56 }
57 
58 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
59  errs() << "AMDGPU HSA Metadata Parser Test: ";
60 
61  HSAMD::Metadata FromHSAMetadataString;
62  if (fromString(HSAMetadataString, FromHSAMetadataString)) {
63  errs() << "FAIL\n";
64  return;
65  }
66 
67  std::string ToHSAMetadataString;
68  if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
69  errs() << "FAIL\n";
70  return;
71  }
72 
73  errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
74  << '\n';
75  if (HSAMetadataString != ToHSAMetadataString) {
76  errs() << "Original input: " << HSAMetadataString << '\n'
77  << "Produced output: " << ToHSAMetadataString << '\n';
78  }
79 }
80 
82 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
83  if (AccQual.empty())
85 
87  .Case("read_only", AccessQualifier::ReadOnly)
88  .Case("write_only", AccessQualifier::WriteOnly)
89  .Case("read_write", AccessQualifier::ReadWrite)
91 }
92 
94 MetadataStreamerV2::getAddressSpaceQualifier(
95  unsigned AddressSpace) const {
96  switch (AddressSpace) {
109  default:
111  }
112 }
113 
114 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
115  StringRef BaseTypeName) const {
116  if (TypeQual.contains("pipe"))
117  return ValueKind::Pipe;
118 
119  return StringSwitch<ValueKind>(BaseTypeName)
120  .Case("image1d_t", ValueKind::Image)
121  .Case("image1d_array_t", ValueKind::Image)
122  .Case("image1d_buffer_t", ValueKind::Image)
123  .Case("image2d_t", ValueKind::Image)
124  .Case("image2d_array_t", ValueKind::Image)
125  .Case("image2d_array_depth_t", ValueKind::Image)
126  .Case("image2d_array_msaa_t", ValueKind::Image)
127  .Case("image2d_array_msaa_depth_t", ValueKind::Image)
128  .Case("image2d_depth_t", ValueKind::Image)
129  .Case("image2d_msaa_t", ValueKind::Image)
130  .Case("image2d_msaa_depth_t", ValueKind::Image)
131  .Case("image3d_t", ValueKind::Image)
132  .Case("sampler_t", ValueKind::Sampler)
133  .Case("queue_t", ValueKind::Queue)
134  .Default(isa<PointerType>(Ty) ?
135  (Ty->getPointerAddressSpace() ==
140 }
141 
142 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
143  switch (Ty->getTypeID()) {
144  case Type::IntegerTyID: {
145  if (!Signed)
146  return (Twine('u') + getTypeName(Ty, true)).str();
147 
148  auto BitWidth = Ty->getIntegerBitWidth();
149  switch (BitWidth) {
150  case 8:
151  return "char";
152  case 16:
153  return "short";
154  case 32:
155  return "int";
156  case 64:
157  return "long";
158  default:
159  return (Twine('i') + Twine(BitWidth)).str();
160  }
161  }
162  case Type::HalfTyID:
163  return "half";
164  case Type::FloatTyID:
165  return "float";
166  case Type::DoubleTyID:
167  return "double";
168  case Type::FixedVectorTyID: {
169  auto VecTy = cast<FixedVectorType>(Ty);
170  auto ElTy = VecTy->getElementType();
171  auto NumElements = VecTy->getNumElements();
172  return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
173  }
174  default:
175  return "unknown";
176  }
177 }
178 
179 std::vector<uint32_t>
180 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
181  std::vector<uint32_t> Dims;
182  if (Node->getNumOperands() != 3)
183  return Dims;
184 
185  for (auto &Op : Node->operands())
186  Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
187  return Dims;
188 }
189 
190 Kernel::CodeProps::Metadata
191 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
192  const SIProgramInfo &ProgramInfo) const {
193  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
195  HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
196  const Function &F = MF.getFunction();
197 
198  assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
199  F.getCallingConv() == CallingConv::SPIR_KERNEL);
200 
201  Align MaxKernArgAlign;
202  HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
203  MaxKernArgAlign);
204  HSACodeProps.mKernargSegmentAlign =
205  std::max(MaxKernArgAlign, Align(4)).value();
206 
207  HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
208  HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
209  HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
210  HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
211  HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
212  HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
213  HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
214  HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
215  HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
216  HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
217 
218  return HSACodeProps;
219 }
220 
221 Kernel::DebugProps::Metadata
222 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
223  const SIProgramInfo &ProgramInfo) const {
224  return HSAMD::Kernel::DebugProps::Metadata();
225 }
226 
228  auto &Version = HSAMetadata.mVersion;
229 
230  Version.push_back(VersionMajorV2);
231  Version.push_back(VersionMinorV2);
232 }
233 
234 void MetadataStreamerV2::emitPrintf(const Module &Mod) {
235  auto &Printf = HSAMetadata.mPrintf;
236 
237  auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
238  if (!Node)
239  return;
240 
241  for (auto Op : Node->operands())
242  if (Op->getNumOperands())
243  Printf.push_back(
244  std::string(cast<MDString>(Op->getOperand(0))->getString()));
245 }
246 
247 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
248  auto &Kernel = HSAMetadata.mKernels.back();
249 
250  // TODO: What about other languages?
251  auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
252  if (!Node || !Node->getNumOperands())
253  return;
254  auto Op0 = Node->getOperand(0);
255  if (Op0->getNumOperands() <= 1)
256  return;
257 
258  Kernel.mLanguage = "OpenCL C";
259  Kernel.mLanguageVersion.push_back(
260  mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
261  Kernel.mLanguageVersion.push_back(
262  mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
263 }
264 
265 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
266  auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
267 
268  if (auto Node = Func.getMetadata("reqd_work_group_size"))
269  Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
270  if (auto Node = Func.getMetadata("work_group_size_hint"))
271  Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
272  if (auto Node = Func.getMetadata("vec_type_hint")) {
273  Attrs.mVecTypeHint = getTypeName(
274  cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
275  mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
276  }
277  if (Func.hasFnAttribute("runtime-handle")) {
278  Attrs.mRuntimeHandle =
279  Func.getFnAttribute("runtime-handle").getValueAsString().str();
280  }
281 }
282 
283 void MetadataStreamerV2::emitKernelArgs(const Function &Func,
284  const GCNSubtarget &ST) {
285  for (auto &Arg : Func.args())
286  emitKernelArg(Arg);
287 
288  emitHiddenKernelArgs(Func, ST);
289 }
290 
291 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
292  auto Func = Arg.getParent();
293  auto ArgNo = Arg.getArgNo();
294  const MDNode *Node;
295 
296  StringRef Name;
297  Node = Func->getMetadata("kernel_arg_name");
298  if (Node && ArgNo < Node->getNumOperands())
299  Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
300  else if (Arg.hasName())
301  Name = Arg.getName();
302 
304  Node = Func->getMetadata("kernel_arg_type");
305  if (Node && ArgNo < Node->getNumOperands())
306  TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
307 
308  StringRef BaseTypeName;
309  Node = Func->getMetadata("kernel_arg_base_type");
310  if (Node && ArgNo < Node->getNumOperands())
311  BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
312 
314  if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
315  Arg.hasNoAliasAttr()) {
316  AccQual = "read_only";
317  } else {
318  Node = Func->getMetadata("kernel_arg_access_qual");
319  if (Node && ArgNo < Node->getNumOperands())
320  AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
321  }
322 
323  StringRef TypeQual;
324  Node = Func->getMetadata("kernel_arg_type_qual");
325  if (Node && ArgNo < Node->getNumOperands())
326  TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
327 
328  const DataLayout &DL = Func->getParent()->getDataLayout();
329 
331  if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) {
332  if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
333  // FIXME: Should report this for all address spaces
334  PointeeAlign = Arg.getParamAlign().valueOrOne();
335  }
336  }
337 
338  Type *ArgTy;
339  Align ArgAlign;
340  std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
341 
342  emitKernelArg(DL, ArgTy, ArgAlign,
343  getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name,
344  TypeName, BaseTypeName, AccQual, TypeQual);
345 }
346 
347 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
348  Align Alignment, ValueKind ValueKind,
351  StringRef BaseTypeName,
352  StringRef AccQual, StringRef TypeQual) {
353  HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
354  auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
355 
356  Arg.mName = std::string(Name);
357  Arg.mTypeName = std::string(TypeName);
358  Arg.mSize = DL.getTypeAllocSize(Ty);
359  Arg.mAlign = Alignment.value();
360  Arg.mValueKind = ValueKind;
361  Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0;
362 
363  if (auto PtrTy = dyn_cast<PointerType>(Ty))
364  Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
365 
366  Arg.mAccQual = getAccessQualifier(AccQual);
367 
368  // TODO: Emit Arg.mActualAccQual.
369 
370  SmallVector<StringRef, 1> SplitTypeQuals;
371  TypeQual.split(SplitTypeQuals, " ", -1, false);
372  for (StringRef Key : SplitTypeQuals) {
373  auto P = StringSwitch<bool*>(Key)
374  .Case("const", &Arg.mIsConst)
375  .Case("restrict", &Arg.mIsRestrict)
376  .Case("volatile", &Arg.mIsVolatile)
377  .Case("pipe", &Arg.mIsPipe)
378  .Default(nullptr);
379  if (P)
380  *P = true;
381  }
382 }
383 
384 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func,
385  const GCNSubtarget &ST) {
386  unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
387  if (!HiddenArgNumBytes)
388  return;
389 
390  auto &DL = Func.getParent()->getDataLayout();
391  auto Int64Ty = Type::getInt64Ty(Func.getContext());
392 
393  if (HiddenArgNumBytes >= 8)
394  emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX);
395  if (HiddenArgNumBytes >= 16)
396  emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY);
397  if (HiddenArgNumBytes >= 24)
398  emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ);
399 
400  auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
402 
403  if (HiddenArgNumBytes >= 32) {
404  // We forbid the use of features requiring hostcall when compiling OpenCL
405  // before code object V5, which makes the mutual exclusion between the
406  // "printf buffer" and "hostcall buffer" here sound.
407  if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
408  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer);
409  else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
410  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer);
411  else
412  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
413  }
414 
415  // Emit "default queue" and "completion action" arguments if enqueue kernel is
416  // used, otherwise emit dummy "none" arguments.
417  if (HiddenArgNumBytes >= 48) {
418  if (Func.hasFnAttribute("calls-enqueue-kernel")) {
419  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue);
420  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction);
421  } else {
422  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
423  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
424  }
425  }
426 
427  // Emit the pointer argument for multi-grid object.
428  if (HiddenArgNumBytes >= 56) {
429  if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg"))
430  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg);
431  else
432  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
433  }
434 }
435 
437  return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
438 }
439 
441  const IsaInfo::AMDGPUTargetID &TargetID) {
442  emitVersion();
443  emitPrintf(Mod);
444 }
445 
447  std::string HSAMetadataString;
448  if (toString(HSAMetadata, HSAMetadataString))
449  return;
450 
451  if (DumpHSAMetadata)
452  dump(HSAMetadataString);
453  if (VerifyHSAMetadata)
454  verify(HSAMetadataString);
455 }
456 
458  const SIProgramInfo &ProgramInfo) {
459  auto &Func = MF.getFunction();
460  if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
461  return;
462 
463  auto CodeProps = getHSACodeProps(MF, ProgramInfo);
464  auto DebugProps = getHSADebugProps(MF, ProgramInfo);
465 
466  HSAMetadata.mKernels.push_back(Kernel::Metadata());
467  auto &Kernel = HSAMetadata.mKernels.back();
468 
469  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
470  Kernel.mName = std::string(Func.getName());
471  Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
472  emitKernelLanguage(Func);
473  emitKernelAttrs(Func);
474  emitKernelArgs(Func, ST);
475  HSAMetadata.mKernels.back().mCodeProps = CodeProps;
476  HSAMetadata.mKernels.back().mDebugProps = DebugProps;
477 }
478 
479 //===----------------------------------------------------------------------===//
480 // HSAMetadataStreamerV3
481 //===----------------------------------------------------------------------===//
482 
483 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
484  errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
485 }
486 
487 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
488  errs() << "AMDGPU HSA Metadata Parser Test: ";
489 
490  msgpack::Document FromHSAMetadataString;
491 
492  if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
493  errs() << "FAIL\n";
494  return;
495  }
496 
497  std::string ToHSAMetadataString;
498  raw_string_ostream StrOS(ToHSAMetadataString);
499  FromHSAMetadataString.toYAML(StrOS);
500 
501  errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
502  if (HSAMetadataString != ToHSAMetadataString) {
503  errs() << "Original input: " << HSAMetadataString << '\n'
504  << "Produced output: " << StrOS.str() << '\n';
505  }
506 }
507 
511  .Case("read_only", StringRef("read_only"))
512  .Case("write_only", StringRef("write_only"))
513  .Case("read_write", StringRef("read_write"))
514  .Default(None);
515 }
516 
519  switch (AddressSpace) {
521  return StringRef("private");
523  return StringRef("global");
525  return StringRef("constant");
527  return StringRef("local");
529  return StringRef("generic");
531  return StringRef("region");
532  default:
533  return None;
534  }
535 }
536 
538  StringRef BaseTypeName) const {
539  if (TypeQual.contains("pipe"))
540  return "pipe";
541 
542  return StringSwitch<StringRef>(BaseTypeName)
543  .Case("image1d_t", "image")
544  .Case("image1d_array_t", "image")
545  .Case("image1d_buffer_t", "image")
546  .Case("image2d_t", "image")
547  .Case("image2d_array_t", "image")
548  .Case("image2d_array_depth_t", "image")
549  .Case("image2d_array_msaa_t", "image")
550  .Case("image2d_array_msaa_depth_t", "image")
551  .Case("image2d_depth_t", "image")
552  .Case("image2d_msaa_t", "image")
553  .Case("image2d_msaa_depth_t", "image")
554  .Case("image3d_t", "image")
555  .Case("sampler_t", "sampler")
556  .Case("queue_t", "queue")
557  .Default(isa<PointerType>(Ty)
559  ? "dynamic_shared_pointer"
560  : "global_buffer")
561  : "by_value");
562 }
563 
564 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
565  switch (Ty->getTypeID()) {
566  case Type::IntegerTyID: {
567  if (!Signed)
568  return (Twine('u') + getTypeName(Ty, true)).str();
569 
570  auto BitWidth = Ty->getIntegerBitWidth();
571  switch (BitWidth) {
572  case 8:
573  return "char";
574  case 16:
575  return "short";
576  case 32:
577  return "int";
578  case 64:
579  return "long";
580  default:
581  return (Twine('i') + Twine(BitWidth)).str();
582  }
583  }
584  case Type::HalfTyID:
585  return "half";
586  case Type::FloatTyID:
587  return "float";
588  case Type::DoubleTyID:
589  return "double";
590  case Type::FixedVectorTyID: {
591  auto VecTy = cast<FixedVectorType>(Ty);
592  auto ElTy = VecTy->getElementType();
593  auto NumElements = VecTy->getNumElements();
594  return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
595  }
596  default:
597  return "unknown";
598  }
599 }
600 
603  auto Dims = HSAMetadataDoc->getArrayNode();
604  if (Node->getNumOperands() != 3)
605  return Dims;
606 
607  for (auto &Op : Node->operands())
608  Dims.push_back(Dims.getDocument()->getNode(
609  uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
610  return Dims;
611 }
612 
614  auto Version = HSAMetadataDoc->getArrayNode();
615  Version.push_back(Version.getDocument()->getNode(VersionMajorV3));
616  Version.push_back(Version.getDocument()->getNode(VersionMinorV3));
617  getRootMetadata("amdhsa.version") = Version;
618 }
619 
621  auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
622  if (!Node)
623  return;
624 
625  auto Printf = HSAMetadataDoc->getArrayNode();
626  for (auto Op : Node->operands())
627  if (Op->getNumOperands())
628  Printf.push_back(Printf.getDocument()->getNode(
629  cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
630  getRootMetadata("amdhsa.printf") = Printf;
631 }
632 
634  msgpack::MapDocNode Kern) {
635  // TODO: What about other languages?
636  auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
637  if (!Node || !Node->getNumOperands())
638  return;
639  auto Op0 = Node->getOperand(0);
640  if (Op0->getNumOperands() <= 1)
641  return;
642 
643  Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
644  auto LanguageVersion = Kern.getDocument()->getArrayNode();
645  LanguageVersion.push_back(Kern.getDocument()->getNode(
646  mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
647  LanguageVersion.push_back(Kern.getDocument()->getNode(
648  mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
649  Kern[".language_version"] = LanguageVersion;
650 }
651 
653  msgpack::MapDocNode Kern) {
654 
655  if (auto Node = Func.getMetadata("reqd_work_group_size"))
656  Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
657  if (auto Node = Func.getMetadata("work_group_size_hint"))
658  Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
659  if (auto Node = Func.getMetadata("vec_type_hint")) {
660  Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
661  getTypeName(
662  cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
663  mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
664  /*Copy=*/true);
665  }
666  if (Func.hasFnAttribute("runtime-handle")) {
667  Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
668  Func.getFnAttribute("runtime-handle").getValueAsString().str(),
669  /*Copy=*/true);
670  }
671  if (Func.hasFnAttribute("device-init"))
672  Kern[".kind"] = Kern.getDocument()->getNode("init");
673  else if (Func.hasFnAttribute("device-fini"))
674  Kern[".kind"] = Kern.getDocument()->getNode("fini");
675 }
676 
678  msgpack::MapDocNode Kern) {
679  auto &Func = MF.getFunction();
680  unsigned Offset = 0;
681  auto Args = HSAMetadataDoc->getArrayNode();
682  for (auto &Arg : Func.args())
683  emitKernelArg(Arg, Offset, Args);
684 
685  emitHiddenKernelArgs(MF, Offset, Args);
686 
687  Kern[".args"] = Args;
688 }
689 
690 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
692  auto Func = Arg.getParent();
693  auto ArgNo = Arg.getArgNo();
694  const MDNode *Node;
695 
696  StringRef Name;
697  Node = Func->getMetadata("kernel_arg_name");
698  if (Node && ArgNo < Node->getNumOperands())
699  Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
700  else if (Arg.hasName())
701  Name = Arg.getName();
702 
704  Node = Func->getMetadata("kernel_arg_type");
705  if (Node && ArgNo < Node->getNumOperands())
706  TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
707 
708  StringRef BaseTypeName;
709  Node = Func->getMetadata("kernel_arg_base_type");
710  if (Node && ArgNo < Node->getNumOperands())
711  BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
712 
714  if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
715  Arg.hasNoAliasAttr()) {
716  AccQual = "read_only";
717  } else {
718  Node = Func->getMetadata("kernel_arg_access_qual");
719  if (Node && ArgNo < Node->getNumOperands())
720  AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
721  }
722 
723  StringRef TypeQual;
724  Node = Func->getMetadata("kernel_arg_type_qual");
725  if (Node && ArgNo < Node->getNumOperands())
726  TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
727 
728  const DataLayout &DL = Func->getParent()->getDataLayout();
729 
731  Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
732 
733  // FIXME: Need to distinguish in memory alignment from pointer alignment.
734  if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
735  if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
736  PointeeAlign = Arg.getParamAlign().valueOrOne();
737  }
738 
739  // There's no distinction between byval aggregates and raw aggregates.
740  Type *ArgTy;
741  Align ArgAlign;
742  std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
743 
744  emitKernelArg(DL, ArgTy, ArgAlign,
745  getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
746  PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
747 }
748 
750  const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
752  StringRef Name, StringRef TypeName, StringRef BaseTypeName,
753  StringRef AccQual, StringRef TypeQual) {
754  auto Arg = Args.getDocument()->getMapNode();
755 
756  if (!Name.empty())
757  Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
758  if (!TypeName.empty())
759  Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
760  auto Size = DL.getTypeAllocSize(Ty);
761  Arg[".size"] = Arg.getDocument()->getNode(Size);
762  Offset = alignTo(Offset, Alignment);
763  Arg[".offset"] = Arg.getDocument()->getNode(Offset);
764  Offset += Size;
765  Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
766  if (PointeeAlign)
767  Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
768 
769  if (auto PtrTy = dyn_cast<PointerType>(Ty))
770  if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
771  Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true);
772 
773  if (auto AQ = getAccessQualifier(AccQual))
774  Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
775 
776  // TODO: Emit Arg[".actual_access"].
777 
778  SmallVector<StringRef, 1> SplitTypeQuals;
779  TypeQual.split(SplitTypeQuals, " ", -1, false);
780  for (StringRef Key : SplitTypeQuals) {
781  if (Key == "const")
782  Arg[".is_const"] = Arg.getDocument()->getNode(true);
783  else if (Key == "restrict")
784  Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
785  else if (Key == "volatile")
786  Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
787  else if (Key == "pipe")
788  Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
789  }
790 
791  Args.push_back(Arg);
792 }
793 
795  unsigned &Offset,
797  auto &Func = MF.getFunction();
798  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
799 
800  unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
801  if (!HiddenArgNumBytes)
802  return;
803 
804  const Module *M = Func.getParent();
805  auto &DL = M->getDataLayout();
806  auto Int64Ty = Type::getInt64Ty(Func.getContext());
807 
808  Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
809 
810  if (HiddenArgNumBytes >= 8)
811  emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
812  Args);
813  if (HiddenArgNumBytes >= 16)
814  emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
815  Args);
816  if (HiddenArgNumBytes >= 24)
817  emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
818  Args);
819 
820  auto Int8PtrTy =
821  Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
822 
823  if (HiddenArgNumBytes >= 32) {
824  // We forbid the use of features requiring hostcall when compiling OpenCL
825  // before code object V5, which makes the mutual exclusion between the
826  // "printf buffer" and "hostcall buffer" here sound.
827  if (M->getNamedMetadata("llvm.printf.fmts"))
828  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
829  Args);
830  else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
831  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
832  Args);
833  else
834  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
835  }
836 
837  // Emit "default queue" and "completion action" arguments if enqueue kernel is
838  // used, otherwise emit dummy "none" arguments.
839  if (HiddenArgNumBytes >= 48) {
840  if (Func.hasFnAttribute("calls-enqueue-kernel")) {
841  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
842  Args);
843  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
844  Args);
845  } else {
846  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
847  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
848  }
849  }
850 
851  // Emit the pointer argument for multi-grid object.
852  if (HiddenArgNumBytes >= 56) {
853  if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
854  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
855  Args);
856  } else {
857  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
858  }
859  }
860 }
861 
864  const SIProgramInfo &ProgramInfo) const {
865  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
867  const Function &F = MF.getFunction();
868 
869  auto Kern = HSAMetadataDoc->getMapNode();
870 
871  Align MaxKernArgAlign;
872  Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
873  STM.getKernArgSegmentSize(F, MaxKernArgAlign));
874  Kern[".group_segment_fixed_size"] =
875  Kern.getDocument()->getNode(ProgramInfo.LDSSize);
876  Kern[".private_segment_fixed_size"] =
877  Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
878 
879  // FIXME: The metadata treats the minimum as 16?
880  Kern[".kernarg_segment_align"] =
881  Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
882  Kern[".wavefront_size"] =
883  Kern.getDocument()->getNode(STM.getWavefrontSize());
884  Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
885  Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
886 
887  // Only add AGPR count to metadata for supported devices
888  if (STM.hasMAIInsts()) {
889  Kern[".agpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumAccVGPR);
890  }
891 
892  Kern[".max_flat_workgroup_size"] =
893  Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
894  Kern[".sgpr_spill_count"] =
895  Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
896  Kern[".vgpr_spill_count"] =
897  Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
898 
899  return Kern;
900 }
901 
903  return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
904 }
905 
907  const IsaInfo::AMDGPUTargetID &TargetID) {
908  emitVersion();
909  emitPrintf(Mod);
910  getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
911 }
912 
914  std::string HSAMetadataString;
915  raw_string_ostream StrOS(HSAMetadataString);
916  HSAMetadataDoc->toYAML(StrOS);
917 
918  if (DumpHSAMetadata)
919  dump(StrOS.str());
920  if (VerifyHSAMetadata)
921  verify(StrOS.str());
922 }
923 
925  const SIProgramInfo &ProgramInfo) {
926  auto &Func = MF.getFunction();
927  auto Kern = getHSAKernelProps(MF, ProgramInfo);
928 
929  assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
930  Func.getCallingConv() == CallingConv::SPIR_KERNEL);
931 
932  auto Kernels =
933  getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
934 
935  {
936  Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
937  Kern[".symbol"] = Kern.getDocument()->getNode(
938  (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
939  emitKernelLanguage(Func, Kern);
940  emitKernelAttrs(Func, Kern);
941  emitKernelArgs(MF, Kern);
942  }
943 
944  Kernels.push_back(Kern);
945 }
946 
947 //===----------------------------------------------------------------------===//
948 // HSAMetadataStreamerV4
949 //===----------------------------------------------------------------------===//
950 
952  auto Version = HSAMetadataDoc->getArrayNode();
953  Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
954  Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
955  getRootMetadata("amdhsa.version") = Version;
956 }
957 
959  getRootMetadata("amdhsa.target") =
960  HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
961 }
962 
964  const IsaInfo::AMDGPUTargetID &TargetID) {
965  emitVersion();
966  emitTargetID(TargetID);
967  emitPrintf(Mod);
968  getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
969 }
970 
971 //===----------------------------------------------------------------------===//
972 // HSAMetadataStreamerV5
973 //===----------------------------------------------------------------------===//
974 
976  auto Version = HSAMetadataDoc->getArrayNode();
977  Version.push_back(Version.getDocument()->getNode(VersionMajorV5));
978  Version.push_back(Version.getDocument()->getNode(VersionMinorV5));
979  getRootMetadata("amdhsa.version") = Version;
980 }
981 
983  unsigned &Offset,
985  auto &Func = MF.getFunction();
986  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
987 
988  // No implicit kernel argument is used.
989  if (ST.getImplicitArgNumBytes(Func) == 0)
990  return;
991 
992  const Module *M = Func.getParent();
993  auto &DL = M->getDataLayout();
995 
996  auto Int64Ty = Type::getInt64Ty(Func.getContext());
997  auto Int32Ty = Type::getInt32Ty(Func.getContext());
998  auto Int16Ty = Type::getInt16Ty(Func.getContext());
999 
1000  Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
1001  emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args);
1002  emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args);
1003  emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args);
1004 
1005  emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args);
1006  emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args);
1007  emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args);
1008 
1009  emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args);
1010  emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args);
1011  emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args);
1012 
1013  // Reserved for hidden_tool_correlation_id.
1014  Offset += 8;
1015 
1016  Offset += 8; // Reserved.
1017 
1018  emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args);
1019  emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args);
1020  emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args);
1021 
1022  emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args);
1023 
1024  Offset += 6; // Reserved.
1025  auto Int8PtrTy =
1026  Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
1027 
1028  if (M->getNamedMetadata("llvm.printf.fmts")) {
1029  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
1030  Args);
1031  } else {
1032  Offset += 8; // Skipped.
1033  }
1034 
1035  if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
1036  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
1037  Args);
1038  } else {
1039  Offset += 8; // Skipped.
1040  }
1041 
1042  if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
1043  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
1044  Args);
1045  } else {
1046  Offset += 8; // Skipped.
1047  }
1048 
1049  if (!Func.hasFnAttribute("amdgpu-no-heap-ptr"))
1050  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);
1051  else
1052  Offset += 8; // Skipped.
1053 
1054  if (Func.hasFnAttribute("calls-enqueue-kernel")) {
1055  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
1056  Args);
1057  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
1058  Args);
1059  } else {
1060  Offset += 16; // Skipped.
1061  }
1062 
1063  Offset += 72; // Reserved.
1064 
1065  // hidden_private_base and hidden_shared_base are only when the subtarget has
1066  // ApertureRegs.
1067  if (!ST.hasApertureRegs()) {
1068  emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args);
1069  emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args);
1070  } else {
1071  Offset += 8; // Skipped.
1072  }
1073 
1074  if (MFI.hasQueuePtr())
1075  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
1076 }
1077 
1078 } // end namespace HSAMD
1079 } // end namespace AMDGPU
1080 } // end namespace llvm
llvm::StringSwitch::Case
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
llvm::AMDGPU::HSAMD::MetadataStreamerV5::emitVersion
void emitVersion() override
Definition: AMDGPUHSAMetadataStreamer.cpp:975
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:148
Int32Ty
IntegerType * Int32Ty
Definition: NVVMIntrRange.cpp:67
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
Attrs
Function Attrs
Definition: README_ALTIVEC.txt:215
llvm::Type::FloatTyID
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4635
llvm::Type::DoubleTyID
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::AMDGPU::HSAMD::MetadataStreamerV3::emitKernel
void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) override
Definition: AMDGPUHSAMetadataStreamer.cpp:924
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::AMDGPU::HSAMD::AccessQualifier::Unknown
@ Unknown
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:291
llvm::msgpack::DocNode::getDocument
Document * getDocument() const
Definition: MsgPackDocument.h:80
SIMachineFunctionInfo.h
llvm::AMDGPU::HSAMD::Metadata::mVersion
std::vector< uint32_t > mVersion
HSA metadata version. Required.
Definition: AMDGPUMetadata.h:438
llvm::Function
Definition: Function.h:60
llvm::AMDGPU::HSAMD::MetadataStreamerV2::end
void end() override
Definition: AMDGPUHSAMetadataStreamer.cpp:446
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::StringSwitch::Default
LLVM_NODISCARD R Default(T Value)
Definition: StringSwitch.h:183
llvm::AMDGPU::HSAMD::ValueKind::ByValue
@ ByValue
AMDGPUHSAMetadataStreamer.h
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:632
llvm::SmallVector< StringRef, 1 >
llvm::AMDGPU::HSAMD::MetadataStreamerV3::HSAMetadataDoc
std::unique_ptr< msgpack::Document > HSAMetadataDoc
Definition: AMDGPUHSAMetadataStreamer.h:66
llvm::AMDGPU::HSAMD::MetadataStreamerV5::emitHiddenKernelArgs
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) override
Definition: AMDGPUHSAMetadataStreamer.cpp:982
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:729
llvm::SIProgramInfo::NumSGPR
uint32_t NumSGPR
Definition: SIProgramInfo.h:51
llvm::AMDGPU::HSAMD::VersionMajorV4
constexpr uint32_t VersionMajorV4
HSA metadata major version for code object V4.
Definition: AMDGPUMetadata.h:43
llvm::AMDGPU::HSAMD::AddressSpaceQualifier::Region
@ Region
llvm::AMDGPU::HSAMD::MetadataStreamerV3::dump
void dump(StringRef HSAMetadataString) const
Definition: AMDGPUHSAMetadataStreamer.cpp:483
llvm::Type::getTypeID
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
llvm::AMDGPU::HSAMD::MetadataStreamerV3::emitVersion
void emitVersion() override
Definition: AMDGPUHSAMetadataStreamer.cpp:613
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Module.h
llvm::Optional
Definition: APInt.h:33
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::SIProgramInfo::NumVGPR
uint32_t NumVGPR
Definition: SIProgramInfo.h:46
llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:358
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:893
llvm::AMDGPU::HSAMD::MetadataStreamerV3::getWorkGroupDimensions
msgpack::ArrayDocNode getWorkGroupDimensions(MDNode *Node) const
Definition: AMDGPUHSAMetadataStreamer.cpp:602
llvm::AMDGPU::HSAMD::MetadataStreamerV4::begin
void begin(const Module &Mod, const IsaInfo::AMDGPUTargetID &TargetID) override
Definition: AMDGPUHSAMetadataStreamer.cpp:963
llvm::SIProgramInfo::LDSSize
uint32_t LDSSize
Definition: SIProgramInfo.h:52
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
llvm::AMDGPUSubtarget::getKernArgSegmentSize
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
Definition: AMDGPUSubtarget.cpp:547
llvm::AMDGPU::HSAMD::ValueKind::Queue
@ Queue
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::AMDGPU::HSAMD::VersionMajorV2
constexpr uint32_t VersionMajorV2
HSA metadata major version for code object V2.
Definition: AMDGPUMetadata.h:33
llvm::AMDGPU::HSAMD::AccessQualifier
AccessQualifier
Access qualifiers.
Definition: AMDGPUMetadata.h:58
llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPU.h:360
llvm::AMDGPU::HSAMD::Kernel::Key::DebugProps
constexpr char DebugProps[]
Key for Kernel::Metadata::mDebugProps.
Definition: AMDGPUMetadata.h:398
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:186
llvm::AMDGPU::HSAMD::fromString
std::error_code fromString(StringRef String, Metadata &HSAMetadata)
Converts String to HSAMetadata.
Definition: AMDGPUMetadata.cpp:213
llvm::AMDGPU::HSAMD::Key::Kernels
constexpr char Kernels[]
Key for HSA::Metadata::mKernels.
Definition: AMDGPUMetadata.h:432
llvm::AMDGPU::HSAMD::MetadataStreamerV3::emitKernelLanguage
void emitKernelLanguage(const Function &Func, msgpack::MapDocNode Kern)
Definition: AMDGPUHSAMetadataStreamer.cpp:633
llvm::AMDGPU::HSAMD::ValueKind
ValueKind
Value kinds.
Definition: AMDGPUMetadata.h:78
llvm::AMDGPU::IsaInfo::AMDGPUTargetID
Definition: AMDGPUBaseInfo.h:105
llvm::AMDGPUTargetStreamer::EmitHSAMetadata
virtual bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict)=0
Emit HSA Metadata.
llvm::AMDGPU::HSAMD::ValueKind::HiddenCompletionAction
@ HiddenCompletionAction
GCNSubtarget.h
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:738
llvm::StringRef::split
LLVM_NODISCARD std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:749
llvm::AMDGPU::HSAMD::VersionMinorV2
constexpr uint32_t VersionMinorV2
HSA metadata minor version for code object V2.
Definition: AMDGPUMetadata.h:35
llvm::SIProgramInfo::ScratchSize
uint64_t ScratchSize
Definition: SIProgramInfo.h:37
llvm::msgpack::Document::getArrayNode
ArrayDocNode getArrayNode()
Create an empty Array node associated with this Document.
Definition: MsgPackDocument.h:380
llvm::AMDGPU::HSAMD::Kernel::Arg::Key::AccQual
constexpr char AccQual[]
Key for Kernel::Arg::Metadata::mAccQual.
Definition: AMDGPUMetadata.h:191
llvm::AMDGPU::HSAMD::Kernel::Key::CodeProps
constexpr char CodeProps[]
Key for Kernel::Metadata::mCodeProps.
Definition: AMDGPUMetadata.h:396
llvm::AMDGPU::HSAMD::MetadataStreamerV3::begin
void begin(const Module &Mod, const IsaInfo::AMDGPUTargetID &TargetID) override
Definition: AMDGPUHSAMetadataStreamer.cpp:906
llvm::AMDGPU::HSAMD::ValueKind::HiddenGlobalOffsetY
@ HiddenGlobalOffsetY
llvm::AMDGPU::PALMD::Key
Key
PAL metadata keys.
Definition: AMDGPUMetadata.h:486
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:109
AMDGPU
Definition: AMDGPUReplaceLDSUseWithPointer.cpp:114
llvm::StringRef::contains
LLVM_NODISCARD bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition: StringRef.h:462
llvm::AMDGPU::HSAMD::ValueKind::HiddenGlobalOffsetZ
@ HiddenGlobalOffsetZ
llvm::msgpack::Document
Simple in-memory representation of a document of msgpack objects with ability to find and create arra...
Definition: MsgPackDocument.h:272
llvm::AMDGPU::HSAMD::MetadataStreamerV3::getAddressSpaceQualifier
Optional< StringRef > getAddressSpaceQualifier(unsigned AddressSpace) const
Definition: AMDGPUHSAMetadataStreamer.cpp:518
SIProgramInfo.h
getArgumentTypeAlign
static std::pair< Type *, Align > getArgumentTypeAlign(const Argument &Arg, const DataLayout &DL)
Definition: AMDGPUHSAMetadataStreamer.cpp:24
llvm::AMDGPU::HSAMD::MetadataStreamerV4::emitVersion
void emitVersion() override
Definition: AMDGPUHSAMetadataStreamer.cpp:951
llvm::SIProgramInfo::DynamicCallStack
bool DynamicCallStack
Definition: SIProgramInfo.h:66
llvm::AMDGPU::HSAMD::MetadataStreamerV3::verify
void verify(StringRef HSAMetadataString) const
Definition: AMDGPUHSAMetadataStreamer.cpp:487
llvm::AMDGPU::HSAMD::ValueKind::GlobalBuffer
@ GlobalBuffer
Align
uint64_t Align
Definition: ELFObjHandler.cpp:81
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::None
const NoneType None
Definition: None.h:24
llvm::AMDGPU::HSAMD::MetadataStreamerV3::emitKernelArgs
void emitKernelArgs(const MachineFunction &MF, msgpack::MapDocNode Kern)
Definition: AMDGPUHSAMetadataStreamer.cpp:677
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
llvm::msgpack::Document::fromYAML
bool fromYAML(StringRef S)
Read YAML text into the MsgPack document. Returns false on failure.
Definition: MsgPackDocumentYAML.cpp:242
llvm::AMDGPUSubtarget::getWavefrontSize
unsigned getWavefrontSize() const
Definition: AMDGPUSubtarget.h:200
llvm::AMDGPU::HSAMD::AddressSpaceQualifier::Global
@ Global
llvm::AMDGPU::HSAMD::MetadataStreamerV3::getRootMetadata
msgpack::DocNode & getRootMetadata(StringRef Key)
Definition: AMDGPUHSAMetadataStreamer.h:110
llvm::AMDGPU::HSAMD::AccessQualifier::WriteOnly
@ WriteOnly
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:640
llvm::SPIRV::Decoration::Alignment
@ Alignment
llvm::AMDGPU::HSAMD::VersionMajorV5
constexpr uint32_t VersionMajorV5
HSA metadata major version for code object V5.
Definition: AMDGPUMetadata.h:48
llvm::cl::opt< bool >
llvm::AMDGPU::HSAMD::ValueKind::Image
@ Image
llvm::AMDGPU::HSAMD::VersionMinorV5
constexpr uint32_t VersionMinorV5
HSA metadata minor version for code object V5.
Definition: AMDGPUMetadata.h:50
llvm::msgpack::MapDocNode
A DocNode that is a map.
Definition: MsgPackDocument.h:219
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:359
llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:216
llvm::AMDGPU::HSAMD::Metadata::mPrintf
std::vector< std::string > mPrintf
Printf metadata. Optional.
Definition: AMDGPUMetadata.h:440
AMDGPUTargetStreamer.h
uint64_t
llvm::AMDGPU::HSAMD::AddressSpaceQualifier::Local
@ Local
llvm::AMDGPU::HSAMD::AccessQualifier::ReadOnly
@ ReadOnly
llvm::AMDGPU::HSAMD::AccessQualifier::ReadWrite
@ ReadWrite
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::AMDGPU::HSAMD::Kernel::Metadata
In-memory representation of kernel metadata.
Definition: AMDGPUMetadata.h:402
llvm::AMDGPU::HSAMD::ValueKind::HiddenDefaultQueue
@ HiddenDefaultQueue
llvm::AMDGPU::HSAMD::ValueKind::HiddenNone
@ HiddenNone
llvm::AMDGPU::HSAMD::MetadataStreamerV3::emitKernelAttrs
void emitKernelAttrs(const Function &Func, msgpack::MapDocNode Kern)
Definition: AMDGPUHSAMetadataStreamer.cpp:652
llvm::AMDGPU::HSAMD::Kernel::Arg::Key::PointeeAlign
constexpr char PointeeAlign[]
Key for Kernel::Arg::Metadata::mPointeeAlign.
Definition: AMDGPUMetadata.h:187
llvm::AMDGPU::HSAMD::AddressSpaceQualifier::Generic
@ Generic
llvm::AMDGPU::HSAMD::ValueKind::Pipe
@ Pipe
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::msgpack::Document::getNode
DocNode getNode()
Create a nil node associated with this Document.
Definition: MsgPackDocument.h:308
llvm::AMDGPU::HSAMD::AddressSpaceQualifier::Unknown
@ Unknown
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::AMDGPU::HSAMD::AddressSpaceQualifier::Constant
@ Constant
llvm::MDNode
Metadata node.
Definition: Metadata.h:926
llvm::AMDGPU::HSAMD::ValueKind::HiddenPrintfBuffer
@ HiddenPrintfBuffer
llvm::AMDGPU::HSAMD::MetadataStreamerV3::emitKernelArg
void emitKernelArg(const Argument &Arg, unsigned &Offset, msgpack::ArrayDocNode Args)
Definition: AMDGPUHSAMetadataStreamer.cpp:690
llvm::MachineFunction
Definition: MachineFunction.h:241
llvm::AMDGPU::HSAMD::VersionMajorV3
constexpr uint32_t VersionMajorV3
HSA metadata major version for code object V3.
Definition: AMDGPUMetadata.h:38
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
AMDGPU.h
llvm::AMDGPU::HSAMD::MetadataStreamerV4::emitTargetID
void emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID)
Definition: AMDGPUHSAMetadataStreamer.cpp:958
llvm::AMDGPU::HSAMD::ValueKind::Sampler
@ Sampler
llvm::AMDGPU::HSAMD::MetadataStreamerV3::emitTo
bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override
Definition: AMDGPUHSAMetadataStreamer.cpp:902
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::AMDGPU::HSAMD::Kernel::Key::LanguageVersion
constexpr char LanguageVersion[]
Key for Kernel::Metadata::mLanguageVersion.
Definition: AMDGPUMetadata.h:390
llvm::AMDGPU::HSAMD::Metadata::mKernels
std::vector< Kernel::Metadata > mKernels
Kernels metadata. Required.
Definition: AMDGPUMetadata.h:442
llvm::CallingConv::SPIR_KERNEL
@ SPIR_KERNEL
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:152
llvm::AMDGPUTargetStreamer
Definition: AMDGPUTargetStreamer.h:34
llvm::AMDGPU::HSAMD::ValueKind::HiddenMultiGridSyncArg
@ HiddenMultiGridSyncArg
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::AMDGPU::HSAMD::AddressSpaceQualifier
AddressSpaceQualifier
Address space qualifiers.
Definition: AMDGPUMetadata.h:67
llvm::AMDGPU::HSAMD::MetadataStreamerV3::emitPrintf
void emitPrintf(const Module &Mod)
Definition: AMDGPUHSAMetadataStreamer.cpp:620
llvm::SIProgramInfo::NumAccVGPR
uint32_t NumAccVGPR
Definition: SIProgramInfo.h:48
llvm::AMDGPU::HSAMD::ValueKind::DynamicSharedPointer
@ DynamicSharedPointer
llvm::Type::IntegerTyID
@ IntegerTyID
Arbitrary bit width integers.
Definition: Type.h:71
llvm::AMDGPU::HSAMD::MetadataStreamerV3::end
void end() override
Definition: AMDGPUHSAMetadataStreamer.cpp:913
llvm::AMDGPU::HSAMD::ValueKind::HiddenHostcallBuffer
@ HiddenHostcallBuffer
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::toString
std::string toString() const
Definition: AMDGPUBaseInfo.cpp:478
llvm::AMDGPU::HSAMD::MetadataStreamerV2::emitVersion
void emitVersion() override
Definition: AMDGPUHSAMetadataStreamer.cpp:227
llvm::msgpack::Document::toYAML
void toYAML(raw_ostream &OS)
Convert MsgPack Document to YAML text.
Definition: MsgPackDocumentYAML.cpp:236
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::AMDGPU::HSAMD::MetadataStreamerV2::emitTo
bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override
Definition: AMDGPUHSAMetadataStreamer.cpp:436
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:240
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:606
llvm::AMDGPU::HSAMD::Kernel::Arg::Key::TypeName
constexpr char TypeName[]
Key for Kernel::Arg::Metadata::mTypeName.
Definition: AMDGPUMetadata.h:175
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:341
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::Type::FixedVectorTyID
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition: Type.h:76
llvm::VerifyHSAMetadata
static cl::opt< bool > VerifyHSAMetadata("amdgpu-verify-hsa-metadata", cl::desc("Verify AMDGPU HSA Metadata"))
llvm::msgpack::DocNode::getArray
ArrayDocNode & getArray(bool Convert=false)
Get an ArrayDocNode for an array node.
Definition: MsgPackDocument.h:129
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:364
llvm::AMDGPU::HSAMD::MetadataStreamerV3::getHSAKernelProps
msgpack::MapDocNode getHSAKernelProps(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const
Definition: AMDGPUHSAMetadataStreamer.cpp:863
llvm::DumpHSAMetadata
static cl::opt< bool > DumpHSAMetadata("amdgpu-dump-hsa-metadata", cl::desc("Dump AMDGPU HSA Metadata"))
llvm::AMDGPU::HSAMD::AccessQualifier::Default
@ Default
llvm::AMDGPU::HSAMD::Key::Printf
constexpr char Printf[]
Key for HSA::Metadata::mPrintf.
Definition: AMDGPUMetadata.h:430
llvm::AMDGPU::HSAMD::MetadataStreamerV3::getValueKind
StringRef getValueKind(Type *Ty, StringRef TypeQual, StringRef BaseTypeName) const
Definition: AMDGPUHSAMetadataStreamer.cpp:537
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:362
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:363
llvm::msgpack::ArrayDocNode
A DocNode that is an array.
Definition: MsgPackDocument.h:249
llvm::SIProgramInfo
Track resource usage for kernels / entry functions.
Definition: SIProgramInfo.h:25
llvm::GCNSubtarget::isXNACKEnabled
bool isXNACKEnabled() const
Definition: GCNSubtarget.h:533
llvm::AMDGPU::HSAMD::MetadataStreamerV3::getTypeName
std::string getTypeName(Type *Ty, bool Signed) const
Definition: AMDGPUHSAMetadataStreamer.cpp:564
llvm::AMDGPU::HSAMD::AddressSpaceQualifier::Private
@ Private
Version
uint64_t Version
Definition: RawMemProfReader.cpp:40
llvm::AMDGPU::HSAMD::MetadataStreamerV2::begin
void begin(const Module &Mod, const IsaInfo::AMDGPUTargetID &TargetID) override
Definition: AMDGPUHSAMetadataStreamer.cpp:440
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:348
llvm::Module::getNamedMetadata
NamedMDNode * getNamedMetadata(const Twine &Name) const
Return the first NamedMDNode in the module with the specified name.
Definition: Module.cpp:251
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::SIMachineFunctionInfo::hasQueuePtr
bool hasQueuePtr() const
Definition: SIMachineFunctionInfo.h:671
llvm::AMDGPU::HSAMD::MetadataStreamerV3::getAccessQualifier
Optional< StringRef > getAccessQualifier(StringRef AccQual) const
Definition: AMDGPUHSAMetadataStreamer.cpp:509
llvm::Type::getInt16Ty
static IntegerType * getInt16Ty(LLVMContext &C)
Definition: Type.cpp:238
llvm::StringSwitch
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
llvm::AMDGPU::HSAMD::MetadataStreamerV2::emitKernel
void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) override
Definition: AMDGPUHSAMetadataStreamer.cpp:457
llvm::AMDGPU::HSAMD::MetadataStreamerV3::emitHiddenKernelArgs
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) override
Definition: AMDGPUHSAMetadataStreamer.cpp:794
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::cl::desc
Definition: CommandLine.h:405
Mod
Module * Mod
Definition: PassBuilderBindings.cpp:54
llvm::AMDGPU::HSAMD::VersionMinorV3
constexpr uint32_t VersionMinorV3
HSA metadata minor version for code object V3.
Definition: AMDGPUMetadata.h:40
llvm::raw_string_ostream::str
std::string & str()
Returns the string's reference.
Definition: raw_ostream.h:650
llvm::Type::HalfTyID
@ HalfTyID
16-bit floating point type
Definition: Type.h:56
llvm::AMDGPU::HSAMD::ValueKind::HiddenGlobalOffsetX
@ HiddenGlobalOffsetX
llvm::GCNSubtarget::hasMAIInsts
bool hasMAIInsts() const
Definition: GCNSubtarget.h:712
llvm::AMDGPU::HSAMD::VersionMinorV4
constexpr uint32_t VersionMinorV4
HSA metadata minor version for code object V4.
Definition: AMDGPUMetadata.h:45
llvm::AMDGPU::HSAMD::toString
std::error_code toString(Metadata HSAMetadata, std::string &String)
Converts HSAMetadata to String.
Definition: AMDGPUMetadata.cpp:219