LLVM  16.0.0git
AMDGPUHSAMetadataStreamer.cpp
Go to the documentation of this file.
1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU HSA Metadata Streamer.
11 ///
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "AMDGPU.h"
17 #include "GCNSubtarget.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "llvm/IR/Module.h"
22 using namespace llvm;
23 
24 static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
25  const DataLayout &DL) {
26  Type *Ty = Arg.getType();
27  MaybeAlign ArgAlign;
28  if (Arg.hasByRefAttr()) {
29  Ty = Arg.getParamByRefType();
30  ArgAlign = Arg.getParamAlign();
31  }
32 
33  if (!ArgAlign)
34  ArgAlign = DL.getABITypeAlign(Ty);
35 
36  return std::make_pair(Ty, *ArgAlign);
37 }
38 
39 namespace llvm {
40 
42  "amdgpu-dump-hsa-metadata",
43  cl::desc("Dump AMDGPU HSA Metadata"));
45  "amdgpu-verify-hsa-metadata",
46  cl::desc("Verify AMDGPU HSA Metadata"));
47 
48 namespace AMDGPU {
49 namespace HSAMD {
50 
51 //===----------------------------------------------------------------------===//
52 // HSAMetadataStreamerV2
53 //===----------------------------------------------------------------------===//
54 void MetadataStreamerYamlV2::dump(StringRef HSAMetadataString) const {
55  errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
56 }
57 
58 void MetadataStreamerYamlV2::verify(StringRef HSAMetadataString) const {
59  errs() << "AMDGPU HSA Metadata Parser Test: ";
60 
61  HSAMD::Metadata FromHSAMetadataString;
62  if (fromString(HSAMetadataString, FromHSAMetadataString)) {
63  errs() << "FAIL\n";
64  return;
65  }
66 
67  std::string ToHSAMetadataString;
68  if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
69  errs() << "FAIL\n";
70  return;
71  }
72 
73  errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
74  << '\n';
75  if (HSAMetadataString != ToHSAMetadataString) {
76  errs() << "Original input: " << HSAMetadataString << '\n'
77  << "Produced output: " << ToHSAMetadataString << '\n';
78  }
79 }
80 
82 MetadataStreamerYamlV2::getAccessQualifier(StringRef AccQual) const {
83  if (AccQual.empty())
85 
87  .Case("read_only", AccessQualifier::ReadOnly)
88  .Case("write_only", AccessQualifier::WriteOnly)
89  .Case("read_write", AccessQualifier::ReadWrite)
91 }
92 
94 MetadataStreamerYamlV2::getAddressSpaceQualifier(unsigned AddressSpace) const {
95  switch (AddressSpace) {
108  default:
110  }
111 }
112 
113 ValueKind MetadataStreamerYamlV2::getValueKind(Type *Ty, StringRef TypeQual,
114  StringRef BaseTypeName) const {
115  if (TypeQual.contains("pipe"))
116  return ValueKind::Pipe;
117 
118  return StringSwitch<ValueKind>(BaseTypeName)
119  .Case("image1d_t", ValueKind::Image)
120  .Case("image1d_array_t", ValueKind::Image)
121  .Case("image1d_buffer_t", ValueKind::Image)
122  .Case("image2d_t", ValueKind::Image)
123  .Case("image2d_array_t", ValueKind::Image)
124  .Case("image2d_array_depth_t", ValueKind::Image)
125  .Case("image2d_array_msaa_t", ValueKind::Image)
126  .Case("image2d_array_msaa_depth_t", ValueKind::Image)
127  .Case("image2d_depth_t", ValueKind::Image)
128  .Case("image2d_msaa_t", ValueKind::Image)
129  .Case("image2d_msaa_depth_t", ValueKind::Image)
130  .Case("image3d_t", ValueKind::Image)
131  .Case("sampler_t", ValueKind::Sampler)
132  .Case("queue_t", ValueKind::Queue)
133  .Default(isa<PointerType>(Ty) ?
134  (Ty->getPointerAddressSpace() ==
139 }
140 
141 std::string MetadataStreamerYamlV2::getTypeName(Type *Ty, bool Signed) const {
142  switch (Ty->getTypeID()) {
143  case Type::IntegerTyID: {
144  if (!Signed)
145  return (Twine('u') + getTypeName(Ty, true)).str();
146 
147  auto BitWidth = Ty->getIntegerBitWidth();
148  switch (BitWidth) {
149  case 8:
150  return "char";
151  case 16:
152  return "short";
153  case 32:
154  return "int";
155  case 64:
156  return "long";
157  default:
158  return (Twine('i') + Twine(BitWidth)).str();
159  }
160  }
161  case Type::HalfTyID:
162  return "half";
163  case Type::FloatTyID:
164  return "float";
165  case Type::DoubleTyID:
166  return "double";
167  case Type::FixedVectorTyID: {
168  auto VecTy = cast<FixedVectorType>(Ty);
169  auto ElTy = VecTy->getElementType();
170  auto NumElements = VecTy->getNumElements();
171  return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
172  }
173  default:
174  return "unknown";
175  }
176 }
177 
178 std::vector<uint32_t>
179 MetadataStreamerYamlV2::getWorkGroupDimensions(MDNode *Node) const {
180  std::vector<uint32_t> Dims;
181  if (Node->getNumOperands() != 3)
182  return Dims;
183 
184  for (auto &Op : Node->operands())
185  Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
186  return Dims;
187 }
188 
189 Kernel::CodeProps::Metadata MetadataStreamerYamlV2::getHSACodeProps(
190  const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const {
191  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
193  HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
194  const Function &F = MF.getFunction();
195 
196  assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
197  F.getCallingConv() == CallingConv::SPIR_KERNEL);
198 
199  Align MaxKernArgAlign;
200  HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
201  MaxKernArgAlign);
202  HSACodeProps.mKernargSegmentAlign =
203  std::max(MaxKernArgAlign, Align(4)).value();
204 
205  HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
206  HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
207  HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
208  HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
209  HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
210  HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
211  HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
212  HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
213  HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
214  HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
215 
216  return HSACodeProps;
217 }
218 
219 Kernel::DebugProps::Metadata MetadataStreamerYamlV2::getHSADebugProps(
220  const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const {
221  return HSAMD::Kernel::DebugProps::Metadata();
222 }
223 
225  auto &Version = HSAMetadata.mVersion;
226 
227  Version.push_back(VersionMajorV2);
228  Version.push_back(VersionMinorV2);
229 }
230 
231 void MetadataStreamerYamlV2::emitPrintf(const Module &Mod) {
232  auto &Printf = HSAMetadata.mPrintf;
233 
234  auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
235  if (!Node)
236  return;
237 
238  for (auto *Op : Node->operands())
239  if (Op->getNumOperands())
240  Printf.push_back(
241  std::string(cast<MDString>(Op->getOperand(0))->getString()));
242 }
243 
244 void MetadataStreamerYamlV2::emitKernelLanguage(const Function &Func) {
245  auto &Kernel = HSAMetadata.mKernels.back();
246 
247  // TODO: What about other languages?
248  auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
249  if (!Node || !Node->getNumOperands())
250  return;
251  auto Op0 = Node->getOperand(0);
252  if (Op0->getNumOperands() <= 1)
253  return;
254 
255  Kernel.mLanguage = "OpenCL C";
256  Kernel.mLanguageVersion.push_back(
257  mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
258  Kernel.mLanguageVersion.push_back(
259  mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
260 }
261 
262 void MetadataStreamerYamlV2::emitKernelAttrs(const Function &Func) {
263  auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
264 
265  if (auto Node = Func.getMetadata("reqd_work_group_size"))
266  Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
267  if (auto Node = Func.getMetadata("work_group_size_hint"))
268  Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
269  if (auto Node = Func.getMetadata("vec_type_hint")) {
270  Attrs.mVecTypeHint = getTypeName(
271  cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
272  mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
273  }
274  if (Func.hasFnAttribute("runtime-handle")) {
275  Attrs.mRuntimeHandle =
276  Func.getFnAttribute("runtime-handle").getValueAsString().str();
277  }
278 }
279 
280 void MetadataStreamerYamlV2::emitKernelArgs(const Function &Func,
281  const GCNSubtarget &ST) {
282  for (auto &Arg : Func.args())
283  emitKernelArg(Arg);
284 
285  emitHiddenKernelArgs(Func, ST);
286 }
287 
288 void MetadataStreamerYamlV2::emitKernelArg(const Argument &Arg) {
289  auto Func = Arg.getParent();
290  auto ArgNo = Arg.getArgNo();
291  const MDNode *Node;
292 
293  StringRef Name;
294  Node = Func->getMetadata("kernel_arg_name");
295  if (Node && ArgNo < Node->getNumOperands())
296  Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
297  else if (Arg.hasName())
298  Name = Arg.getName();
299 
301  Node = Func->getMetadata("kernel_arg_type");
302  if (Node && ArgNo < Node->getNumOperands())
303  TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
304 
305  StringRef BaseTypeName;
306  Node = Func->getMetadata("kernel_arg_base_type");
307  if (Node && ArgNo < Node->getNumOperands())
308  BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
309 
311  if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
312  Arg.hasNoAliasAttr()) {
313  AccQual = "read_only";
314  } else {
315  Node = Func->getMetadata("kernel_arg_access_qual");
316  if (Node && ArgNo < Node->getNumOperands())
317  AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
318  }
319 
320  StringRef TypeQual;
321  Node = Func->getMetadata("kernel_arg_type_qual");
322  if (Node && ArgNo < Node->getNumOperands())
323  TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
324 
325  const DataLayout &DL = Func->getParent()->getDataLayout();
326 
328  if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) {
329  if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
330  // FIXME: Should report this for all address spaces
331  PointeeAlign = Arg.getParamAlign().valueOrOne();
332  }
333  }
334 
335  Type *ArgTy;
336  Align ArgAlign;
337  std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
338 
339  emitKernelArg(DL, ArgTy, ArgAlign,
340  getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name,
341  TypeName, BaseTypeName, AccQual, TypeQual);
342 }
343 
344 void MetadataStreamerYamlV2::emitKernelArg(
345  const DataLayout &DL, Type *Ty, Align Alignment, ValueKind ValueKind,
347  StringRef BaseTypeName, StringRef AccQual, StringRef TypeQual) {
348  HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
349  auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
350 
351  Arg.mName = std::string(Name);
352  Arg.mTypeName = std::string(TypeName);
353  Arg.mSize = DL.getTypeAllocSize(Ty);
354  Arg.mAlign = Alignment.value();
355  Arg.mValueKind = ValueKind;
356  Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0;
357 
358  if (auto PtrTy = dyn_cast<PointerType>(Ty))
359  Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
360 
361  Arg.mAccQual = getAccessQualifier(AccQual);
362 
363  // TODO: Emit Arg.mActualAccQual.
364 
365  SmallVector<StringRef, 1> SplitTypeQuals;
366  TypeQual.split(SplitTypeQuals, " ", -1, false);
367  for (StringRef Key : SplitTypeQuals) {
368  auto P = StringSwitch<bool*>(Key)
369  .Case("const", &Arg.mIsConst)
370  .Case("restrict", &Arg.mIsRestrict)
371  .Case("volatile", &Arg.mIsVolatile)
372  .Case("pipe", &Arg.mIsPipe)
373  .Default(nullptr);
374  if (P)
375  *P = true;
376  }
377 }
378 
379 void MetadataStreamerYamlV2::emitHiddenKernelArgs(const Function &Func,
380  const GCNSubtarget &ST) {
381  unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
382  if (!HiddenArgNumBytes)
383  return;
384 
385  auto &DL = Func.getParent()->getDataLayout();
386  auto Int64Ty = Type::getInt64Ty(Func.getContext());
387 
388  if (HiddenArgNumBytes >= 8)
389  emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX);
390  if (HiddenArgNumBytes >= 16)
391  emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY);
392  if (HiddenArgNumBytes >= 24)
393  emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ);
394 
395  auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
397 
398  if (HiddenArgNumBytes >= 32) {
399  // We forbid the use of features requiring hostcall when compiling OpenCL
400  // before code object V5, which makes the mutual exclusion between the
401  // "printf buffer" and "hostcall buffer" here sound.
402  if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
403  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer);
404  else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
405  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer);
406  else
407  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
408  }
409 
410  // Emit "default queue" and "completion action" arguments if enqueue kernel is
411  // used, otherwise emit dummy "none" arguments.
412  if (HiddenArgNumBytes >= 48) {
413  if (Func.hasFnAttribute("calls-enqueue-kernel")) {
414  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue);
415  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction);
416  } else {
417  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
418  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
419  }
420  }
421 
422  // Emit the pointer argument for multi-grid object.
423  if (HiddenArgNumBytes >= 56) {
424  if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg"))
425  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg);
426  else
427  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
428  }
429 }
430 
432  return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
433 }
434 
436  const IsaInfo::AMDGPUTargetID &TargetID) {
437  emitVersion();
438  emitPrintf(Mod);
439 }
440 
442  std::string HSAMetadataString;
443  if (toString(HSAMetadata, HSAMetadataString))
444  return;
445 
446  if (DumpHSAMetadata)
447  dump(HSAMetadataString);
448  if (VerifyHSAMetadata)
449  verify(HSAMetadataString);
450 }
451 
453  const SIProgramInfo &ProgramInfo) {
454  auto &Func = MF.getFunction();
455  if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
456  return;
457 
458  auto CodeProps = getHSACodeProps(MF, ProgramInfo);
459  auto DebugProps = getHSADebugProps(MF, ProgramInfo);
460 
461  HSAMetadata.mKernels.push_back(Kernel::Metadata());
462  auto &Kernel = HSAMetadata.mKernels.back();
463 
464  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
465  Kernel.mName = std::string(Func.getName());
466  Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
467  emitKernelLanguage(Func);
468  emitKernelAttrs(Func);
469  emitKernelArgs(Func, ST);
470  HSAMetadata.mKernels.back().mCodeProps = CodeProps;
471  HSAMetadata.mKernels.back().mDebugProps = DebugProps;
472 }
473 
474 //===----------------------------------------------------------------------===//
475 // HSAMetadataStreamerV3
476 //===----------------------------------------------------------------------===//
477 
478 void MetadataStreamerMsgPackV3::dump(StringRef HSAMetadataString) const {
479  errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
480 }
481 
482 void MetadataStreamerMsgPackV3::verify(StringRef HSAMetadataString) const {
483  errs() << "AMDGPU HSA Metadata Parser Test: ";
484 
485  msgpack::Document FromHSAMetadataString;
486 
487  if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
488  errs() << "FAIL\n";
489  return;
490  }
491 
492  std::string ToHSAMetadataString;
493  raw_string_ostream StrOS(ToHSAMetadataString);
494  FromHSAMetadataString.toYAML(StrOS);
495 
496  errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
497  if (HSAMetadataString != ToHSAMetadataString) {
498  errs() << "Original input: " << HSAMetadataString << '\n'
499  << "Produced output: " << StrOS.str() << '\n';
500  }
501 }
502 
506  .Case("read_only", StringRef("read_only"))
507  .Case("write_only", StringRef("write_only"))
508  .Case("read_write", StringRef("read_write"))
509  .Default(None);
510 }
511 
513  unsigned AddressSpace) const {
514  switch (AddressSpace) {
516  return StringRef("private");
518  return StringRef("global");
520  return StringRef("constant");
522  return StringRef("local");
524  return StringRef("generic");
526  return StringRef("region");
527  default:
528  return None;
529  }
530 }
531 
532 StringRef
534  StringRef BaseTypeName) const {
535  if (TypeQual.contains("pipe"))
536  return "pipe";
537 
538  return StringSwitch<StringRef>(BaseTypeName)
539  .Case("image1d_t", "image")
540  .Case("image1d_array_t", "image")
541  .Case("image1d_buffer_t", "image")
542  .Case("image2d_t", "image")
543  .Case("image2d_array_t", "image")
544  .Case("image2d_array_depth_t", "image")
545  .Case("image2d_array_msaa_t", "image")
546  .Case("image2d_array_msaa_depth_t", "image")
547  .Case("image2d_depth_t", "image")
548  .Case("image2d_msaa_t", "image")
549  .Case("image2d_msaa_depth_t", "image")
550  .Case("image3d_t", "image")
551  .Case("sampler_t", "sampler")
552  .Case("queue_t", "queue")
553  .Default(isa<PointerType>(Ty)
555  ? "dynamic_shared_pointer"
556  : "global_buffer")
557  : "by_value");
558 }
559 
561  bool Signed) const {
562  switch (Ty->getTypeID()) {
563  case Type::IntegerTyID: {
564  if (!Signed)
565  return (Twine('u') + getTypeName(Ty, true)).str();
566 
567  auto BitWidth = Ty->getIntegerBitWidth();
568  switch (BitWidth) {
569  case 8:
570  return "char";
571  case 16:
572  return "short";
573  case 32:
574  return "int";
575  case 64:
576  return "long";
577  default:
578  return (Twine('i') + Twine(BitWidth)).str();
579  }
580  }
581  case Type::HalfTyID:
582  return "half";
583  case Type::FloatTyID:
584  return "float";
585  case Type::DoubleTyID:
586  return "double";
587  case Type::FixedVectorTyID: {
588  auto VecTy = cast<FixedVectorType>(Ty);
589  auto ElTy = VecTy->getElementType();
590  auto NumElements = VecTy->getNumElements();
591  return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
592  }
593  default:
594  return "unknown";
595  }
596 }
597 
600  auto Dims = HSAMetadataDoc->getArrayNode();
601  if (Node->getNumOperands() != 3)
602  return Dims;
603 
604  for (auto &Op : Node->operands())
605  Dims.push_back(Dims.getDocument()->getNode(
606  uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
607  return Dims;
608 }
609 
611  auto Version = HSAMetadataDoc->getArrayNode();
612  Version.push_back(Version.getDocument()->getNode(VersionMajorV3));
613  Version.push_back(Version.getDocument()->getNode(VersionMinorV3));
614  getRootMetadata("amdhsa.version") = Version;
615 }
616 
618  auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
619  if (!Node)
620  return;
621 
622  auto Printf = HSAMetadataDoc->getArrayNode();
623  for (auto *Op : Node->operands())
624  if (Op->getNumOperands())
625  Printf.push_back(Printf.getDocument()->getNode(
626  cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
627  getRootMetadata("amdhsa.printf") = Printf;
628 }
629 
631  msgpack::MapDocNode Kern) {
632  // TODO: What about other languages?
633  auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
634  if (!Node || !Node->getNumOperands())
635  return;
636  auto Op0 = Node->getOperand(0);
637  if (Op0->getNumOperands() <= 1)
638  return;
639 
640  Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
641  auto LanguageVersion = Kern.getDocument()->getArrayNode();
642  LanguageVersion.push_back(Kern.getDocument()->getNode(
643  mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
644  LanguageVersion.push_back(Kern.getDocument()->getNode(
645  mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
646  Kern[".language_version"] = LanguageVersion;
647 }
648 
650  msgpack::MapDocNode Kern) {
651 
652  if (auto Node = Func.getMetadata("reqd_work_group_size"))
653  Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
654  if (auto Node = Func.getMetadata("work_group_size_hint"))
655  Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
656  if (auto Node = Func.getMetadata("vec_type_hint")) {
657  Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
658  getTypeName(
659  cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
660  mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
661  /*Copy=*/true);
662  }
663  if (Func.hasFnAttribute("runtime-handle")) {
664  Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
665  Func.getFnAttribute("runtime-handle").getValueAsString().str(),
666  /*Copy=*/true);
667  }
668  if (Func.hasFnAttribute("device-init"))
669  Kern[".kind"] = Kern.getDocument()->getNode("init");
670  else if (Func.hasFnAttribute("device-fini"))
671  Kern[".kind"] = Kern.getDocument()->getNode("fini");
672 }
673 
675  msgpack::MapDocNode Kern) {
676  auto &Func = MF.getFunction();
677  unsigned Offset = 0;
678  auto Args = HSAMetadataDoc->getArrayNode();
679  for (auto &Arg : Func.args())
680  emitKernelArg(Arg, Offset, Args);
681 
682  emitHiddenKernelArgs(MF, Offset, Args);
683 
684  Kern[".args"] = Args;
685 }
686 
688  unsigned &Offset,
690  auto Func = Arg.getParent();
691  auto ArgNo = Arg.getArgNo();
692  const MDNode *Node;
693 
694  StringRef Name;
695  Node = Func->getMetadata("kernel_arg_name");
696  if (Node && ArgNo < Node->getNumOperands())
697  Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
698  else if (Arg.hasName())
699  Name = Arg.getName();
700 
702  Node = Func->getMetadata("kernel_arg_type");
703  if (Node && ArgNo < Node->getNumOperands())
704  TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
705 
706  StringRef BaseTypeName;
707  Node = Func->getMetadata("kernel_arg_base_type");
708  if (Node && ArgNo < Node->getNumOperands())
709  BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
710 
712  if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
713  Arg.hasNoAliasAttr()) {
714  AccQual = "read_only";
715  } else {
716  Node = Func->getMetadata("kernel_arg_access_qual");
717  if (Node && ArgNo < Node->getNumOperands())
718  AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
719  }
720 
721  StringRef TypeQual;
722  Node = Func->getMetadata("kernel_arg_type_qual");
723  if (Node && ArgNo < Node->getNumOperands())
724  TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
725 
726  const DataLayout &DL = Func->getParent()->getDataLayout();
727 
729  Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
730 
731  // FIXME: Need to distinguish in memory alignment from pointer alignment.
732  if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
733  if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
734  PointeeAlign = Arg.getParamAlign().valueOrOne();
735  }
736 
737  // There's no distinction between byval aggregates and raw aggregates.
738  Type *ArgTy;
739  Align ArgAlign;
740  std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
741 
742  emitKernelArg(DL, ArgTy, ArgAlign,
743  getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
744  PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
745 }
746 
748  const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
750  StringRef Name, StringRef TypeName, StringRef BaseTypeName,
751  StringRef AccQual, StringRef TypeQual) {
752  auto Arg = Args.getDocument()->getMapNode();
753 
754  if (!Name.empty())
755  Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
756  if (!TypeName.empty())
757  Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
758  auto Size = DL.getTypeAllocSize(Ty);
759  Arg[".size"] = Arg.getDocument()->getNode(Size);
760  Offset = alignTo(Offset, Alignment);
761  Arg[".offset"] = Arg.getDocument()->getNode(Offset);
762  Offset += Size;
763  Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
764  if (PointeeAlign)
765  Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
766 
767  if (auto PtrTy = dyn_cast<PointerType>(Ty))
768  if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
769  // Limiting address space to emit only for a certain ValueKind.
770  if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer")
771  Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier,
772  /*Copy=*/true);
773 
774  if (auto AQ = getAccessQualifier(AccQual))
775  Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
776 
777  // TODO: Emit Arg[".actual_access"].
778 
779  SmallVector<StringRef, 1> SplitTypeQuals;
780  TypeQual.split(SplitTypeQuals, " ", -1, false);
781  for (StringRef Key : SplitTypeQuals) {
782  if (Key == "const")
783  Arg[".is_const"] = Arg.getDocument()->getNode(true);
784  else if (Key == "restrict")
785  Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
786  else if (Key == "volatile")
787  Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
788  else if (Key == "pipe")
789  Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
790  }
791 
792  Args.push_back(Arg);
793 }
794 
796  const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
797  auto &Func = MF.getFunction();
798  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
799 
800  unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
801  if (!HiddenArgNumBytes)
802  return;
803 
804  const Module *M = Func.getParent();
805  auto &DL = M->getDataLayout();
806  auto Int64Ty = Type::getInt64Ty(Func.getContext());
807 
808  Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
809 
810  if (HiddenArgNumBytes >= 8)
811  emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
812  Args);
813  if (HiddenArgNumBytes >= 16)
814  emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
815  Args);
816  if (HiddenArgNumBytes >= 24)
817  emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
818  Args);
819 
820  auto Int8PtrTy =
821  Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
822 
823  if (HiddenArgNumBytes >= 32) {
824  // We forbid the use of features requiring hostcall when compiling OpenCL
825  // before code object V5, which makes the mutual exclusion between the
826  // "printf buffer" and "hostcall buffer" here sound.
827  if (M->getNamedMetadata("llvm.printf.fmts"))
828  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
829  Args);
830  else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
831  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
832  Args);
833  else
834  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
835  }
836 
837  // Emit "default queue" and "completion action" arguments if enqueue kernel is
838  // used, otherwise emit dummy "none" arguments.
839  if (HiddenArgNumBytes >= 48) {
840  if (Func.hasFnAttribute("calls-enqueue-kernel")) {
841  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
842  Args);
843  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
844  Args);
845  } else {
846  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
847  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
848  }
849  }
850 
851  // Emit the pointer argument for multi-grid object.
852  if (HiddenArgNumBytes >= 56) {
853  if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
854  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
855  Args);
856  } else {
857  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
858  }
859  }
860 }
861 
863  const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const {
864  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
866  const Function &F = MF.getFunction();
867 
868  auto Kern = HSAMetadataDoc->getMapNode();
869 
870  Align MaxKernArgAlign;
871  Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
872  STM.getKernArgSegmentSize(F, MaxKernArgAlign));
873  Kern[".group_segment_fixed_size"] =
874  Kern.getDocument()->getNode(ProgramInfo.LDSSize);
875  Kern[".private_segment_fixed_size"] =
876  Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
877  Kern[".uses_dynamic_stack"] =
878  Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack);
879 
880  // FIXME: The metadata treats the minimum as 16?
881  Kern[".kernarg_segment_align"] =
882  Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
883  Kern[".wavefront_size"] =
884  Kern.getDocument()->getNode(STM.getWavefrontSize());
885  Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
886  Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
887 
888  // Only add AGPR count to metadata for supported devices
889  if (STM.hasMAIInsts()) {
890  Kern[".agpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumAccVGPR);
891  }
892 
893  Kern[".max_flat_workgroup_size"] =
894  Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
895  Kern[".sgpr_spill_count"] =
896  Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
897  Kern[".vgpr_spill_count"] =
898  Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
899 
900  return Kern;
901 }
902 
904  return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
905 }
906 
908  const IsaInfo::AMDGPUTargetID &TargetID) {
909  emitVersion();
910  emitPrintf(Mod);
911  getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
912 }
913 
915  std::string HSAMetadataString;
916  raw_string_ostream StrOS(HSAMetadataString);
917  HSAMetadataDoc->toYAML(StrOS);
918 
919  if (DumpHSAMetadata)
920  dump(StrOS.str());
921  if (VerifyHSAMetadata)
922  verify(StrOS.str());
923 }
924 
926  const SIProgramInfo &ProgramInfo) {
927  auto &Func = MF.getFunction();
928  auto Kern = getHSAKernelProps(MF, ProgramInfo);
929 
930  assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
931  Func.getCallingConv() == CallingConv::SPIR_KERNEL);
932 
933  auto Kernels =
934  getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
935 
936  {
937  Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
938  Kern[".symbol"] = Kern.getDocument()->getNode(
939  (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
940  emitKernelLanguage(Func, Kern);
941  emitKernelAttrs(Func, Kern);
942  emitKernelArgs(MF, Kern);
943  }
944 
945  Kernels.push_back(Kern);
946 }
947 
948 //===----------------------------------------------------------------------===//
949 // HSAMetadataStreamerV4
950 //===----------------------------------------------------------------------===//
951 
953  auto Version = HSAMetadataDoc->getArrayNode();
954  Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
955  Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
956  getRootMetadata("amdhsa.version") = Version;
957 }
958 
960  const IsaInfo::AMDGPUTargetID &TargetID) {
961  getRootMetadata("amdhsa.target") =
962  HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
963 }
964 
966  const IsaInfo::AMDGPUTargetID &TargetID) {
967  emitVersion();
968  emitTargetID(TargetID);
969  emitPrintf(Mod);
970  getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
971 }
972 
973 //===----------------------------------------------------------------------===//
974 // HSAMetadataStreamerV5
975 //===----------------------------------------------------------------------===//
976 
978  auto Version = HSAMetadataDoc->getArrayNode();
979  Version.push_back(Version.getDocument()->getNode(VersionMajorV5));
980  Version.push_back(Version.getDocument()->getNode(VersionMinorV5));
981  getRootMetadata("amdhsa.version") = Version;
982 }
983 
985  const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
986  auto &Func = MF.getFunction();
987  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
988 
989  // No implicit kernel argument is used.
990  if (ST.getImplicitArgNumBytes(Func) == 0)
991  return;
992 
993  const Module *M = Func.getParent();
994  auto &DL = M->getDataLayout();
996 
997  auto Int64Ty = Type::getInt64Ty(Func.getContext());
998  auto Int32Ty = Type::getInt32Ty(Func.getContext());
999  auto Int16Ty = Type::getInt16Ty(Func.getContext());
1000 
1001  Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
1002  emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args);
1003  emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args);
1004  emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args);
1005 
1006  emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args);
1007  emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args);
1008  emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args);
1009 
1010  emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args);
1011  emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args);
1012  emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args);
1013 
1014  // Reserved for hidden_tool_correlation_id.
1015  Offset += 8;
1016 
1017  Offset += 8; // Reserved.
1018 
1019  emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args);
1020  emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args);
1021  emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args);
1022 
1023  emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args);
1024 
1025  Offset += 6; // Reserved.
1026  auto Int8PtrTy =
1027  Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
1028 
1029  if (M->getNamedMetadata("llvm.printf.fmts")) {
1030  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
1031  Args);
1032  } else {
1033  Offset += 8; // Skipped.
1034  }
1035 
1036  if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
1037  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
1038  Args);
1039  } else {
1040  Offset += 8; // Skipped.
1041  }
1042 
1043  if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
1044  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
1045  Args);
1046  } else {
1047  Offset += 8; // Skipped.
1048  }
1049 
1050  if (!Func.hasFnAttribute("amdgpu-no-heap-ptr"))
1051  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);
1052  else
1053  Offset += 8; // Skipped.
1054 
1055  if (Func.hasFnAttribute("calls-enqueue-kernel")) {
1056  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
1057  Args);
1058  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
1059  Args);
1060  } else {
1061  Offset += 16; // Skipped.
1062  }
1063 
1064  Offset += 72; // Reserved.
1065 
1066  // hidden_private_base and hidden_shared_base are only when the subtarget has
1067  // ApertureRegs.
1068  if (!ST.hasApertureRegs()) {
1069  emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args);
1070  emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args);
1071  } else {
1072  Offset += 8; // Skipped.
1073  }
1074 
1075  if (MFI.hasQueuePtr())
1076  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
1077 }
1078 
1079 } // end namespace HSAMD
1080 } // end namespace AMDGPU
1081 } // end namespace llvm
llvm::StringSwitch::Case
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:156
Int32Ty
IntegerType * Int32Ty
Definition: NVVMIntrRange.cpp:67
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV3::verify
void verify(StringRef HSAMetadataString) const
Definition: AMDGPUHSAMetadataStreamer.cpp:482
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
Attrs
Function Attrs
Definition: README_ALTIVEC.txt:215
llvm::Type::FloatTyID
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4710
llvm::Type::DoubleTyID
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV3::getHSAKernelProps
msgpack::MapDocNode getHSAKernelProps(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const
Definition: AMDGPUHSAMetadataStreamer.cpp:862
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::AMDGPU::HSAMD::AccessQualifier::Unknown
@ Unknown
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:291
llvm::msgpack::DocNode::getDocument
Document * getDocument() const
Definition: MsgPackDocument.h:80
SIMachineFunctionInfo.h
llvm::AMDGPU::HSAMD::Metadata::mVersion
std::vector< uint32_t > mVersion
HSA metadata version. Required.
Definition: AMDGPUMetadata.h:438
llvm::Function
Definition: Function.h:60
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::AMDGPU::HSAMD::MetadataStreamerYamlV2::emitKernel
void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) override
Definition: AMDGPUHSAMetadataStreamer.cpp:452
llvm::AMDGPU::HSAMD::ValueKind::ByValue
@ ByValue
AMDGPUHSAMetadataStreamer.h
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:628
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV3::emitKernelArgs
void emitKernelArgs(const MachineFunction &MF, msgpack::MapDocNode Kern)
Definition: AMDGPUHSAMetadataStreamer.cpp:674
llvm::SmallVector< StringRef, 1 >
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:376
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV3::begin
void begin(const Module &Mod, const IsaInfo::AMDGPUTargetID &TargetID) override
Definition: AMDGPUHSAMetadataStreamer.cpp:907
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:729
llvm::SIProgramInfo::NumSGPR
uint32_t NumSGPR
Definition: SIProgramInfo.h:51
llvm::AMDGPU::HSAMD::VersionMajorV4
constexpr uint32_t VersionMajorV4
HSA metadata major version for code object V4.
Definition: AMDGPUMetadata.h:43
llvm::AMDGPU::HSAMD::AddressSpaceQualifier::Region
@ Region
llvm::Type::getTypeID
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
llvm::AMDGPU::HSAMD::MetadataStreamerYamlV2::end
void end() override
Definition: AMDGPUHSAMetadataStreamer.cpp:441
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Module.h
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:377
llvm::Optional
Definition: APInt.h:33
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::SIProgramInfo::NumVGPR
uint32_t NumVGPR
Definition: SIProgramInfo.h:46
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:891
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV3::getTypeName
std::string getTypeName(Type *Ty, bool Signed) const
Definition: AMDGPUHSAMetadataStreamer.cpp:560
llvm::SIProgramInfo::LDSSize
uint32_t LDSSize
Definition: SIProgramInfo.h:54
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV4::emitVersion
void emitVersion() override
Definition: AMDGPUHSAMetadataStreamer.cpp:952
llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPU.h:373
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
llvm::AMDGPUSubtarget::getKernArgSegmentSize
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
Definition: AMDGPUSubtarget.cpp:552
llvm::AMDGPU::HSAMD::ValueKind::Queue
@ Queue
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::AMDGPU::HSAMD::VersionMajorV2
constexpr uint32_t VersionMajorV2
HSA metadata major version for code object V2.
Definition: AMDGPUMetadata.h:33
llvm::AMDGPU::HSAMD::AccessQualifier
AccessQualifier
Access qualifiers.
Definition: AMDGPUMetadata.h:58
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV3::emitKernel
void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) override
Definition: AMDGPUHSAMetadataStreamer.cpp:925
llvm::AMDGPU::HSAMD::Kernel::Key::DebugProps
constexpr char DebugProps[]
Key for Kernel::Metadata::mDebugProps.
Definition: AMDGPUMetadata.h:398
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:187
llvm::AMDGPU::HSAMD::fromString
std::error_code fromString(StringRef String, Metadata &HSAMetadata)
Converts String to HSAMetadata.
Definition: AMDGPUMetadata.cpp:213
llvm::AMDGPU::HSAMD::Key::Kernels
constexpr char Kernels[]
Key for HSA::Metadata::mKernels.
Definition: AMDGPUMetadata.h:432
llvm::AMDGPU::HSAMD::ValueKind
ValueKind
Value kinds.
Definition: AMDGPUMetadata.h:78
llvm::AMDGPU::IsaInfo::AMDGPUTargetID
Definition: AMDGPUBaseInfo.h:105
llvm::AMDGPUTargetStreamer::EmitHSAMetadata
virtual bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict)=0
Emit HSA Metadata.
llvm::AMDGPU::HSAMD::ValueKind::HiddenCompletionAction
@ HiddenCompletionAction
llvm::AMDGPU::HSAMD::MetadataStreamerYamlV2::emitVersion
void emitVersion() override
Definition: AMDGPUHSAMetadataStreamer.cpp:224
GCNSubtarget.h
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:754
llvm::AMDGPU::HSAMD::VersionMinorV2
constexpr uint32_t VersionMinorV2
HSA metadata minor version for code object V2.
Definition: AMDGPUMetadata.h:35
llvm::SIProgramInfo::ScratchSize
uint64_t ScratchSize
Definition: SIProgramInfo.h:37
llvm::msgpack::Document::getArrayNode
ArrayDocNode getArrayNode()
Create an empty Array node associated with this Document.
Definition: MsgPackDocument.h:380
llvm::AMDGPU::HSAMD::Kernel::Arg::Key::AccQual
constexpr char AccQual[]
Key for Kernel::Arg::Metadata::mAccQual.
Definition: AMDGPUMetadata.h:191
llvm::AMDGPU::HSAMD::Kernel::Key::CodeProps
constexpr char CodeProps[]
Key for Kernel::Metadata::mCodeProps.
Definition: AMDGPUMetadata.h:396
llvm::AMDGPU::HSAMD::ValueKind::HiddenGlobalOffsetY
@ HiddenGlobalOffsetY
llvm::AMDGPU::PALMD::Key
Key
PAL metadata keys.
Definition: AMDGPUMetadata.h:486
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV3::getWorkGroupDimensions
msgpack::ArrayDocNode getWorkGroupDimensions(MDNode *Node) const
Definition: AMDGPUHSAMetadataStreamer.cpp:599
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV3::getAddressSpaceQualifier
Optional< StringRef > getAddressSpaceQualifier(unsigned AddressSpace) const
Definition: AMDGPUHSAMetadataStreamer.cpp:512
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV3::dump
void dump(StringRef HSAMetadataString) const
Definition: AMDGPUHSAMetadataStreamer.cpp:478
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
AMDGPU
Definition: AMDGPUReplaceLDSUseWithPointer.cpp:114
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV4::begin
void begin(const Module &Mod, const IsaInfo::AMDGPUTargetID &TargetID) override
Definition: AMDGPUHSAMetadataStreamer.cpp:965
llvm::AMDGPU::HSAMD::ValueKind::HiddenGlobalOffsetZ
@ HiddenGlobalOffsetZ
llvm::msgpack::Document
Simple in-memory representation of a document of msgpack objects with ability to find and create arra...
Definition: MsgPackDocument.h:272
SIProgramInfo.h
getArgumentTypeAlign
static std::pair< Type *, Align > getArgumentTypeAlign(const Argument &Arg, const DataLayout &DL)
Definition: AMDGPUHSAMetadataStreamer.cpp:24
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV4::emitTargetID
void emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID)
Definition: AMDGPUHSAMetadataStreamer.cpp:959
llvm::SIProgramInfo::DynamicCallStack
bool DynamicCallStack
Definition: SIProgramInfo.h:68
llvm::IndexedInstrProf::Version
const uint64_t Version
Definition: InstrProf.h:1056
llvm::AMDGPU::HSAMD::ValueKind::GlobalBuffer
@ GlobalBuffer
Align
uint64_t Align
Definition: ELFObjHandler.cpp:81
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::None
const NoneType None
Definition: None.h:24
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
llvm::msgpack::Document::fromYAML
bool fromYAML(StringRef S)
Read YAML text into the MsgPack document. Returns false on failure.
Definition: MsgPackDocumentYAML.cpp:242
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:372
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV3::emitKernelLanguage
void emitKernelLanguage(const Function &Func, msgpack::MapDocNode Kern)
Definition: AMDGPUHSAMetadataStreamer.cpp:630
llvm::AMDGPUSubtarget::getWavefrontSize
unsigned getWavefrontSize() const
Definition: AMDGPUSubtarget.h:200
llvm::AMDGPU::HSAMD::AddressSpaceQualifier::Global
@ Global
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV3::getValueKind
StringRef getValueKind(Type *Ty, StringRef TypeQual, StringRef BaseTypeName) const
Definition: AMDGPUHSAMetadataStreamer.cpp:533
llvm::AMDGPU::HSAMD::AccessQualifier::WriteOnly
@ WriteOnly
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:656
llvm::AMDGPU::HSAMD::MetadataStreamerYamlV2::emitTo
bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override
Definition: AMDGPUHSAMetadataStreamer.cpp:431
llvm::AMDGPU::HSAMD::VersionMajorV5
constexpr uint32_t VersionMajorV5
HSA metadata major version for code object V5.
Definition: AMDGPUMetadata.h:48
llvm::cl::opt< bool >
llvm::AMDGPU::HSAMD::ValueKind::Image
@ Image
llvm::AMDGPU::HSAMD::VersionMinorV5
constexpr uint32_t VersionMinorV5
HSA metadata minor version for code object V5.
Definition: AMDGPUMetadata.h:50
llvm::msgpack::MapDocNode
A DocNode that is a map.
Definition: MsgPackDocument.h:219
llvm::AMDGPU::HSAMD::Metadata::mPrintf
std::vector< std::string > mPrintf
Printf metadata. Optional.
Definition: AMDGPUMetadata.h:440
AMDGPUTargetStreamer.h
uint64_t
llvm::AMDGPU::HSAMD::AddressSpaceQualifier::Local
@ Local
llvm::AMDGPU::HSAMD::AccessQualifier::ReadOnly
@ ReadOnly
llvm::AMDGPU::HSAMD::AccessQualifier::ReadWrite
@ ReadWrite
llvm::omp::Kernel
Function * Kernel
Summary of a kernel (=entry point for target offloading).
Definition: OpenMPOpt.h:21
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV3::emitKernelArg
void emitKernelArg(const Argument &Arg, unsigned &Offset, msgpack::ArrayDocNode Args)
Definition: AMDGPUHSAMetadataStreamer.cpp:687
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV3::emitHiddenKernelArgs
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) override
Definition: AMDGPUHSAMetadataStreamer.cpp:795
llvm::AMDGPU::HSAMD::Kernel::Metadata
In-memory representation of kernel metadata.
Definition: AMDGPUMetadata.h:402
llvm::AMDGPU::HSAMD::ValueKind::HiddenDefaultQueue
@ HiddenDefaultQueue
llvm::AMDGPU::HSAMD::ValueKind::HiddenNone
@ HiddenNone
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV3::emitKernelAttrs
void emitKernelAttrs(const Function &Func, msgpack::MapDocNode Kern)
Definition: AMDGPUHSAMetadataStreamer.cpp:649
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV3::emitVersion
void emitVersion() override
Definition: AMDGPUHSAMetadataStreamer.cpp:610
llvm::AMDGPU::HSAMD::Kernel::Arg::Key::PointeeAlign
constexpr char PointeeAlign[]
Key for Kernel::Arg::Metadata::mPointeeAlign.
Definition: AMDGPUMetadata.h:187
llvm::AMDGPU::HSAMD::AddressSpaceQualifier::Generic
@ Generic
llvm::AMDGPU::HSAMD::ValueKind::Pipe
@ Pipe
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV3::getAccessQualifier
Optional< StringRef > getAccessQualifier(StringRef AccQual) const
Definition: AMDGPUHSAMetadataStreamer.cpp:504
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:201
llvm::msgpack::Document::getNode
DocNode getNode()
Create a nil node associated with this Document.
Definition: MsgPackDocument.h:308
llvm::AMDGPU::HSAMD::AddressSpaceQualifier::Unknown
@ Unknown
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::AMDGPU::HSAMD::AddressSpaceQualifier::Constant
@ Constant
llvm::MDNode
Metadata node.
Definition: Metadata.h:944
llvm::AMDGPU::HSAMD::ValueKind::HiddenPrintfBuffer
@ HiddenPrintfBuffer
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::AMDGPU::HSAMD::VersionMajorV3
constexpr uint32_t VersionMajorV3
HSA metadata major version for code object V3.
Definition: AMDGPUMetadata.h:38
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV3::end
void end() override
Definition: AMDGPUHSAMetadataStreamer.cpp:914
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV3::emitPrintf
void emitPrintf(const Module &Mod)
Definition: AMDGPUHSAMetadataStreamer.cpp:617
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV3::getRootMetadata
msgpack::DocNode & getRootMetadata(StringRef Key)
Definition: AMDGPUHSAMetadataStreamer.h:109
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
AMDGPU.h
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV5::emitHiddenKernelArgs
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) override
Definition: AMDGPUHSAMetadataStreamer.cpp:984
llvm::AMDGPU::HSAMD::MetadataStreamerYamlV2::begin
void begin(const Module &Mod, const IsaInfo::AMDGPUTargetID &TargetID) override
Definition: AMDGPUHSAMetadataStreamer.cpp:435
llvm::AMDGPU::HSAMD::ValueKind::Sampler
@ Sampler
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::AMDGPU::HSAMD::Kernel::Key::LanguageVersion
constexpr char LanguageVersion[]
Key for Kernel::Metadata::mLanguageVersion.
Definition: AMDGPUMetadata.h:390
llvm::AMDGPU::HSAMD::Metadata::mKernels
std::vector< Kernel::Metadata > mKernels
Kernels metadata. Required.
Definition: AMDGPUMetadata.h:442
llvm::AMDGPUTargetStreamer
Definition: AMDGPUTargetStreamer.h:34
llvm::AMDGPU::HSAMD::ValueKind::HiddenMultiGridSyncArg
@ HiddenMultiGridSyncArg
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::AMDGPU::HSAMD::AddressSpaceQualifier
AddressSpaceQualifier
Address space qualifiers.
Definition: AMDGPUMetadata.h:67
llvm::SIProgramInfo::NumAccVGPR
uint32_t NumAccVGPR
Definition: SIProgramInfo.h:48
llvm::AMDGPU::HSAMD::ValueKind::DynamicSharedPointer
@ DynamicSharedPointer
llvm::Type::IntegerTyID
@ IntegerTyID
Arbitrary bit width integers.
Definition: Type.h:71
llvm::AMDGPU::HSAMD::ValueKind::HiddenHostcallBuffer
@ HiddenHostcallBuffer
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::toString
std::string toString() const
Definition: AMDGPUBaseInfo.cpp:554
llvm::msgpack::Document::toYAML
void toYAML(raw_ostream &OS)
Convert MsgPack Document to YAML text.
Definition: MsgPackDocumentYAML.cpp:236
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:240
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:622
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV3::emitTo
bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override
Definition: AMDGPUHSAMetadataStreamer.cpp:903
llvm::AMDGPU::HSAMD::Kernel::Arg::Key::TypeName
constexpr char TypeName[]
Key for Kernel::Arg::Metadata::mTypeName.
Definition: AMDGPUMetadata.h:175
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::Type::FixedVectorTyID
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition: Type.h:76
llvm::CallingConv::SPIR_KERNEL
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:141
llvm::VerifyHSAMetadata
static cl::opt< bool > VerifyHSAMetadata("amdgpu-verify-hsa-metadata", cl::desc("Verify AMDGPU HSA Metadata"))
llvm::msgpack::DocNode::getArray
ArrayDocNode & getArray(bool Convert=false)
Get an ArrayDocNode for an array node.
Definition: MsgPackDocument.h:129
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV3::HSAMetadataDoc
std::unique_ptr< msgpack::Document > HSAMetadataDoc
Definition: AMDGPUHSAMetadataStreamer.h:65
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::DumpHSAMetadata
static cl::opt< bool > DumpHSAMetadata("amdgpu-dump-hsa-metadata", cl::desc("Dump AMDGPU HSA Metadata"))
llvm::AMDGPU::HSAMD::AccessQualifier::Default
@ Default
llvm::AMDGPU::HSAMD::Key::Printf
constexpr char Printf[]
Key for HSA::Metadata::mPrintf.
Definition: AMDGPUMetadata.h:430
llvm::StringRef::contains
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition: StringRef.h:415
llvm::msgpack::ArrayDocNode
A DocNode that is an array.
Definition: MsgPackDocument.h:249
llvm::SIProgramInfo
Track resource usage for kernels / entry functions.
Definition: SIProgramInfo.h:25
llvm::GCNSubtarget::isXNACKEnabled
bool isXNACKEnabled() const
Definition: GCNSubtarget.h:550
llvm::AMDGPU::HSAMD::AddressSpaceQualifier::Private
@ Private
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:375
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:351
llvm::Module::getNamedMetadata
NamedMDNode * getNamedMetadata(const Twine &Name) const
Return the first NamedMDNode in the module with the specified name.
Definition: Module.cpp:251
llvm::SIMachineFunctionInfo::hasQueuePtr
bool hasQueuePtr() const
Definition: SIMachineFunctionInfo.h:687
llvm::StringSwitch::Default
R Default(T Value)
Definition: StringSwitch.h:182
llvm::Type::getInt16Ty
static IntegerType * getInt16Ty(LLVMContext &C)
Definition: Type.cpp:238
llvm::StringSwitch
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::cl::desc
Definition: CommandLine.h:413
Mod
Module * Mod
Definition: PassBuilderBindings.cpp:54
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV5::emitVersion
void emitVersion() override
Definition: AMDGPUHSAMetadataStreamer.cpp:977
llvm::AMDGPU::HSAMD::VersionMinorV3
constexpr uint32_t VersionMinorV3
HSA metadata minor version for code object V3.
Definition: AMDGPUMetadata.h:40
llvm::raw_string_ostream::str
std::string & str()
Returns the string's reference.
Definition: raw_ostream.h:646
llvm::StringRef::split
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:689
llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:371
llvm::Type::HalfTyID
@ HalfTyID
16-bit floating point type
Definition: Type.h:56
llvm::AMDGPU::HSAMD::ValueKind::HiddenGlobalOffsetX
@ HiddenGlobalOffsetX
llvm::GCNSubtarget::hasMAIInsts
bool hasMAIInsts() const
Definition: GCNSubtarget.h:729
llvm::AMDGPU::HSAMD::VersionMinorV4
constexpr uint32_t VersionMinorV4
HSA metadata minor version for code object V4.
Definition: AMDGPUMetadata.h:45
llvm::AMDGPU::HSAMD::toString
std::error_code toString(Metadata HSAMetadata, std::string &String)
Converts HSAMetadata to String.
Definition: AMDGPUMetadata.cpp:219