LLVM  16.0.0git
AMDGPUAsmPrinter.cpp
Go to the documentation of this file.
1 //===-- AMDGPUAsmPrinter.cpp - AMDGPU assembly printer --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 ///
11 /// The AMDGPUAsmPrinter is used to print both assembly string and also binary
12 /// code. When passed an MCAsmStreamer it prints assembly and when passed
13 /// an MCObjectStreamer it outputs binary code.
14 //
15 //===----------------------------------------------------------------------===//
16 //
17 
18 #include "AMDGPUAsmPrinter.h"
19 #include "AMDGPU.h"
22 #include "AMDKernelCodeT.h"
23 #include "GCNSubtarget.h"
26 #include "R600AsmPrinter.h"
27 #include "SIMachineFunctionInfo.h"
29 #include "Utils/AMDGPUBaseInfo.h"
31 #include "llvm/BinaryFormat/ELF.h"
34 #include "llvm/IR/DiagnosticInfo.h"
35 #include "llvm/MC/MCAssembler.h"
36 #include "llvm/MC/MCContext.h"
37 #include "llvm/MC/MCSectionELF.h"
38 #include "llvm/MC/MCStreamer.h"
39 #include "llvm/MC/TargetRegistry.h"
44 
45 using namespace llvm;
46 using namespace llvm::AMDGPU;
47 
48 // This should get the default rounding mode from the kernel. We just set the
49 // default here, but this could change if the OpenCL rounding mode pragmas are
50 // used.
51 //
52 // The denormal mode here should match what is reported by the OpenCL runtime
53 // for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
54 // can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
55 //
56 // AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
57 // precision, and leaves single precision to flush all and does not report
58 // CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
59 // CL_FP_DENORM for both.
60 //
61 // FIXME: It seems some instructions do not support single precision denormals
62 // regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
63 // and sin_f32, cos_f32 on most parts).
64 
65 // We want to use these instructions, and using fp32 denormals also causes
66 // instructions to run at the double precision rate for the device so it's
67 // probably best to just report no single precision denormals.
68 static uint32_t getFPMode(AMDGPU::SIModeRegisterDefaults Mode) {
71  FP_DENORM_MODE_SP(Mode.fpDenormModeSPValue()) |
72  FP_DENORM_MODE_DP(Mode.fpDenormModeDPValue());
73 }
74 
75 static AsmPrinter *
77  std::unique_ptr<MCStreamer> &&Streamer) {
78  return new AMDGPUAsmPrinter(tm, std::move(Streamer));
79 }
80 
86 }
87 
89  std::unique_ptr<MCStreamer> Streamer)
90  : AsmPrinter(TM, std::move(Streamer)) {
93  HSAMetadataStream.reset(new HSAMD::MetadataStreamerYamlV2());
94  } else if (isHsaAbiVersion3(getGlobalSTI())) {
95  HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV3());
96  } else if (isHsaAbiVersion5(getGlobalSTI())) {
97  HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV5());
98  } else {
99  HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV4());
100  }
101  }
102 }
103 
105  return "AMDGPU Assembly Printer";
106 }
107 
109  return TM.getMCSubtargetInfo();
110 }
111 
113  if (!OutStreamer)
114  return nullptr;
115  return static_cast<AMDGPUTargetStreamer*>(OutStreamer->getTargetStreamer());
116 }
117 
120 }
121 
122 void AMDGPUAsmPrinter::initTargetStreamer(Module &M) {
124 
125  // TODO: Which one is called first, emitStartOfAsmFile or
126  // emitFunctionBodyStart?
127  if (getTargetStreamer() && !getTargetStreamer()->getTargetID())
128  initializeTargetID(M);
129 
130  if (TM.getTargetTriple().getOS() != Triple::AMDHSA &&
132  return;
133 
136 
138  HSAMetadataStream->begin(M, *getTargetStreamer()->getTargetID());
139 
142 
144  return;
145 
146  // HSA emits NT_AMD_HSA_CODE_OBJECT_VERSION for code objects v2.
149 
150  // HSA and PAL emit NT_AMD_HSA_ISA_VERSION for code objects v2.
153  Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
154 }
155 
157  // Init target streamer if it has not yet happened
159  initTargetStreamer(M);
160 
161  // Following code requires TargetStreamer to be present.
162  if (!getTargetStreamer())
163  return;
164 
165  if (TM.getTargetTriple().getOS() != Triple::AMDHSA ||
168 
169  // Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA).
170  // Emit HSA Metadata (NT_AMD_HSA_METADATA).
171  if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
172  HSAMetadataStream->end();
173  bool Success = HSAMetadataStream->emitTo(*getTargetStreamer());
174  (void)Success;
175  assert(Success && "Malformed HSA Metadata");
176  }
177 }
178 
180  const MachineBasicBlock *MBB) const {
182  return false;
183 
184  if (MBB->empty())
185  return true;
186 
187  // If this is a block implementing a long branch, an expression relative to
188 // the start of the block is needed.
189  // XXX - Is there a smarter way to check this?
190  return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64);
191 }
192 
195  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
196  const Function &F = MF->getFunction();
197 
198  // TODO: Which one is called first, emitStartOfAsmFile or
199  // emitFunctionBodyStart?
201  initializeTargetID(*F.getParent());
202 
203  const auto &FunctionTargetID = STM.getTargetID();
204  // Make sure function's xnack settings are compatible with module's
205  // xnack settings.
206  if (FunctionTargetID.isXnackSupported() &&
207  FunctionTargetID.getXnackSetting() != IsaInfo::TargetIDSetting::Any &&
208  FunctionTargetID.getXnackSetting() != getTargetStreamer()->getTargetID()->getXnackSetting()) {
209  OutContext.reportError({}, "xnack setting of '" + Twine(MF->getName()) +
210  "' function does not match module xnack setting");
211  return;
212  }
213  // Make sure function's sramecc settings are compatible with module's
214  // sramecc settings.
215  if (FunctionTargetID.isSramEccSupported() &&
216  FunctionTargetID.getSramEccSetting() != IsaInfo::TargetIDSetting::Any &&
217  FunctionTargetID.getSramEccSetting() != getTargetStreamer()->getTargetID()->getSramEccSetting()) {
218  OutContext.reportError({}, "sramecc setting of '" + Twine(MF->getName()) +
219  "' function does not match module sramecc setting");
220  return;
221  }
222 
223  if (!MFI.isEntryFunction())
224  return;
225 
226  if ((STM.isMesaKernel(F) || isHsaAbiVersion2(getGlobalSTI())) &&
227  (F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
228  F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
229  amd_kernel_code_t KernelCode;
230  getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
231  getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
232  }
233 
234  if (STM.isAmdHsaOS())
235  HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
236 }
237 
240  if (!MFI.isEntryFunction())
241  return;
242 
243  if (TM.getTargetTriple().getOS() != Triple::AMDHSA ||
245  return;
246 
247  auto &Streamer = getTargetStreamer()->getStreamer();
248  auto &Context = Streamer.getContext();
249  auto &ObjectFileInfo = *Context.getObjectFileInfo();
250  auto &ReadOnlySection = *ObjectFileInfo.getReadOnlySection();
251 
252  Streamer.pushSection();
253  Streamer.switchSection(&ReadOnlySection);
254 
255  // CP microcode requires the kernel descriptor to be allocated on 64 byte
256  // alignment.
257  Streamer.emitValueToAlignment(64, 0, 1, 0);
258  if (ReadOnlySection.getAlignment() < 64)
259  ReadOnlySection.setAlignment(Align(64));
260 
261  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
262 
263  SmallString<128> KernelName;
264  getNameWithPrefix(KernelName, &MF->getFunction());
266  STM, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
267  CurrentProgramInfo.NumVGPRsForWavesPerEU,
268  CurrentProgramInfo.NumSGPRsForWavesPerEU -
270  CurrentProgramInfo.VCCUsed,
271  CurrentProgramInfo.FlatUsed),
272  CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed);
273 
274  Streamer.popSection();
275 }
276 
278  if (TM.getTargetTriple().getOS() == Triple::AMDHSA &&
281  return;
282  }
283 
285  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
286  if (MFI->isEntryFunction() && STM.isAmdHsaOrMesa(MF->getFunction())) {
291  }
292  if (DumpCodeInstEmitter) {
293  // Disassemble function name label to text.
294  DisasmLines.push_back(MF->getName().str() + ":");
296  HexLines.push_back("");
297  }
298 
300 }
301 
303  if (DumpCodeInstEmitter && !isBlockOnlyReachableByFallthrough(&MBB)) {
304  // Write a line for the basic block label if it is not only fallthrough.
305  DisasmLines.push_back(
306  (Twine("BB") + Twine(getFunctionNumber())
307  + "_" + Twine(MBB.getNumber()) + ":").str());
309  HexLines.push_back("");
310  }
312 }
313 
316  if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
318  Twine(GV->getName()) +
319  ": unsupported initializer for address space");
320  return;
321  }
322 
323  // LDS variables aren't emitted in HSA or PAL yet.
324  const Triple::OSType OS = TM.getTargetTriple().getOS();
325  if (OS == Triple::AMDHSA || OS == Triple::AMDPAL)
326  return;
327 
328  MCSymbol *GVSym = getSymbol(GV);
329 
330  GVSym->redefineIfPossible();
331  if (GVSym->isDefined() || GVSym->isVariable())
332  report_fatal_error("symbol '" + Twine(GVSym->getName()) +
333  "' is already defined");
334 
335  const DataLayout &DL = GV->getParent()->getDataLayout();
336  uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
337  Align Alignment = GV->getAlign().value_or(Align(4));
338 
339  emitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
340  emitLinkage(GV, GVSym);
341  if (auto TS = getTargetStreamer())
342  TS->emitAMDGPULDS(GVSym, Size, Alignment);
343  return;
344  }
345 
347 }
348 
350  // Pad with s_code_end to help tools and guard against instruction prefetch
351  // causing stale data in caches. Arguably this should be done by the linker,
352  // which is why this isn't done for Mesa.
353  const MCSubtargetInfo &STI = *getGlobalSTI();
354  if ((AMDGPU::isGFX10Plus(STI) || AMDGPU::isGFX90A(STI)) &&
355  (STI.getTargetTriple().getOS() == Triple::AMDHSA ||
356  STI.getTargetTriple().getOS() == Triple::AMDPAL)) {
357  OutStreamer->switchSection(getObjFileLowering().getTextSection());
359  }
360 
362 }
363 
364 // Print comments that apply to both callable functions and entry points.
365 void AMDGPUAsmPrinter::emitCommonFunctionComments(
366  uint32_t NumVGPR,
367  Optional<uint32_t> NumAGPR,
368  uint32_t TotalNumVGPR,
369  uint32_t NumSGPR,
370  uint64_t ScratchSize,
371  uint64_t CodeSize,
372  const AMDGPUMachineFunction *MFI) {
373  OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);
374  OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false);
375  OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false);
376  if (NumAGPR) {
377  OutStreamer->emitRawComment(" NumAgprs: " + Twine(*NumAGPR), false);
378  OutStreamer->emitRawComment(" TotalNumVgprs: " + Twine(TotalNumVGPR),
379  false);
380  }
381  OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false);
382  OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()),
383  false);
384 }
385 
386 uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
387  const MachineFunction &MF) const {
389  uint16_t KernelCodeProperties = 0;
390 
391  if (MFI.hasPrivateSegmentBuffer()) {
392  KernelCodeProperties |=
393  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
394  }
395  if (MFI.hasDispatchPtr()) {
396  KernelCodeProperties |=
397  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
398  }
399  if (MFI.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) {
400  KernelCodeProperties |=
401  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
402  }
403  if (MFI.hasKernargSegmentPtr()) {
404  KernelCodeProperties |=
405  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
406  }
407  if (MFI.hasDispatchID()) {
408  KernelCodeProperties |=
409  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
410  }
411  if (MFI.hasFlatScratchInit()) {
412  KernelCodeProperties |=
413  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
414  }
416  KernelCodeProperties |=
417  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
418  }
419 
420  if (CurrentProgramInfo.DynamicCallStack) {
421  KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK;
422  }
423 
424  return KernelCodeProperties;
425 }
426 
427 amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
428  const MachineFunction &MF,
429  const SIProgramInfo &PI) const {
430  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
431  const Function &F = MF.getFunction();
432 
433  amdhsa::kernel_descriptor_t KernelDescriptor;
434  memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor));
435 
436  assert(isUInt<32>(PI.ScratchSize));
437  assert(isUInt<32>(PI.getComputePGMRSrc1()));
438  assert(isUInt<32>(PI.ComputePGMRSrc2));
439 
440  KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
441  KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
442 
443  Align MaxKernArgAlign;
444  KernelDescriptor.kernarg_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
445 
446  KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1();
447  KernelDescriptor.compute_pgm_rsrc2 = PI.ComputePGMRSrc2;
448  KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
449 
450  assert(STM.hasGFX90AInsts() || CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
451  if (STM.hasGFX90AInsts())
452  KernelDescriptor.compute_pgm_rsrc3 =
453  CurrentProgramInfo.ComputePGMRSrc3GFX90A;
454 
455  return KernelDescriptor;
456 }
457 
459  // Init target streamer lazily on the first function so that previous passes
460  // can set metadata.
462  initTargetStreamer(*MF.getFunction().getParent());
463 
464  ResourceUsage = &getAnalysis<AMDGPUResourceUsageAnalysis>();
465  CurrentProgramInfo = SIProgramInfo();
466 
468 
469  // The starting address of all shader programs must be 256 bytes aligned.
470  // Regular functions just need the basic required instruction alignment.
471  MF.setAlignment(MFI->isEntryFunction() ? Align(256) : Align(4));
472 
474 
475  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
477  // FIXME: This should be an explicit check for Mesa.
478  if (!STM.isAmdHsaOS() && !STM.isAmdPalOS()) {
479  MCSectionELF *ConfigSection =
480  Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
481  OutStreamer->switchSection(ConfigSection);
482  }
483 
484  if (MFI->isModuleEntryFunction()) {
485  getSIProgramInfo(CurrentProgramInfo, MF);
486  }
487 
488  if (STM.isAmdPalOS()) {
489  if (MFI->isEntryFunction())
490  EmitPALMetadata(MF, CurrentProgramInfo);
491  else if (MFI->isModuleEntryFunction())
492  emitPALFunctionMetadata(MF);
493  } else if (!STM.isAmdHsaOS()) {
494  EmitProgramInfoSI(MF, CurrentProgramInfo);
495  }
496 
497  DumpCodeInstEmitter = nullptr;
498  if (STM.dumpCode()) {
499  // For -dumpcode, get the assembler out of the streamer, even if it does
500  // not really want to let us have it. This only works with -filetype=obj.
501  bool SaveFlag = OutStreamer->getUseAssemblerInfoForParsing();
502  OutStreamer->setUseAssemblerInfoForParsing(true);
503  MCAssembler *Assembler = OutStreamer->getAssemblerPtr();
504  OutStreamer->setUseAssemblerInfoForParsing(SaveFlag);
505  if (Assembler)
506  DumpCodeInstEmitter = Assembler->getEmitterPtr();
507  }
508 
509  DisasmLines.clear();
510  HexLines.clear();
511  DisasmLineMaxLen = 0;
512 
514 
515  emitResourceUsageRemarks(MF, CurrentProgramInfo, MFI->isModuleEntryFunction(),
516  STM.hasMAIInsts());
517 
518  if (isVerbose()) {
519  MCSectionELF *CommentSection =
520  Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
521  OutStreamer->switchSection(CommentSection);
522 
523  if (!MFI->isEntryFunction()) {
524  OutStreamer->emitRawComment(" Function info:", false);
526  ResourceUsage->getResourceInfo(&MF.getFunction());
527  emitCommonFunctionComments(
528  Info.NumVGPR,
529  STM.hasMAIInsts() ? Info.NumAGPR : Optional<uint32_t>(),
530  Info.getTotalNumVGPRs(STM),
531  Info.getTotalNumSGPRs(MF.getSubtarget<GCNSubtarget>()),
532  Info.PrivateSegmentSize,
533  getFunctionCodeSize(MF), MFI);
534  return false;
535  }
536 
537  OutStreamer->emitRawComment(" Kernel info:", false);
538  emitCommonFunctionComments(CurrentProgramInfo.NumArchVGPR,
539  STM.hasMAIInsts()
540  ? CurrentProgramInfo.NumAccVGPR
541  : Optional<uint32_t>(),
542  CurrentProgramInfo.NumVGPR,
543  CurrentProgramInfo.NumSGPR,
544  CurrentProgramInfo.ScratchSize,
545  getFunctionCodeSize(MF), MFI);
546 
547  OutStreamer->emitRawComment(
548  " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false);
549  OutStreamer->emitRawComment(
550  " IeeeMode: " + Twine(CurrentProgramInfo.IEEEMode), false);
551  OutStreamer->emitRawComment(
552  " LDSByteSize: " + Twine(CurrentProgramInfo.LDSSize) +
553  " bytes/workgroup (compile time only)", false);
554 
555  OutStreamer->emitRawComment(
556  " SGPRBlocks: " + Twine(CurrentProgramInfo.SGPRBlocks), false);
557  OutStreamer->emitRawComment(
558  " VGPRBlocks: " + Twine(CurrentProgramInfo.VGPRBlocks), false);
559 
560  OutStreamer->emitRawComment(
561  " NumSGPRsForWavesPerEU: " +
562  Twine(CurrentProgramInfo.NumSGPRsForWavesPerEU), false);
563  OutStreamer->emitRawComment(
564  " NumVGPRsForWavesPerEU: " +
565  Twine(CurrentProgramInfo.NumVGPRsForWavesPerEU), false);
566 
567  if (STM.hasGFX90AInsts())
568  OutStreamer->emitRawComment(
569  " AccumOffset: " +
570  Twine((CurrentProgramInfo.AccumOffset + 1) * 4), false);
571 
572  OutStreamer->emitRawComment(
573  " Occupancy: " +
574  Twine(CurrentProgramInfo.Occupancy), false);
575 
576  OutStreamer->emitRawComment(
577  " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
578 
579  OutStreamer->emitRawComment(
580  " COMPUTE_PGM_RSRC2:SCRATCH_EN: " +
581  Twine(G_00B84C_SCRATCH_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
582  OutStreamer->emitRawComment(
583  " COMPUTE_PGM_RSRC2:USER_SGPR: " +
584  Twine(G_00B84C_USER_SGPR(CurrentProgramInfo.ComputePGMRSrc2)), false);
585  OutStreamer->emitRawComment(
586  " COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
587  Twine(G_00B84C_TRAP_HANDLER(CurrentProgramInfo.ComputePGMRSrc2)), false);
588  OutStreamer->emitRawComment(
589  " COMPUTE_PGM_RSRC2:TGID_X_EN: " +
590  Twine(G_00B84C_TGID_X_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
591  OutStreamer->emitRawComment(
592  " COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
593  Twine(G_00B84C_TGID_Y_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
594  OutStreamer->emitRawComment(
595  " COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
596  Twine(G_00B84C_TGID_Z_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
597  OutStreamer->emitRawComment(
598  " COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
599  Twine(G_00B84C_TIDIG_COMP_CNT(CurrentProgramInfo.ComputePGMRSrc2)),
600  false);
601 
602  assert(STM.hasGFX90AInsts() ||
603  CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
604  if (STM.hasGFX90AInsts()) {
605  OutStreamer->emitRawComment(
606  " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
607  Twine((AMDHSA_BITS_GET(CurrentProgramInfo.ComputePGMRSrc3GFX90A,
608  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET))),
609  false);
610  OutStreamer->emitRawComment(
611  " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
612  Twine((AMDHSA_BITS_GET(CurrentProgramInfo.ComputePGMRSrc3GFX90A,
613  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT))),
614  false);
615  }
616  }
617 
618  if (DumpCodeInstEmitter) {
619 
620  OutStreamer->switchSection(
621  Context.getELFSection(".AMDGPU.disasm", ELF::SHT_PROGBITS, 0));
622 
623  for (size_t i = 0; i < DisasmLines.size(); ++i) {
624  std::string Comment = "\n";
625  if (!HexLines[i].empty()) {
626  Comment = std::string(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
627  Comment += " ; " + HexLines[i] + "\n";
628  }
629 
630  OutStreamer->emitBytes(StringRef(DisasmLines[i]));
631  OutStreamer->emitBytes(StringRef(Comment));
632  }
633  }
634 
635  return false;
636 }
637 
638 // TODO: Fold this into emitFunctionBodyStart.
639 void AMDGPUAsmPrinter::initializeTargetID(const Module &M) {
640  // In the beginning all features are either 'Any' or 'NotSupported',
641  // depending on global target features. This will cover empty modules.
643  *getGlobalSTI(), getGlobalSTI()->getFeatureString());
644 
645  // If module is empty, we are done.
646  if (M.empty())
647  return;
648 
649  // If module is not empty, need to find first 'Off' or 'On' feature
650  // setting per feature from functions in module.
651  for (auto &F : M) {
652  auto &TSTargetID = getTargetStreamer()->getTargetID();
653  if ((!TSTargetID->isXnackSupported() || TSTargetID->isXnackOnOrOff()) &&
654  (!TSTargetID->isSramEccSupported() || TSTargetID->isSramEccOnOrOff()))
655  break;
656 
657  const GCNSubtarget &STM = TM.getSubtarget<GCNSubtarget>(F);
658  const IsaInfo::AMDGPUTargetID &STMTargetID = STM.getTargetID();
659  if (TSTargetID->isXnackSupported())
660  if (TSTargetID->getXnackSetting() == IsaInfo::TargetIDSetting::Any)
661  TSTargetID->setXnackSetting(STMTargetID.getXnackSetting());
662  if (TSTargetID->isSramEccSupported())
663  if (TSTargetID->getSramEccSetting() == IsaInfo::TargetIDSetting::Any)
664  TSTargetID->setSramEccSetting(STMTargetID.getSramEccSetting());
665  }
666 }
667 
668 uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(const MachineFunction &MF) const {
669  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
670  const SIInstrInfo *TII = STM.getInstrInfo();
671 
672  uint64_t CodeSize = 0;
673 
674  for (const MachineBasicBlock &MBB : MF) {
675  for (const MachineInstr &MI : MBB) {
676  // TODO: CodeSize should account for multiple functions.
677 
678  // TODO: Should we count size of debug info?
679  if (MI.isDebugInstr())
680  continue;
681 
682  CodeSize += TII->getInstSizeInBytes(MI);
683  }
684  }
685 
686  return CodeSize;
687 }
688 
689 void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
690  const MachineFunction &MF) {
692  ResourceUsage->getResourceInfo(&MF.getFunction());
693  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
694 
695  ProgInfo.NumArchVGPR = Info.NumVGPR;
696  ProgInfo.NumAccVGPR = Info.NumAGPR;
697  ProgInfo.NumVGPR = Info.getTotalNumVGPRs(STM);
698  ProgInfo.AccumOffset = alignTo(std::max(1, Info.NumVGPR), 4) / 4 - 1;
699  ProgInfo.TgSplit = STM.isTgSplitEnabled();
700  ProgInfo.NumSGPR = Info.NumExplicitSGPR;
701  ProgInfo.ScratchSize = Info.PrivateSegmentSize;
702  ProgInfo.VCCUsed = Info.UsesVCC;
703  ProgInfo.FlatUsed = Info.UsesFlatScratch;
704  ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion;
705 
706  const uint64_t MaxScratchPerWorkitem =
708  if (ProgInfo.ScratchSize > MaxScratchPerWorkitem) {
709  DiagnosticInfoStackSize DiagStackSize(MF.getFunction(),
710  ProgInfo.ScratchSize,
711  MaxScratchPerWorkitem, DS_Error);
712  MF.getFunction().getContext().diagnose(DiagStackSize);
713  }
714 
716 
717  // The calculations related to SGPR/VGPR blocks are
718  // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
719  // unified.
720  unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
721  &STM, ProgInfo.VCCUsed, ProgInfo.FlatUsed);
722 
723  // Check the addressable register limit before we add ExtraSGPRs.
725  !STM.hasSGPRInitBug()) {
726  unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
727  if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
728  // This can happen due to a compiler bug or when using inline asm.
731  MF.getFunction(), "addressable scalar registers", ProgInfo.NumSGPR,
732  MaxAddressableNumSGPRs, DS_Error, DK_ResourceLimit);
733  Ctx.diagnose(Diag);
734  ProgInfo.NumSGPR = MaxAddressableNumSGPRs - 1;
735  }
736  }
737 
738  // Account for extra SGPRs and VGPRs reserved for debugger use.
739  ProgInfo.NumSGPR += ExtraSGPRs;
740 
741  const Function &F = MF.getFunction();
742 
743  // Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave
744  // dispatch registers are function args.
745  unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
746 
747  if (isShader(F.getCallingConv())) {
748  bool IsPixelShader =
749  F.getCallingConv() == CallingConv::AMDGPU_PS && !STM.isAmdHsaOS();
750 
751  // Calculate the number of VGPR registers based on the SPI input registers
752  uint32_t InputEna = 0;
753  uint32_t InputAddr = 0;
754  unsigned LastEna = 0;
755 
756  if (IsPixelShader) {
757  // Note for IsPixelShader:
758  // By this stage, all enabled inputs are tagged in InputAddr as well.
759  // We will use InputAddr to determine whether the input counts against the
760  // vgpr total and only use the InputEnable to determine the last input
761  // that is relevant - if extra arguments are used, then we have to honour
762  // the InputAddr for any intermediate non-enabled inputs.
763  InputEna = MFI->getPSInputEnable();
764  InputAddr = MFI->getPSInputAddr();
765 
766  // We only need to consider input args up to the last used arg.
767  assert((InputEna || InputAddr) &&
768  "PSInputAddr and PSInputEnable should "
769  "never both be 0 for AMDGPU_PS shaders");
770  // There are some rare circumstances where InputAddr is non-zero and
771  // InputEna can be set to 0. In this case we default to setting LastEna
772  // to 1.
773  LastEna = InputEna ? findLastSet(InputEna) + 1 : 1;
774  }
775 
776  // FIXME: We should be using the number of registers determined during
777  // calling convention lowering to legalize the types.
778  const DataLayout &DL = F.getParent()->getDataLayout();
779  unsigned PSArgCount = 0;
780  unsigned IntermediateVGPR = 0;
781  for (auto &Arg : F.args()) {
782  unsigned NumRegs = (DL.getTypeSizeInBits(Arg.getType()) + 31) / 32;
783  if (Arg.hasAttribute(Attribute::InReg)) {
784  WaveDispatchNumSGPR += NumRegs;
785  } else {
786  // If this is a PS shader and we're processing the PS Input args (first
787  // 16 VGPR), use the InputEna and InputAddr bits to define how many
788  // VGPRs are actually used.
789  // Any extra VGPR arguments are handled as normal arguments (and
790  // contribute to the VGPR count whether they're used or not).
791  if (IsPixelShader && PSArgCount < 16) {
792  if ((1 << PSArgCount) & InputAddr) {
793  if (PSArgCount < LastEna)
794  WaveDispatchNumVGPR += NumRegs;
795  else
796  IntermediateVGPR += NumRegs;
797  }
798  PSArgCount++;
799  } else {
800  // If there are extra arguments we have to include the allocation for
801  // the non-used (but enabled with InputAddr) input arguments
802  if (IntermediateVGPR) {
803  WaveDispatchNumVGPR += IntermediateVGPR;
804  IntermediateVGPR = 0;
805  }
806  WaveDispatchNumVGPR += NumRegs;
807  }
808  }
809  }
810  ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR);
811  ProgInfo.NumArchVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
812  ProgInfo.NumVGPR =
813  Info.getTotalNumVGPRs(STM, Info.NumAGPR, ProgInfo.NumArchVGPR);
814  }
815 
816  // Adjust number of registers used to meet default/requested minimum/maximum
817  // number of waves per execution unit request.
818  ProgInfo.NumSGPRsForWavesPerEU = std::max(
819  std::max(ProgInfo.NumSGPR, 1u), STM.getMinNumSGPRs(MFI->getMaxWavesPerEU()));
820  ProgInfo.NumVGPRsForWavesPerEU = std::max(
821  std::max(ProgInfo.NumVGPR, 1u), STM.getMinNumVGPRs(MFI->getMaxWavesPerEU()));
822 
824  STM.hasSGPRInitBug()) {
825  unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
826  if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
827  // This can happen due to a compiler bug or when using inline asm to use
828  // the registers which are usually reserved for vcc etc.
830  DiagnosticInfoResourceLimit Diag(MF.getFunction(), "scalar registers",
831  ProgInfo.NumSGPR, MaxAddressableNumSGPRs,
833  Ctx.diagnose(Diag);
834  ProgInfo.NumSGPR = MaxAddressableNumSGPRs;
835  ProgInfo.NumSGPRsForWavesPerEU = MaxAddressableNumSGPRs;
836  }
837  }
838 
839  if (STM.hasSGPRInitBug()) {
840  ProgInfo.NumSGPR =
842  ProgInfo.NumSGPRsForWavesPerEU =
844  }
845 
846  if (MFI->getNumUserSGPRs() > STM.getMaxNumUserSGPRs()) {
848  DiagnosticInfoResourceLimit Diag(MF.getFunction(), "user SGPRs",
849  MFI->getNumUserSGPRs(),
851  Ctx.diagnose(Diag);
852  }
853 
854  if (MFI->getLDSSize() > static_cast<unsigned>(STM.getLocalMemorySize())) {
856  DiagnosticInfoResourceLimit Diag(MF.getFunction(), "local memory",
857  MFI->getLDSSize(),
859  Ctx.diagnose(Diag);
860  }
861 
863  &STM, ProgInfo.NumSGPRsForWavesPerEU);
865  &STM, ProgInfo.NumVGPRsForWavesPerEU);
866 
867  const SIModeRegisterDefaults Mode = MFI->getMode();
868 
869  // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
870  // register.
871  ProgInfo.FloatMode = getFPMode(Mode);
872 
873  ProgInfo.IEEEMode = Mode.IEEE;
874 
875 // Make the clamp modifier return 0 on NaN input.
876  ProgInfo.DX10Clamp = Mode.DX10Clamp;
877 
878  unsigned LDSAlignShift;
880  // LDS is allocated in 64 dword blocks.
881  LDSAlignShift = 8;
882  } else {
883  // LDS is allocated in 128 dword blocks.
884  LDSAlignShift = 9;
885  }
886 
887  ProgInfo.SGPRSpill = MFI->getNumSpilledSGPRs();
888  ProgInfo.VGPRSpill = MFI->getNumSpilledVGPRs();
889 
890  ProgInfo.LDSSize = MFI->getLDSSize();
891  ProgInfo.LDSBlocks =
892  alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
893 
894  // Scratch is allocated in 64-dword or 256-dword blocks.
895  unsigned ScratchAlignShift =
896  STM.getGeneration() >= AMDGPUSubtarget::GFX11 ? 8 : 10;
897  // We need to program the hardware with the amount of scratch memory that
898  // is used by the entire wave. ProgInfo.ScratchSize is the amount of
899  // scratch memory used per thread.
900  ProgInfo.ScratchBlocks = divideCeil(
901  ProgInfo.ScratchSize * STM.getWavefrontSize(), 1ULL << ScratchAlignShift);
902 
903  if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
904  ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
905  ProgInfo.MemOrdered = 1;
906  }
907 
908  // 0 = X, 1 = XY, 2 = XYZ
909  unsigned TIDIGCompCnt = 0;
910  if (MFI->hasWorkItemIDZ())
911  TIDIGCompCnt = 2;
912  else if (MFI->hasWorkItemIDY())
913  TIDIGCompCnt = 1;
914 
915  // The private segment wave byte offset is the last of the system SGPRs. We
916  // initially assumed it was allocated, and may have used it. It shouldn't harm
917  // anything to disable it if we know the stack isn't used here. We may still
918  // have emitted code reading it to initialize scratch, but if that's unused
919  // reading garbage should be OK.
920  const bool EnablePrivateSegment = ProgInfo.ScratchBlocks > 0;
921  ProgInfo.ComputePGMRSrc2 =
922  S_00B84C_SCRATCH_EN(EnablePrivateSegment) |
924  // For AMDHSA, TRAP_HANDLER must be zero, as it is populated by the CP.
930  S_00B84C_TIDIG_COMP_CNT(TIDIGCompCnt) |
932  // For AMDHSA, LDS_SIZE must be zero, as it is populated by the CP.
933  S_00B84C_LDS_SIZE(STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks) |
934  S_00B84C_EXCP_EN(0);
935 
936  if (STM.hasGFX90AInsts()) {
938  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
939  ProgInfo.AccumOffset);
941  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
942  ProgInfo.TgSplit);
943  }
944 
945  ProgInfo.Occupancy = STM.computeOccupancy(MF.getFunction(), ProgInfo.LDSSize,
946  ProgInfo.NumSGPRsForWavesPerEU,
947  ProgInfo.NumVGPRsForWavesPerEU);
948 }
949 
950 static unsigned getRsrcReg(CallingConv::ID CallConv) {
951  switch (CallConv) {
952  default: [[fallthrough]];
960  }
961 }
962 
963 void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
964  const SIProgramInfo &CurrentProgramInfo) {
966  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
967  unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
968 
971 
972  OutStreamer->emitInt32(CurrentProgramInfo.getComputePGMRSrc1());
973 
975  OutStreamer->emitInt32(CurrentProgramInfo.ComputePGMRSrc2);
976 
978  OutStreamer->emitInt32(
979  STM.getGeneration() >= AMDGPUSubtarget::GFX11
980  ? S_00B860_WAVESIZE_GFX11Plus(CurrentProgramInfo.ScratchBlocks)
981  : S_00B860_WAVESIZE_PreGFX11(CurrentProgramInfo.ScratchBlocks));
982 
983  // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
984  // 0" comment but I don't see a corresponding field in the register spec.
985  } else {
986  OutStreamer->emitInt32(RsrcReg);
987  OutStreamer->emitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
988  S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4);
990  OutStreamer->emitInt32(
991  STM.getGeneration() >= AMDGPUSubtarget::GFX11
992  ? S_0286E8_WAVESIZE_GFX11Plus(CurrentProgramInfo.ScratchBlocks)
993  : S_0286E8_WAVESIZE_PreGFX11(CurrentProgramInfo.ScratchBlocks));
994  }
995 
998  unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11
999  ? divideCeil(CurrentProgramInfo.LDSBlocks, 2)
1000  : CurrentProgramInfo.LDSBlocks;
1001  OutStreamer->emitInt32(S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize));
1003  OutStreamer->emitInt32(MFI->getPSInputEnable());
1005  OutStreamer->emitInt32(MFI->getPSInputAddr());
1006  }
1007 
1008  OutStreamer->emitInt32(R_SPILLED_SGPRS);
1009  OutStreamer->emitInt32(MFI->getNumSpilledSGPRs());
1010  OutStreamer->emitInt32(R_SPILLED_VGPRS);
1011  OutStreamer->emitInt32(MFI->getNumSpilledVGPRs());
1012 }
1013 
1014 // This is the equivalent of EmitProgramInfoSI above, but for when the OS type
1015 // is AMDPAL. It stores each compute/SPI register setting and other PAL
1016 // metadata items into the PALMD::Metadata, combining with any provided by the
1017 // frontend as LLVM metadata. Once all functions are written, the PAL metadata
1018 // is then written as a single block in the .note section.
1019 void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
1020  const SIProgramInfo &CurrentProgramInfo) {
1022  auto CC = MF.getFunction().getCallingConv();
1023  auto MD = getTargetStreamer()->getPALMetadata();
1024 
1026  MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU);
1027 
1028  // Only set AGPRs for supported devices
1029  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
1030  if (STM.hasMAIInsts()) {
1031  MD->setNumUsedAgprs(CC, CurrentProgramInfo.NumAccVGPR);
1032  }
1033 
1034  MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU);
1035  MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC));
1036  if (AMDGPU::isCompute(CC)) {
1037  MD->setRsrc2(CC, CurrentProgramInfo.ComputePGMRSrc2);
1038  } else {
1039  if (CurrentProgramInfo.ScratchBlocks > 0)
1040  MD->setRsrc2(CC, S_00B84C_SCRATCH_EN(1));
1041  }
1042  // ScratchSize is in bytes, 16 aligned.
1043  MD->setScratchSize(CC, alignTo(CurrentProgramInfo.ScratchSize, 16));
1045  unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11
1046  ? divideCeil(CurrentProgramInfo.LDSBlocks, 2)
1047  : CurrentProgramInfo.LDSBlocks;
1048  MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize));
1049  MD->setSpiPsInputEna(MFI->getPSInputEnable());
1050  MD->setSpiPsInputAddr(MFI->getPSInputAddr());
1051  }
1052 
1053  if (STM.isWave32())
1054  MD->setWave32(MF.getFunction().getCallingConv());
1055 }
1056 
// Records PAL metadata for the single function MF on the target streamer's
// PAL metadata object: the function's scratch size (taken from the frame's
// stack size), the compute RSRC1/RSRC2 register values, and the function's
// LDS, VGPR and SGPR usage.
//
// Everything except the stack size is read from CurrentProgramInfo, which is
// neither a parameter nor a local of this function.
// NOTE(review): presumably this is the printer's member holding the program
// info computed for MF before this call -- confirm against the class.
1057 void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
1058  auto *MD = getTargetStreamer()->getPALMetadata();
1059  const MachineFrameInfo &MFI = MF.getFrameInfo();
1060  MD->setFunctionScratchSize(MF, MFI.getStackSize());
1061 
1062  // Set compute registers
  // Both registers are recorded under CallingConv::AMDGPU_CS explicitly; MF's
  // own calling convention is not consulted here.
1063  MD->setRsrc1(CallingConv::AMDGPU_CS,
1064  CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS));
1065  MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.ComputePGMRSrc2);
1066 
1067  // Set optional info
1068  MD->setFunctionLdsSize(MF, CurrentProgramInfo.LDSSize);
1069  MD->setFunctionNumUsedVgprs(MF, CurrentProgramInfo.NumVGPRsForWavesPerEU);
1070  MD->setFunctionNumUsedSgprs(MF, CurrentProgramInfo.NumSGPRsForWavesPerEU);
1071 }
1072 
1073 // This is supposed to be log2(Size)
1075  switch (Size) {
1076  case 4:
1077  return AMD_ELEMENT_4_BYTES;
1078  case 8:
1079  return AMD_ELEMENT_8_BYTES;
1080  case 16:
1081  return AMD_ELEMENT_16_BYTES;
1082  default:
1083  llvm_unreachable("invalid private_element_size");
1084  }
1085 }
1086 
1087 void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
1088  const SIProgramInfo &CurrentProgramInfo,
1089  const MachineFunction &MF) const {
1090  const Function &F = MF.getFunction();
1091  assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
1092  F.getCallingConv() == CallingConv::SPIR_KERNEL);
1093 
1095  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
1096 
1098 
1100  CurrentProgramInfo.getComputePGMRSrc1() |
1101  (CurrentProgramInfo.ComputePGMRSrc2 << 32);
1103 
1104  if (CurrentProgramInfo.DynamicCallStack)
1106 
1109  getElementByteSizeValue(STM.getMaxPrivateElementSize(true)));
1110 
1111  if (MFI->hasPrivateSegmentBuffer()) {
1112  Out.code_properties |=
1114  }
1115 
1116  if (MFI->hasDispatchPtr())
1118 
1119  if (MFI->hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5)
1121 
1122  if (MFI->hasKernargSegmentPtr())
1124 
1125  if (MFI->hasDispatchID())
1127 
1128  if (MFI->hasFlatScratchInit())
1130 
1131  if (MFI->hasDispatchPtr())
1133 
1134  if (STM.isXNACKEnabled())
1136 
1137  Align MaxKernArgAlign;
1138  Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
1139  Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
1140  Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
1141  Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
1142  Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
1143 
1144  // kernarg_segment_alignment is specified as log of the alignment.
1145  // The minimum alignment is 16.
1146  // FIXME: The metadata treats the minimum as 4?
1147  Out.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
1148 }
1149 
1151  const char *ExtraCode, raw_ostream &O) {
1152  // First try the generic code, which knows about modifiers like 'c' and 'n'.
1153  if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O))
1154  return false;
1155 
1156  if (ExtraCode && ExtraCode[0]) {
1157  if (ExtraCode[1] != 0)
1158  return true; // Unknown modifier.
1159 
1160  switch (ExtraCode[0]) {
1161  case 'r':
1162  break;
1163  default:
1164  return true;
1165  }
1166  }
1167 
1168  // TODO: Should be able to support other operand types like globals.
1169  const MachineOperand &MO = MI->getOperand(OpNo);
1170  if (MO.isReg()) {
1173  return false;
1174  } else if (MO.isImm()) {
1175  int64_t Val = MO.getImm();
1176  if (AMDGPU::isInlinableIntLiteral(Val)) {
1177  O << Val;
1178  } else if (isUInt<16>(Val)) {
1179  O << format("0x%" PRIx16, static_cast<uint16_t>(Val));
1180  } else if (isUInt<32>(Val)) {
1181  O << format("0x%" PRIx32, static_cast<uint32_t>(Val));
1182  } else {
1183  O << format("0x%" PRIx64, static_cast<uint64_t>(Val));
1184  }
1185  return false;
1186  }
1187  return true;
1188 }
1189 
1194 }
1195 
1196 void AMDGPUAsmPrinter::emitResourceUsageRemarks(
1197  const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo,
1198  bool isModuleEntryFunction, bool hasMAIInsts) {
1199  if (!ORE)
1200  return;
1201 
1202  const char *Name = "kernel-resource-usage";
1203  const char *Indent = " ";
1204 
1205  // If the remark is not specifically enabled, do not output to yaml
1206  LLVMContext &Ctx = MF.getFunction().getContext();
1208  return;
1209 
1210  auto EmitResourceUsageRemark = [&](StringRef RemarkName,
1211  StringRef RemarkLabel, auto Argument) {
1212  // Add an indent for every line besides the line with the kernel name. This
1213  // makes it easier to tell which resource usages go with which kernel since
1214  // the kernel name will always be displayed first.
1215  std::string LabelStr = RemarkLabel.str() + ": ";
1216  if (!RemarkName.equals("FunctionName"))
1217  LabelStr = Indent + LabelStr;
1218 
1219  ORE->emit([&]() {
1220  return MachineOptimizationRemarkAnalysis(Name, RemarkName,
1222  &MF.front())
1223  << LabelStr << ore::NV(RemarkName, Argument);
1224  });
1225  };
1226 
1227  // FIXME: Formatting here is pretty nasty because clang does not accept
1228  // newlines from diagnostics. This forces us to emit multiple diagnostic
1229  // remarks to simulate newlines. If and when clang does accept newlines, this
1230  // formatting should be aggregated into one remark with newlines to avoid
1231  // printing multiple diagnostic locations and diag opts.
1232  EmitResourceUsageRemark("FunctionName", "Function Name",
1233  MF.getFunction().getName());
1234  EmitResourceUsageRemark("NumSGPR", "SGPRs", CurrentProgramInfo.NumSGPR);
1235  EmitResourceUsageRemark("NumVGPR", "VGPRs", CurrentProgramInfo.NumArchVGPR);
1236  if (hasMAIInsts)
1237  EmitResourceUsageRemark("NumAGPR", "AGPRs", CurrentProgramInfo.NumAccVGPR);
1238  EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]",
1239  CurrentProgramInfo.ScratchSize);
1240  EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]",
1241  CurrentProgramInfo.Occupancy);
1242  EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill",
1243  CurrentProgramInfo.SGPRSpill);
1244  EmitResourceUsageRemark("VGPRSpill", "VGPRs Spill",
1245  CurrentProgramInfo.VGPRSpill);
1246  if (isModuleEntryFunction)
1247  EmitResourceUsageRemark("BytesLDS", "LDS Size [bytes/block]",
1248  CurrentProgramInfo.LDSSize);
1249 }
llvm::AMDGPUAsmPrinter::emitBasicBlockStart
void emitBasicBlockStart(const MachineBasicBlock &MBB) override
Targets can override this to emit stuff at the start of a basic block.
Definition: AMDGPUAsmPrinter.cpp:302
llvm::MCTargetStreamer::getStreamer
MCStreamer & getStreamer()
Definition: MCStreamer.h:101
S_00B84C_TGID_Y_EN
#define S_00B84C_TGID_Y_EN(x)
Definition: SIDefines.h:956
i
i
Definition: README.txt:29
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:156
llvm::AMDGPU::isHsaAbiVersion3
bool isHsaAbiVersion3(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:125
getFPMode
static uint32_t getFPMode(AMDGPU::SIModeRegisterDefaults Mode)
Definition: AMDGPUAsmPrinter.cpp:68
FP_ROUND_ROUND_TO_NEAREST
#define FP_ROUND_ROUND_TO_NEAREST
Definition: SIDefines.h:1020
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
llvm::AMDGPUTargetStreamer::getTargetID
const Optional< AMDGPU::IsaInfo::AMDGPUTargetID > & getTargetID() const
Definition: AMDGPUTargetStreamer.h:97
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:108
getRsrcReg
static unsigned getRsrcReg(CallingConv::ID CallConv)
Definition: AMDGPUAsmPrinter.cpp:950
AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR
Definition: AMDKernelCodeT.h:95
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::AMDGPUPALMetadata::readFromIR
void readFromIR(Module &M)
Definition: AMDGPUPALMetadata.cpp:31
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
R_00B128_SPI_SHADER_PGM_RSRC1_VS
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS
Definition: SIDefines.h:919
llvm::SIProgramInfo::SGPRSpill
unsigned SGPRSpill
Definition: SIProgramInfo.h:52
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
AMD_ELEMENT_4_BYTES
@ AMD_ELEMENT_4_BYTES
Definition: AMDKernelCodeT.h:55
llvm::AMDGPU::getIsaVersion
IsaVersion getIsaVersion(StringRef GPU)
Definition: TargetParser.cpp:193
SIMachineFunctionInfo.h
llvm::MCContext
Context object for machine code objects.
Definition: MCContext.h:76
llvm::Function
Definition: Function.h:60
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::setXnackSetting
void setXnackSetting(TargetIDSetting NewXnackSetting)
Sets xnack setting to NewXnackSetting.
Definition: AMDGPUBaseInfo.h:140
llvm::SIMachineFunctionInfo::getNumSpilledSGPRs
unsigned getNumSpilledSGPRs() const
Definition: SIMachineFunctionInfo.h:858
AMDGPUHSAMetadataStreamer.h
AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK
@ AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK
Definition: AMDKernelCodeT.h:184
llvm::AMDGPUTargetStreamer::EmitDirectiveAMDGCNTarget
virtual void EmitDirectiveAMDGCNTarget()=0
MCSectionELF.h
FP_DENORM_MODE_DP
#define FP_DENORM_MODE_DP(x)
Definition: SIDefines.h:1039
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:376
llvm::GlobalObject::getAlign
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:79
amd_kernel_code_t::compute_pgm_resource_registers
uint64_t compute_pgm_resource_registers
Shader program settings for CS.
Definition: AMDKernelCodeT.h:558
llvm::AMDGPUAsmPrinter::AMDGPUAsmPrinter
AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
Definition: AMDGPUAsmPrinter.cpp:88
llvm::Function::getSubprogram
DISubprogram * getSubprogram() const
Get the attached subprogram.
Definition: Metadata.cpp:1583
llvm::SIProgramInfo::WgpMode
uint32_t WgpMode
Definition: SIProgramInfo.h:35
llvm::SIProgramInfo::NumSGPR
uint32_t NumSGPR
Definition: SIProgramInfo.h:51
llvm::AMDGPUAsmPrinter::emitFunctionBodyStart
void emitFunctionBodyStart() override
Targets can override this to emit stuff before the first basic block in the function.
Definition: AMDGPUAsmPrinter.cpp:193
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::AMDGPUSubtarget::SEA_ISLANDS
@ SEA_ISLANDS
Definition: AMDGPUSubtarget.h:38
OptimizationRemarkEmitter.h
llvm::AsmPrinter::getNameWithPrefix
void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const
Definition: AsmPrinter.cpp:650
llvm::CallingConv::AMDGPU_GS
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:192
MCAssembler.h
llvm::AMDGPUAsmPrinter::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
Record analysis usage.
Definition: AMDGPUAsmPrinter.cpp:1190
llvm::AsmPrinter::doFinalization
bool doFinalization(Module &M) override
Shut down the asmprinter.
Definition: AsmPrinter.cpp:2056
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:125
R_00B528_SPI_SHADER_PGM_RSRC1_LS
#define R_00B528_SPI_SHADER_PGM_RSRC1_LS
Definition: SIDefines.h:941
llvm::AMDGPU::getAmdhsaCodeObjectVersion
unsigned getAmdhsaCodeObjectVersion()
Definition: AMDGPUBaseInfo.cpp:148
llvm::AMDGPUPALMetadata::setEntryPoint
void setEntryPoint(unsigned CC, StringRef Name)
Definition: AMDGPUPALMetadata.cpp:188
R_0286CC_SPI_PS_INPUT_ENA
#define R_0286CC_SPI_PS_INPUT_ENA
Definition: SIDefines.h:980
llvm::AMDGPUMachineFunction::getLDSSize
uint32_t getLDSSize() const
Definition: AMDGPUMachineFunction.h:72
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:320
amd_element_byte_size_t
amd_element_byte_size_t
The values used to define the number of bytes to use for the swizzle element size.
Definition: AMDKernelCodeT.h:53
AMDGPUAsmPrinter.h
llvm::MCSymbol::isDefined
bool isDefined() const
isDefined - Check if this symbol is defined (i.e., it has an address).
Definition: MCSymbol.h:243
amd_kernel_code_t::workgroup_group_segment_byte_size
uint32_t workgroup_group_segment_byte_size
The amount of group segment memory required by a work-group in bytes.
Definition: AMDKernelCodeT.h:574
llvm::GCNSubtarget::isTrapHandlerEnabled
bool isTrapHandlerEnabled() const
Definition: GCNSubtarget.h:546
llvm::AMDGPU::IsaInfo::getNumExtraSGPRs
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
Definition: AMDGPUBaseInfo.cpp:801
llvm::AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough
bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const override
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
Definition: AMDGPUAsmPrinter.cpp:179
AMD_HSA_BITS_SET
#define AMD_HSA_BITS_SET(dst, mask, val)
Definition: AMDKernelCodeT.h:43
llvm::AMDGPUTargetStreamer::EmitDirectiveHSACodeObjectVersion
virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor)=0
llvm::SIMachineFunctionInfo::getPSInputEnable
unsigned getPSInputEnable() const
Definition: SIMachineFunctionInfo.h:878
llvm::CallingConv::AMDGPU_LS
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:214
llvm::Optional< uint32_t >
llvm::AMDGPU::SIModeRegisterDefaults
Definition: AMDGPUBaseInfo.h:1018
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV5
Definition: AMDGPUHSAMetadataStreamer.h:145
llvm::ELF::SHT_PROGBITS
@ SHT_PROGBITS
Definition: ELF.h:976
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
S_00B84C_USER_SGPR
#define S_00B84C_USER_SGPR(x)
Definition: SIDefines.h:947
llvm::AMDGPUAsmPrinter
Definition: AMDGPUAsmPrinter.h:40
llvm::SIProgramInfo::NumVGPR
uint32_t NumVGPR
Definition: SIProgramInfo.h:46
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT
@ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT
Definition: AMDKernelCodeT.h:107
llvm::MachineBasicBlock::back
MachineInstr & back()
Definition: MachineBasicBlock.h:285
llvm::AMDGPUTargetStreamer::EmitAMDKernelCodeT
virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header)=0
llvm::MCSectionELF
This represents a section on linux, lots of unix variants and some bare metal systems.
Definition: MCSectionELF.h:26
TargetParser.h
llvm::CallingConv::AMDGPU_VS
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
Definition: CallingConv.h:189
llvm::AsmPrinter::ORE
MachineOptimizationRemarkEmitter * ORE
Optimization remark emitter.
Definition: AsmPrinter.h:114
llvm::SIProgramInfo::NumSGPRsForWavesPerEU
uint32_t NumSGPRsForWavesPerEU
Definition: SIProgramInfo.h:58
llvm::MCObjectFileInfo::getContext
MCContext & getContext() const
Definition: MCObjectFileInfo.h:245
llvm::amdhsa::kernel_descriptor_t::compute_pgm_rsrc2
uint32_t compute_pgm_rsrc2
Definition: AMDHSAKernelDescriptor.h:179
llvm::SIProgramInfo::LDSSize
uint32_t LDSSize
Definition: SIProgramInfo.h:54
llvm::SIProgramInfo::AccumOffset
uint32_t AccumOffset
Definition: SIProgramInfo.h:49
R_0286D0_SPI_PS_INPUT_ADDR
#define R_0286D0_SPI_PS_INPUT_ADDR
Definition: SIDefines.h:981
llvm::AMDGPUSubtarget::getKernArgSegmentSize
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
Definition: AMDGPUSubtarget.cpp:552
llvm::AMDGPUSubtarget::GFX11
@ GFX11
Definition: AMDGPUSubtarget.h:42
S_00B860_WAVESIZE_PreGFX11
#define S_00B860_WAVESIZE_PreGFX11(x)
Definition: SIDefines.h:1042
llvm::AMDGPUTargetStreamer::initializeTargetID
void initializeTargetID(const MCSubtargetInfo &STI)
Definition: AMDGPUTargetStreamer.h:103
llvm::AsmPrinter::emitGlobalVariable
virtual void emitGlobalVariable(const GlobalVariable *GV)
Emit the specified global variable to the .s file.
Definition: AsmPrinter.cpp:677
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::GlobalVariable::hasInitializer
bool hasInitializer() const
Definitions have initializers, declarations don't.
Definition: GlobalVariable.h:91
AMDHSAKernelDescriptor.h
AMD_ELEMENT_8_BYTES
@ AMD_ELEMENT_8_BYTES
Definition: AMDKernelCodeT.h:56
llvm::GCNSubtarget::getInstrInfo
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:215
S_00B84C_TIDIG_COMP_CNT
#define S_00B84C_TIDIG_COMP_CNT(x)
Definition: SIDefines.h:965
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::AMDGPU::IsaVersion
Instruction set architecture version.
Definition: TargetParser.h:113
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:187
R600AsmPrinter.h
llvm::AMDGPUSubtarget::isAmdHsaOS
bool isAmdHsaOS() const
Definition: AMDGPUSubtarget.h:120
S_00B84C_SCRATCH_EN
#define S_00B84C_SCRATCH_EN(x)
Definition: SIDefines.h:944
llvm::AMDGPU::isGFX90A
bool isGFX90A(const MCSubtargetInfo &STI)
Definition: AMDGPUBaseInfo.cpp:1826
llvm::SIProgramInfo::MemOrdered
uint32_t MemOrdered
Definition: SIProgramInfo.h:36
S_00B84C_EXCP_EN_MSB
#define S_00B84C_EXCP_EN_MSB(x)
Definition: SIDefines.h:969
llvm::MachineFunction::front
const MachineBasicBlock & front() const
Definition: MachineFunction.h:865
llvm::AMDGPUMachineFunction::needsWaveLimiter
bool needsWaveLimiter() const
Definition: AMDGPUMachineFunction.h:94
llvm::AMDGPU::IsaInfo::AMDGPUTargetID
Definition: AMDGPUBaseInfo.h:105
ELF.h
TargetMachine.h
llvm::GCNSubtarget::getMaxWaveScratchSize
unsigned getMaxWaveScratchSize() const
Definition: GCNSubtarget.h:270
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:271
llvm::AMDGPU::isShader
bool isShader(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1672
llvm::AsmPrinter::emitLinkage
virtual void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const
This emits linkage information about GVSym based on GV, if this is supported by the target.
Definition: AsmPrinter.cpp:610
llvm::SIProgramInfo::NumArchVGPR
uint32_t NumArchVGPR
Definition: SIProgramInfo.h:47
llvm::SIProgramInfo::ComputePGMRSrc2
uint64_t ComputePGMRSrc2
Definition: SIProgramInfo.h:43
GCNSubtarget.h
S_00B84C_TGID_Z_EN
#define S_00B84C_TGID_Z_EN(x)
Definition: SIDefines.h:959
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:546
llvm::MachineFunction::setAlignment
void setAlignment(Align A)
setAlignment - Set the alignment of the function.
Definition: MachineFunction.h:707
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:754
S_00B02C_EXTRA_LDS_SIZE
#define S_00B02C_EXTRA_LDS_SIZE(x)
Definition: SIDefines.h:918
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
Definition: AMDKernelCodeT.h:87
G_00B84C_TRAP_HANDLER
#define G_00B84C_TRAP_HANDLER(x)
Definition: SIDefines.h:951
llvm::getTheAMDGPUTarget
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
Definition: AMDGPUTargetInfo.cpp:20
llvm::AsmPrinter::OutStreamer
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:99
llvm::SIProgramInfo::ScratchSize
uint64_t ScratchSize
Definition: SIProgramInfo.h:37
AMDGPUTargetInfo.h
llvm::Log2
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:209
llvm::MCSubtargetInfo::getTargetTriple
const Triple & getTargetTriple() const
Definition: MCSubtargetInfo.h:108
MCContext.h
llvm::SIProgramInfo::NumVGPRsForWavesPerEU
uint32_t NumVGPRsForWavesPerEU
Definition: SIProgramInfo.h:61
llvm::AMDGPU::hasMAIInsts
bool hasMAIInsts(const MCSubtargetInfo &STI)
Definition: AMDGPUBaseInfo.cpp:1838
llvm::SIMachineFunctionInfo::hasWorkGroupIDZ
bool hasWorkGroupIDZ() const
Definition: SIMachineFunctionInfo.h:711
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::AsmPrinter::SetupMachineFunction
virtual void SetupMachineFunction(MachineFunction &MF)
This should be called when a new MachineFunction is being processed from runOnMachineFunction.
Definition: AsmPrinter.cpp:2312
llvm::AsmPrinter::emitFunctionEntryLabel
virtual void emitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
Definition: AsmPrinter.cpp:1033
llvm::SIProgramInfo::LDSBlocks
uint32_t LDSBlocks
Definition: SIProgramInfo.h:40
llvm::amdhsa::kernel_descriptor_t::group_segment_fixed_size
uint32_t group_segment_fixed_size
Definition: AMDHSAKernelDescriptor.h:171
llvm::DiagnosticInfoResourceLimit
Diagnostic information for stack size etc.
Definition: DiagnosticInfo.h:186
llvm::AsmPrinter::isBlockOnlyReachableByFallthrough
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
Definition: AsmPrinter.cpp:3777
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
llvm::AMDGPUResourceUsageAnalysis::getResourceInfo
const SIFunctionResourceInfo & getResourceInfo(const Function *F) const
Definition: AMDGPUResourceUsageAnalysis.h:67
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
R_00B228_SPI_SHADER_PGM_RSRC1_GS
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS
Definition: SIDefines.h:924
llvm::AMDGPU
Definition: AMDGPUMetadataVerifier.h:34
llvm::AMDGPUAsmPrinter::IsTargetStreamerInitialized
bool IsTargetStreamerInitialized
Definition: AMDGPUAsmPrinter.h:140
llvm::GCNSubtarget::isWave32
bool isWave32() const
Definition: GCNSubtarget.h:1261
llvm::GCNSubtarget::hasSGPRInitBug
bool hasSGPRInitBug() const
Definition: GCNSubtarget.h:943
llvm::MachineOptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: MachineOptimizationRemarkEmitter.h:109
G_00B84C_TGID_Y_EN
#define G_00B84C_TGID_Y_EN(x)
Definition: SIDefines.h:957
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
amd_kernel_code_t::workitem_private_segment_byte_size
uint32_t workitem_private_segment_byte_size
The amount of memory required for the combined private, spill and arg segments for a work-item in byt...
Definition: AMDKernelCodeT.h:568
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
getElementByteSizeValue
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)
Definition: AMDGPUAsmPrinter.cpp:1074
llvm::SIProgramInfo::ComputePGMRSrc3GFX90A
uint64_t ComputePGMRSrc3GFX90A
Definition: SIProgramInfo.h:44
llvm::AMDGPUAsmPrinter::PrintAsmOperand
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &O) override
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
Definition: AMDGPUAsmPrinter.cpp:1150
llvm::SIProgramInfo::DynamicCallStack
bool DynamicCallStack
Definition: SIProgramInfo.h:68
AMDHSA_BITS_GET
#define AMDHSA_BITS_GET(SRC, MSK)
Definition: AMDHSAKernelDescriptor.h:37
llvm::AMDGPUAsmPrinter::emitStartOfAsmFile
void emitStartOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the start of their fi...
Definition: AMDGPUAsmPrinter.cpp:118
llvm::MachineFrameInfo::getStackSize
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Definition: MachineFrameInfo.h:577
llvm::amdhsa::kernel_descriptor_t::kernel_code_properties
uint16_t kernel_code_properties
Definition: AMDHSAKernelDescriptor.h:180
llvm::IndexedInstrProf::Version
const uint64_t Version
Definition: InstrProf.h:1056
llvm::AMDGPU::HSAMD::MetadataStreamerYamlV2
Definition: AMDGPUHSAMetadataStreamer.h:157
llvm::MCAssembler::getEmitterPtr
MCCodeEmitter * getEmitterPtr() const
Definition: MCAssembler.h:325
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::MCSymbol::getName
StringRef getName() const
getName - Get the symbol name.
Definition: MCSymbol.h:198
llvm::GCNSubtarget::isTgSplitEnabled
bool isTgSplitEnabled() const
Definition: GCNSubtarget.h:554
Align
uint64_t Align
Definition: ELFObjHandler.cpp:81
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::GCNSubtarget::dumpCode
bool dumpCode() const
Definition: GCNSubtarget.h:458
llvm::AMDGPUSubtarget::getLocalMemorySize
unsigned getLocalMemorySize() const
Definition: AMDGPUSubtarget.h:208
llvm::CallingConv::AMDGPU_CS
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:198
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::AsmPrinter::emitBasicBlockStart
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
Definition: AsmPrinter.cpp:3649
llvm::AMDGPU::isGFX10Plus
bool isGFX10Plus(const MCSubtargetInfo &STI)
Definition: AMDGPUBaseInfo.cpp:1786
llvm::amdhsa::kernel_descriptor_t::compute_pgm_rsrc3
uint32_t compute_pgm_rsrc3
Definition: AMDHSAKernelDescriptor.h:177
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::SmallString< 128 >
llvm::AMDGPU::isHsaAbiVersion2
bool isHsaAbiVersion2(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:119
llvm::DK_ResourceLimit
@ DK_ResourceLimit
Definition: DiagnosticInfo.h:62
llvm::AMDGPUTargetStreamer::EmitAmdhsaKernelDescriptor
virtual void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr)=0
llvm::StringRef::equals
bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Definition: StringRef.h:164
MachineOptimizationRemarkEmitter.h
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
llvm::AMDGPUSubtarget::getWavefrontSize
unsigned getWavefrontSize() const
Definition: AMDGPUSubtarget.h:200
S_00B84C_TGID_X_EN
#define S_00B84C_TGID_X_EN(x)
Definition: SIDefines.h:953
llvm::Triple::AMDHSA
@ AMDHSA
Definition: Triple.h:208
R_00B84C_COMPUTE_PGM_RSRC2
#define R_00B84C_COMPUTE_PGM_RSRC2
Definition: SIDefines.h:943
AMD_CODE_PROPERTY_IS_PTR64
@ AMD_CODE_PROPERTY_IS_PTR64
Definition: AMDKernelCodeT.h:172
llvm::LLVMContext::getDiagHandlerPtr
const DiagnosticHandler * getDiagHandlerPtr() const
getDiagHandlerPtr - Returns const raw pointer of DiagnosticHandler set by setDiagnosticHandler.
Definition: LLVMContext.cpp:363
R_0286E8_SPI_TMPRING_SIZE
#define R_0286E8_SPI_TMPRING_SIZE
Definition: SIDefines.h:1045
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:656
llvm::amdhsa::kernel_descriptor_t::private_segment_fixed_size
uint32_t private_segment_fixed_size
Definition: AMDHSAKernelDescriptor.h:172
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:264
llvm::divideCeil
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:683
llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition: GlobalVariable.h:135
AMDGPUResourceUsageAnalysis.h
Analyzes how many registers and other resources are used by functions.
llvm::AMDGPU::isInlinableIntLiteral
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
Definition: AMDGPUBaseInfo.h:943
llvm::MCAssembler
Definition: MCAssembler.h:73
llvm::SIProgramInfo::FloatMode
uint32_t FloatMode
Definition: SIProgramInfo.h:30
AMD_ELEMENT_16_BYTES
@ AMD_ELEMENT_16_BYTES
Definition: AMDKernelCodeT.h:57
AMDGPUTargetStreamer.h
llvm::MachineOptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Emit an optimization remark.
Definition: MachineOptimizationRemarkEmitter.cpp:48
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:320
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
uint64_t
llvm::GlobalValue::getVisibility
VisibilityTypes getVisibility() const
Definition: GlobalValue.h:242
llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:238
R_SPILLED_VGPRS
#define R_SPILLED_VGPRS
Definition: SIDefines.h:1059
llvm::SIMachineFunctionInfo::hasWorkGroupInfo
bool hasWorkGroupInfo() const
Definition: SIMachineFunctionInfo.h:715
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:650
llvm::Triple::getOS
OSType getOS() const
Get the parsed operating system type of this triple.
Definition: Triple.h:355
LLVM_EXTERNAL_VISIBILITY
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:127
llvm::TargetMachine::getMCSubtargetInfo
const MCSubtargetInfo * getMCSubtargetInfo() const
Definition: TargetMachine.h:209
llvm::SIMachineFunctionInfo::getMaxWavesPerEU
unsigned getMaxWavesPerEU() const
Definition: SIMachineFunctionInfo.h:930
llvm::AMDGPUAsmPrinter::emitEndOfAsmFile
void emitEndOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the end of their file...
Definition: AMDGPUAsmPrinter.cpp:156
R_00B028_SPI_SHADER_PGM_RSRC1_PS
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS
Definition: SIDefines.h:910
llvm::SIMachineFunctionInfo::hasDispatchID
bool hasDispatchID() const
Definition: SIMachineFunctionInfo.h:695
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::AMDGPU::initDefaultAMDKernelCodeT
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:915
R_00B848_COMPUTE_PGM_RSRC1
#define R_00B848_COMPUTE_PGM_RSRC1
Definition: SIDefines.h:983
S_0286E8_WAVESIZE_PreGFX11
#define S_0286E8_WAVESIZE_PreGFX11(x)
Definition: SIDefines.h:1046
llvm::SIMachineFunctionInfo::getNumSpilledVGPRs
unsigned getNumSpilledVGPRs() const
Definition: SIMachineFunctionInfo.h:862
llvm::AMDGPUMachineFunction::isEntryFunction
bool isEntryFunction() const
Definition: AMDGPUMachineFunction.h:80
AMDGPUInstPrinter.h
llvm::AMDGPUTargetStreamer::EmitISAVersion
virtual bool EmitISAVersion()=0
llvm::AMDGPUAsmPrinter::getTargetStreamer
AMDGPUTargetStreamer * getTargetStreamer() const
Definition: AMDGPUAsmPrinter.cpp:112
llvm::MCSymbol::isVariable
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition: MCSymbol.h:293
R_00B428_SPI_SHADER_PGM_RSRC1_HS
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS
Definition: SIDefines.h:933
llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition: MachineFunction.cpp:567
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::AsmPrinter::getSymbol
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:655
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1666
llvm::AMDGPUAsmPrinter::DisasmLines
std::vector< std::string > DisasmLines
Definition: AMDGPUAsmPrinter.h:138
llvm::AMDGPUAsmPrinter::getGlobalSTI
const MCSubtargetInfo * getGlobalSTI() const
Definition: AMDGPUAsmPrinter.cpp:108
llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:201
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:672
createAMDGPUAsmPrinterPass
static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)
Definition: AMDGPUAsmPrinter.cpp:76
llvm::CallingConv::AMDGPU_ES
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
Definition: CallingConv.h:219
llvm::getTheGCNTarget
Target & getTheGCNTarget()
The target for GCN GPUs.
Definition: AMDGPUTargetInfo.cpp:25
llvm::MCContext::reportError
void reportError(SMLoc L, const Twine &Msg)
Definition: MCContext.cpp:1053
llvm::GCNSubtarget::getMinNumVGPRs
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1216
llvm::SIProgramInfo::VGPRBlocks
uint32_t VGPRBlocks
Definition: SIProgramInfo.h:27
llvm::GCNSubtarget::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs() const
Definition: GCNSubtarget.h:1140
llvm::AsmPrinter::MF
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:102
llvm::AsmPrinter::OutContext
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:94
llvm::SIProgramInfo::getPGMRSrc1
uint64_t getPGMRSrc1(CallingConv::ID CC) const
Definition: SIProgramInfo.cpp:31
llvm::findLastSet
T findLastSet(T Val, ZeroBehavior ZB=ZB_Max)
Get the index of the last set bit starting from the least significant bit.
Definition: MathExtras.h:274
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:262
llvm::GCNSubtarget::hasGFX90AInsts
bool hasGFX90AInsts() const
Definition: GCNSubtarget.h:1045
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:359
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::AMDGPUMachineFunction::isMemoryBound
bool isMemoryBound() const
Definition: AMDGPUMachineFunction.h:90
llvm::AMDGPU::IsaInfo::getNumSGPRBlocks
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
Definition: AMDGPUBaseInfo.cpp:832
amd_kernel_code_t::kernarg_segment_alignment
uint8_t kernarg_segment_alignment
The maximum byte alignment of variables used by the kernel in the specified memory segment.
Definition: AMDKernelCodeT.h:634
amd_kernel_code_t::kernarg_segment_byte_size
uint64_t kernarg_segment_byte_size
The size in bytes of the kernarg segment that holds the values of the arguments to the kernel.
Definition: AMDKernelCodeT.h:583
llvm::getCPU
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
Definition: AVRTargetMachine.cpp:32
llvm::MachineFunction
Definition: MachineFunction.h:257
S_0286E8_WAVESIZE_GFX11Plus
#define S_0286E8_WAVESIZE_GFX11Plus(x)
Definition: SIDefines.h:1047
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1571
FP_DENORM_MODE_SP
#define FP_DENORM_MODE_SP(x)
Definition: SIDefines.h:1038
llvm::MCSymbol::redefineIfPossible
void redefineIfPossible()
Prepare this symbol to be redefined.
Definition: MCSymbol.h:225
llvm::AMDGPU::isHsaAbiVersion5
bool isHsaAbiVersion5(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:137
llvm::SIMachineFunctionInfo::getMode
AMDGPU::SIModeRegisterDefaults getMode() const
Definition: SIMachineFunctionInfo.h:558
llvm::SIMachineFunctionInfo::hasWorkGroupIDX
bool hasWorkGroupIDX() const
Definition: SIMachineFunctionInfo.h:703
llvm::MachineBasicBlock::getNumber
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
Definition: MachineBasicBlock.h:1115
llvm::AMDGPUTargetStreamer::EmitCodeEnd
virtual bool EmitCodeEnd(const MCSubtargetInfo &STI)=0
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::AMDGPUAsmPrinter::emitFunctionBodyEnd
void emitFunctionBodyEnd() override
Targets can override this to emit stuff after the last basic block in the function.
Definition: AMDGPUAsmPrinter.cpp:238
AMDGPU.h
llvm::AsmPrinter::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
Record analysis usage.
Definition: AsmPrinter.cpp:424
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:516
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::AMDGPU::isCompute
bool isCompute(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1691
TargetLoweringObjectFile.h
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
uint32_t
llvm::AMDGPU::IsaInfo::getNumVGPRBlocks
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, Optional< bool > EnableWavefrontSize32)
Definition: AMDGPUBaseInfo.cpp:905
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::DiagnosticHandler::isAnalysisRemarkEnabled
virtual bool isAnalysisRemarkEnabled(StringRef PassName) const
Return true if analysis remarks are enabled, override to provide different implementation.
Definition: DiagnosticHandler.cpp:71
G_00B84C_SCRATCH_EN
#define G_00B84C_SCRATCH_EN(x)
Definition: SIDefines.h:945
llvm::DiagnosticInfoStackSize
Definition: DiagnosticInfo.h:223
CC
auto CC
Definition: RISCVRedundantCopyElimination.cpp:79
llvm::AMDGPUTargetStreamer
Definition: AMDGPUTargetStreamer.h:34
amd_kernel_code_t
AMD Kernel Code Object (amd_kernel_code_t).
Definition: AMDKernelCodeT.h:526
llvm::AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
Definition: AMDGPUBaseInfo.h:94
R_00B328_SPI_SHADER_PGM_RSRC1_ES
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES
Definition: SIDefines.h:932
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::SIMachineFunctionInfo::hasPrivateSegmentBuffer
bool hasPrivateSegmentBuffer() const
Definition: SIMachineFunctionInfo.h:679
AMDHSA_BITS_SET
#define AMDHSA_BITS_SET(DST, MSK, VAL)
Definition: AMDHSAKernelDescriptor.h:42
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
llvm::AMDGPUAsmPrinter::DisasmLineMaxLen
size_t DisasmLineMaxLen
Definition: AMDGPUAsmPrinter.h:139
llvm::amdhsa::kernel_descriptor_t::kernarg_size
uint32_t kernarg_size
Definition: AMDHSAKernelDescriptor.h:173
G_00B84C_TGID_Z_EN
#define G_00B84C_TGID_Z_EN(x)
Definition: SIDefines.h:960
FP_ROUND_MODE_SP
#define FP_ROUND_MODE_SP(x)
Definition: SIDefines.h:1027
llvm::GCNSubtarget::getMinNumSGPRs
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1146
FP_ROUND_MODE_DP
#define FP_ROUND_MODE_DP(x)
Definition: SIDefines.h:1028
llvm::SIProgramInfo::FlatUsed
bool FlatUsed
Definition: SIProgramInfo.h:55
llvm::SIMachineFunctionInfo::getNumUserSGPRs
unsigned getNumUserSGPRs() const
Definition: SIMachineFunctionInfo.h:773
llvm::SIProgramInfo::IEEEMode
uint32_t IEEEMode
Definition: SIProgramInfo.h:34
llvm::SIProgramInfo::NumAccVGPR
uint32_t NumAccVGPR
Definition: SIProgramInfo.h:48
llvm::SIMachineFunctionInfo::hasWorkItemIDZ
bool hasWorkItemIDZ() const
Definition: SIMachineFunctionInfo.h:733
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::SIProgramInfo::Occupancy
uint32_t Occupancy
Definition: SIProgramInfo.h:64
llvm::AMDGPU::HSAMD::Kernel::Key::SymbolName
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
Definition: AMDGPUMetadata.h:386
llvm::LLVMContext::diagnose
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Definition: LLVMContext.cpp:248
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
llvm::AMDGPUTargetStreamer::EmitAMDGPUSymbolType
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type)=0
llvm::Triple::OSType
OSType
Definition: Triple.h:182
S_00B84C_TG_SIZE_EN
#define S_00B84C_TG_SIZE_EN(x)
Definition: SIDefines.h:962
llvm::empty
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:256
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
std
Definition: BitVector.h:851
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:622
llvm::SIMachineFunctionInfo::hasDispatchPtr
bool hasDispatchPtr() const
Definition: SIMachineFunctionInfo.h:683
S_00B84C_EXCP_EN
#define S_00B84C_EXCP_EN(x)
Definition: SIDefines.h:976
uint16_t
llvm::ELF::STT_AMDGPU_HSA_KERNEL
@ STT_AMDGPU_HSA_KERNEL
Definition: ELF.h:1239
llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS
@ VOLCANIC_ISLANDS
Definition: AMDGPUSubtarget.h:39
llvm::AMDGPUMachineFunction::isModuleEntryFunction
bool isModuleEntryFunction() const
Definition: AMDGPUMachineFunction.h:84
MachineFrameInfo.h
amd_kernel_code_t::code_properties
uint32_t code_properties
Code properties.
Definition: AMDKernelCodeT.h:562
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV3
Definition: AMDGPUHSAMetadataStreamer.h:63
llvm::CallingConv::SPIR_KERNEL
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:141
llvm::GCNSubtarget::getTargetID
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition: GCNSubtarget.h:251
llvm::createR600AsmPrinterPass
AsmPrinter * createR600AsmPrinterPass(TargetMachine &TM, std::unique_ptr< MCStreamer > &&Streamer)
Definition: R600AsmPrinter.cpp:31
R_00B860_COMPUTE_TMPRING_SIZE
#define R_00B860_COMPUTE_TMPRING_SIZE
Definition: SIDefines.h:1041
Success
#define Success
Definition: AArch64Disassembler.cpp:280
llvm::Triple::AMDPAL
@ AMDPAL
Definition: Triple.h:217
llvm::SIProgramInfo::TgSplit
uint32_t TgSplit
Definition: SIProgramInfo.h:50
DiagnosticInfo.h
llvm::SIMachineFunctionInfo::hasWorkItemIDY
bool hasWorkItemIDY() const
Definition: SIMachineFunctionInfo.h:729
llvm::amdhsa::kernel_descriptor_t
Definition: AMDHSAKernelDescriptor.h:170
llvm::AsmPrinter
This class is intended to be used as a driving class for all asm writers.
Definition: AsmPrinter.h:84
S_00B028_SGPRS
#define S_00B028_SGPRS(x)
Definition: SIDefines.h:912
llvm::CallingConv::AMDGPU_HS
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:207
llvm::SIProgramInfo::VCCUsed
bool VCCUsed
Definition: SIProgramInfo.h:71
llvm::AsmPrinter::emitVisibility
void emitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition=true) const
This emits visibility information about symbol, if this is supported by the target.
Definition: AsmPrinter.cpp:3737
llvm::GCNSubtarget::getMaxNumUserSGPRs
unsigned getMaxNumUserSGPRs() const
Definition: GCNSubtarget.h:819
llvm::AMDGPUTargetStreamer::EmitDirectiveHSACodeObjectISAV2
virtual void EmitDirectiveHSACodeObjectISAV2(uint32_t Major, uint32_t Minor, uint32_t Stepping, StringRef VendorName, StringRef ArchName)=0
llvm::MachineOperand::isImm
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Definition: MachineOperand.h:322
llvm::SIProgramInfo::getComputePGMRSrc1
uint64_t getComputePGMRSrc1() const
Compute the value of the ComputePGMRsrc1 register.
Definition: SIProgramInfo.cpp:23
llvm::AMDGPUAsmPrinter::getPassName
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
Definition: AMDGPUAsmPrinter.cpp:104
R_SPILLED_SGPRS
#define R_SPILLED_SGPRS
Definition: SIDefines.h:1058
llvm::amdhsa::kernel_descriptor_t::compute_pgm_rsrc1
uint32_t compute_pgm_rsrc1
Definition: AMDHSAKernelDescriptor.h:178
llvm::AMDGPUAsmPrinter::doFinalization
bool doFinalization(Module &M) override
Shut down the asmprinter.
Definition: AMDGPUAsmPrinter.cpp:349
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE
@ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE
Definition: AMDKernelCodeT.h:163
llvm::GlobalValue::getAddressSpace
unsigned getAddressSpace() const
Definition: Globals.cpp:121
llvm::GCNSubtarget::getGeneration
Generation getGeneration() const
Definition: GCNSubtarget.h:266
llvm::SIInstrInfo
Definition: SIInstrInfo.h:44
amd_kernel_code_t::workitem_vgpr_count
uint16_t workitem_vgpr_count
Number of vector registers used by each work-item.
Definition: AMDKernelCodeT.h:599
llvm::SIProgramInfo::VGPRSpill
unsigned VGPRSpill
Definition: SIProgramInfo.h:53
llvm::SIProgramInfo
Track resource usage for kernels / entry functions.
Definition: SIProgramInfo.h:25
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:105
G_00B84C_USER_SGPR
#define G_00B84C_USER_SGPR(x)
Definition: SIDefines.h:948
llvm::GCNSubtarget::isCuModeEnabled
bool isCuModeEnabled() const
Definition: GCNSubtarget.h:558
G_00B84C_TIDIG_COMP_CNT
#define G_00B84C_TIDIG_COMP_CNT(x)
Definition: SIDefines.h:966
llvm::AsmPrinter::TM
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:87
llvm::TargetMachine::getSubtarget
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Definition: TargetMachine.h:164
llvm::TargetRegistry::RegisterAsmPrinter
static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)
RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.
Definition: TargetRegistry.h:938
S_00B028_VGPRS
#define S_00B028_VGPRS(x)
Definition: SIDefines.h:911
llvm::AsmPrinter::getObjFileLowering
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
Definition: AsmPrinter.cpp:383
MCStreamer.h
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:351
AMDKernelCodeT.h
llvm::AsmPrinter::emitFunctionBody
void emitFunctionBody()
This method emits the body and trailer for a function.
Definition: AsmPrinter.cpp:1550
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID
Definition: AMDKernelCodeT.h:103
llvm::CallingConv::AMDGPU_PS
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:195
llvm::TargetMachine::getTargetTriple
const Triple & getTargetTriple() const
Definition: TargetMachine.h:126
llvm::AMDGPU::isHsaAbiVersion3AndAbove
bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:143
S_00B84C_LDS_SIZE
#define S_00B84C_LDS_SIZE(x)
Definition: SIDefines.h:973
llvm::AMDGPUResourceUsageAnalysis
Definition: AMDGPUResourceUsageAnalysis.h:27
G_00B84C_TGID_X_EN
#define G_00B84C_TGID_X_EN(x)
Definition: SIDefines.h:954
llvm::SIMachineFunctionInfo::hasQueuePtr
bool hasQueuePtr() const
Definition: SIMachineFunctionInfo.h:687
llvm::SIMachineFunctionInfo::hasWorkGroupIDY
bool hasWorkGroupIDY() const
Definition: SIMachineFunctionInfo.h:707
llvm::MachineBasicBlock::empty
bool empty() const
Definition: MachineBasicBlock.h:277
llvm::AMDGPUAsmPrinter::emitFunctionEntryLabel
void emitFunctionEntryLabel() override
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
Definition: AMDGPUAsmPrinter.cpp:277
llvm::SIMachineFunctionInfo::hasKernargSegmentPtr
bool hasKernargSegmentPtr() const
Definition: SIMachineFunctionInfo.h:691
llvm::SIProgramInfo::DX10Clamp
uint32_t DX10Clamp
Definition: SIProgramInfo.h:32
llvm::SIMachineFunctionInfo::getPSInputAddr
unsigned getPSInputAddr() const
Definition: SIMachineFunctionInfo.h:874
llvm::SIProgramInfo::ScratchBlocks
uint32_t ScratchBlocks
Definition: SIProgramInfo.h:41
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:290
llvm::StringRef::str
std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:221
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR
Definition: AMDKernelCodeT.h:99
S_00B860_WAVESIZE_GFX11Plus
#define S_00B860_WAVESIZE_GFX11Plus(x)
Definition: SIDefines.h:1043
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::AMDGPUTargetStreamer::getPALMetadata
AMDGPUPALMetadata * getPALMetadata()
Definition: AMDGPUTargetStreamer.h:46
llvm::SIProgramInfo::SGPRBlocks
uint32_t SGPRBlocks
Definition: SIProgramInfo.h:28
llvm::AsmPrinter::isVerbose
bool isVerbose() const
Return true if assembly output should contain comments.
Definition: AsmPrinter.h:265
LLVMInitializeAMDGPUAsmPrinter
void LLVM_EXTERNAL_VISIBILITY LLVMInitializeAMDGPUAsmPrinter()
Definition: AMDGPUAsmPrinter.cpp:81
llvm::lltok::LabelStr
@ LabelStr
Definition: LLToken.h:416
llvm::AMDGPUAsmPrinter::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition: AMDGPUAsmPrinter.cpp:458
llvm::AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo
Definition: AMDGPUResourceUsageAnalysis.h:32
llvm::AMDGPUAsmPrinter::HexLines
std::vector< std::string > HexLines
Definition: AMDGPUAsmPrinter.h:138
llvm::AMDGPUMachineFunction
Definition: AMDGPUMachineFunction.h:23
TargetRegistry.h
llvm::AMDGPUAsmPrinter::emitGlobalVariable
void emitGlobalVariable(const GlobalVariable *GV) override
Emit the specified global variable to the .s file.
Definition: AMDGPUAsmPrinter.cpp:314
R_00B02C_SPI_SHADER_PGM_RSRC2_PS
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS
Definition: SIDefines.h:917
llvm::MCSubtargetInfo
Generic base class for all target subtargets.
Definition: MCSubtargetInfo.h:76
AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED
@ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED
Definition: AMDKernelCodeT.h:193
llvm::AsmPrinter::getFunctionNumber
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:379
llvm::Optional::value_or
constexpr T value_or(U &&alt) const &
Definition: Optional.h:334
llvm::GCNSubtarget::computeOccupancy
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
Definition: AMDGPUSubtarget.cpp:667
amd_kernel_code_t::wavefront_sgpr_count
uint16_t wavefront_sgpr_count
Number of scalar registers used by a wavefront.
Definition: AMDKernelCodeT.h:595
llvm::SystemZICMP::Any
@ Any
Definition: SystemZISelLowering.h:377
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR
Definition: AMDKernelCodeT.h:91
llvm::DS_Error
@ DS_Error
Definition: DiagnosticInfo.h:50
llvm::SIMachineFunctionInfo::hasFlatScratchInit
bool hasFlatScratchInit() const
Definition: SIMachineFunctionInfo.h:699
S_00B84C_TRAP_HANDLER
#define S_00B84C_TRAP_HANDLER(x)
Definition: SIDefines.h:950
llvm::AsmPrinter::PrintAsmOperand
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
Definition: AsmPrinterInlineAsm.cpp:475
llvm::AMDGPUInstPrinter::printRegOperand
static void printRegOperand(unsigned RegNo, raw_ostream &O, const MCRegisterInfo &MRI)
Definition: AMDGPUInstPrinter.cpp:342
llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV4
Definition: AMDGPUHSAMetadataStreamer.h:132
llvm::AMDGPUSubtarget::isAmdPalOS
bool isAmdPalOS() const
Definition: AMDGPUSubtarget.h:124
llvm::GCNSubtarget::hasMAIInsts
bool hasMAIInsts() const
Definition: GCNSubtarget.h:729
AMDGPUBaseInfo.h