LLVM  14.0.0git
AMDGPUAsmPrinter.cpp
Go to the documentation of this file.
1 //===-- AMDGPUAsmPrinter.cpp - AMDGPU assembly printer --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 ///
11 /// The AMDGPUAsmPrinter is used to print both assembly strings and binary
12 /// code. When passed an MCAsmStreamer it prints assembly and when passed
13 /// an MCObjectStreamer it outputs binary code.
14 //
15 //===----------------------------------------------------------------------===//
16 //
17 
18 #include "AMDGPUAsmPrinter.h"
19 #include "AMDGPU.h"
22 #include "AMDKernelCodeT.h"
23 #include "GCNSubtarget.h"
26 #include "R600AsmPrinter.h"
27 #include "SIMachineFunctionInfo.h"
29 #include "Utils/AMDGPUBaseInfo.h"
30 #include "llvm/IR/DiagnosticInfo.h"
31 #include "llvm/MC/MCAssembler.h"
32 #include "llvm/MC/MCContext.h"
33 #include "llvm/MC/MCSectionELF.h"
34 #include "llvm/MC/MCStreamer.h"
39 
40 using namespace llvm;
41 using namespace llvm::AMDGPU;
42 
43 // This should get the default rounding mode from the kernel. We just set the
44 // default here, but this could change if the OpenCL rounding mode pragmas are
45 // used.
46 //
47 // The denormal mode here should match what is reported by the OpenCL runtime
48 // for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
49 // can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
50 //
51 // AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
52 // precision, and leaves single precision to flush all and does not report
53 // CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
54 // CL_FP_DENORM for both.
55 //
56 // FIXME: It seems some instructions do not support single precision denormals
57 // regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
58 // and sin_f32, cos_f32 on most parts).
59 
60 // We want to use these instructions, and using fp32 denormals also causes
61 // instructions to run at the double precision rate for the device so it's
62 // probably best to just report no single precision denormals.
66  FP_DENORM_MODE_SP(Mode.fpDenormModeSPValue()) |
67  FP_DENORM_MODE_DP(Mode.fpDenormModeDPValue());
68 }
69 
70 static AsmPrinter *
72  std::unique_ptr<MCStreamer> &&Streamer) {
73  return new AMDGPUAsmPrinter(tm, std::move(Streamer));
74 }
75 
81 }
82 
84  std::unique_ptr<MCStreamer> Streamer)
85  : AsmPrinter(TM, std::move(Streamer)) {
88  HSAMetadataStream.reset(new HSAMD::MetadataStreamerV2());
89  } else if (isHsaAbiVersion3(getGlobalSTI())) {
90  HSAMetadataStream.reset(new HSAMD::MetadataStreamerV3());
91  } else {
92  HSAMetadataStream.reset(new HSAMD::MetadataStreamerV4());
93  }
94  }
95 }
96 
98  return "AMDGPU Assembly Printer";
99 }
100 
102  return TM.getMCSubtargetInfo();
103 }
104 
106  if (!OutStreamer)
107  return nullptr;
108  return static_cast<AMDGPUTargetStreamer*>(OutStreamer->getTargetStreamer());
109 }
110 
112  // TODO: Which one is called first, emitStartOfAsmFile or
113  // emitFunctionBodyStart?
114  if (getTargetStreamer() && !getTargetStreamer()->getTargetID())
115  initializeTargetID(M);
116 
117  if (TM.getTargetTriple().getOS() != Triple::AMDHSA &&
119  return;
120 
123 
125  HSAMetadataStream->begin(M, *getTargetStreamer()->getTargetID());
126 
129 
131  return;
132 
133  // HSA emits NT_AMD_HSA_CODE_OBJECT_VERSION for code objects v2.
136 
137  // HSA and PAL emit NT_AMD_HSA_ISA_VERSION for code objects v2.
140  Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
141 }
142 
144  // Following code requires TargetStreamer to be present.
145  if (!getTargetStreamer())
146  return;
147 
148  if (TM.getTargetTriple().getOS() != Triple::AMDHSA ||
151 
152  // Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA).
153  // Emit HSA Metadata (NT_AMD_HSA_METADATA).
154  if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
155  HSAMetadataStream->end();
156  bool Success = HSAMetadataStream->emitTo(*getTargetStreamer());
157  (void)Success;
158  assert(Success && "Malformed HSA Metadata");
159  }
160 }
161 
163  const MachineBasicBlock *MBB) const {
165  return false;
166 
167  if (MBB->empty())
168  return true;
169 
170 // If this is a block implementing a long branch, an expression relative to
171 // the start of the block is needed.
172  // XXX - Is there a smarter way to check this?
173  return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64);
174 }
175 
178  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
179  const Function &F = MF->getFunction();
180 
181  // TODO: Which one is called first, emitStartOfAsmFile or
182  // emitFunctionBodyStart?
184  initializeTargetID(*F.getParent());
185 
186  const auto &FunctionTargetID = STM.getTargetID();
187  // Make sure function's xnack settings are compatible with module's
188  // xnack settings.
189  if (FunctionTargetID.isXnackSupported() &&
190  FunctionTargetID.getXnackSetting() != IsaInfo::TargetIDSetting::Any &&
191  FunctionTargetID.getXnackSetting() != getTargetStreamer()->getTargetID()->getXnackSetting()) {
192  OutContext.reportError({}, "xnack setting of '" + Twine(MF->getName()) +
193  "' function does not match module xnack setting");
194  return;
195  }
196  // Make sure function's sramecc settings are compatible with module's
197  // sramecc settings.
198  if (FunctionTargetID.isSramEccSupported() &&
199  FunctionTargetID.getSramEccSetting() != IsaInfo::TargetIDSetting::Any &&
200  FunctionTargetID.getSramEccSetting() != getTargetStreamer()->getTargetID()->getSramEccSetting()) {
201  OutContext.reportError({}, "sramecc setting of '" + Twine(MF->getName()) +
202  "' function does not match module sramecc setting");
203  return;
204  }
205 
206  if (!MFI.isEntryFunction())
207  return;
208 
209  if ((STM.isMesaKernel(F) || isHsaAbiVersion2(getGlobalSTI())) &&
210  (F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
211  F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
212  amd_kernel_code_t KernelCode;
213  getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
214  getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
215  }
216 
217  if (STM.isAmdHsaOS())
218  HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
219 }
220 
223  if (!MFI.isEntryFunction())
224  return;
225 
226  if (TM.getTargetTriple().getOS() != Triple::AMDHSA ||
228  return;
229 
230  auto &Streamer = getTargetStreamer()->getStreamer();
231  auto &Context = Streamer.getContext();
232  auto &ObjectFileInfo = *Context.getObjectFileInfo();
233  auto &ReadOnlySection = *ObjectFileInfo.getReadOnlySection();
234 
235  Streamer.PushSection();
236  Streamer.SwitchSection(&ReadOnlySection);
237 
238  // CP microcode requires the kernel descriptor to be allocated on 64 byte
239  // alignment.
240  Streamer.emitValueToAlignment(64, 0, 1, 0);
241  if (ReadOnlySection.getAlignment() < 64)
242  ReadOnlySection.setAlignment(Align(64));
243 
244  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
245 
246  SmallString<128> KernelName;
247  getNameWithPrefix(KernelName, &MF->getFunction());
249  STM, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
250  CurrentProgramInfo.NumVGPRsForWavesPerEU,
251  CurrentProgramInfo.NumSGPRsForWavesPerEU -
253  CurrentProgramInfo.VCCUsed,
254  CurrentProgramInfo.FlatUsed),
255  CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed);
256 
257  Streamer.PopSection();
258 }
259 
261  if (TM.getTargetTriple().getOS() == Triple::AMDHSA &&
264  return;
265  }
266 
268  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
269  if (MFI->isEntryFunction() && STM.isAmdHsaOrMesa(MF->getFunction())) {
274  }
275  if (DumpCodeInstEmitter) {
276  // Disassemble function name label to text.
277  DisasmLines.push_back(MF->getName().str() + ":");
279  HexLines.push_back("");
280  }
281 
283 }
284 
286  if (DumpCodeInstEmitter && !isBlockOnlyReachableByFallthrough(&MBB)) {
287  // Write a line for the basic block label if it is not only fallthrough.
288  DisasmLines.push_back(
289  (Twine("BB") + Twine(getFunctionNumber())
290  + "_" + Twine(MBB.getNumber()) + ":").str());
292  HexLines.push_back("");
293  }
295 }
296 
299  if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
301  Twine(GV->getName()) +
302  ": unsupported initializer for address space");
303  return;
304  }
305 
306  // LDS variables aren't emitted in HSA or PAL yet.
307  const Triple::OSType OS = TM.getTargetTriple().getOS();
308  if (OS == Triple::AMDHSA || OS == Triple::AMDPAL)
309  return;
310 
311  MCSymbol *GVSym = getSymbol(GV);
312 
313  GVSym->redefineIfPossible();
314  if (GVSym->isDefined() || GVSym->isVariable())
315  report_fatal_error("symbol '" + Twine(GVSym->getName()) +
316  "' is already defined");
317 
318  const DataLayout &DL = GV->getParent()->getDataLayout();
319  uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
320  Align Alignment = GV->getAlign().getValueOr(Align(4));
321 
322  emitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
323  emitLinkage(GV, GVSym);
324  if (auto TS = getTargetStreamer())
325  TS->emitAMDGPULDS(GVSym, Size, Alignment);
326  return;
327  }
328 
330 }
331 
333  // Pad with s_code_end to help tools and guard against instruction prefetch
334  // causing stale data in caches. Arguably this should be done by the linker,
335  // which is why this isn't done for Mesa.
336  const MCSubtargetInfo &STI = *getGlobalSTI();
337  if ((AMDGPU::isGFX10Plus(STI) || AMDGPU::isGFX90A(STI)) &&
338  (STI.getTargetTriple().getOS() == Triple::AMDHSA ||
339  STI.getTargetTriple().getOS() == Triple::AMDPAL)) {
340  OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
342  }
343 
345 }
346 
347 // Print comments that apply to both callable functions and entry points.
348 void AMDGPUAsmPrinter::emitCommonFunctionComments(
349  uint32_t NumVGPR,
350  Optional<uint32_t> NumAGPR,
351  uint32_t TotalNumVGPR,
352  uint32_t NumSGPR,
353  uint64_t ScratchSize,
354  uint64_t CodeSize,
355  const AMDGPUMachineFunction *MFI) {
356  OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);
357  OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false);
358  OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false);
359  if (NumAGPR) {
360  OutStreamer->emitRawComment(" NumAgprs: " + Twine(*NumAGPR), false);
361  OutStreamer->emitRawComment(" TotalNumVgprs: " + Twine(TotalNumVGPR),
362  false);
363  }
364  OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false);
365  OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()),
366  false);
367 }
368 
369 uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
370  const MachineFunction &MF) const {
372  uint16_t KernelCodeProperties = 0;
373 
374  if (MFI.hasPrivateSegmentBuffer()) {
375  KernelCodeProperties |=
376  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
377  }
378  if (MFI.hasDispatchPtr()) {
379  KernelCodeProperties |=
380  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
381  }
382  if (MFI.hasQueuePtr()) {
383  KernelCodeProperties |=
384  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
385  }
386  if (MFI.hasKernargSegmentPtr()) {
387  KernelCodeProperties |=
388  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
389  }
390  if (MFI.hasDispatchID()) {
391  KernelCodeProperties |=
392  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
393  }
394  if (MFI.hasFlatScratchInit()) {
395  KernelCodeProperties |=
396  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
397  }
399  KernelCodeProperties |=
400  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
401  }
402 
403  return KernelCodeProperties;
404 }
405 
406 amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
407  const MachineFunction &MF,
408  const SIProgramInfo &PI) const {
409  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
410  const Function &F = MF.getFunction();
411 
412  amdhsa::kernel_descriptor_t KernelDescriptor;
413  memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor));
414 
418 
419  KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
420  KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
421 
422  Align MaxKernArgAlign;
423  KernelDescriptor.kernarg_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
424 
425  KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1();
426  KernelDescriptor.compute_pgm_rsrc2 = PI.ComputePGMRSrc2;
427  KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
428 
429  assert(STM.hasGFX90AInsts() || CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
430  if (STM.hasGFX90AInsts())
431  KernelDescriptor.compute_pgm_rsrc3 =
432  CurrentProgramInfo.ComputePGMRSrc3GFX90A;
433 
434  return KernelDescriptor;
435 }
436 
438  ResourceUsage = &getAnalysis<AMDGPUResourceUsageAnalysis>();
439  CurrentProgramInfo = SIProgramInfo();
440 
442 
443  // The starting address of all shader programs must be 256 bytes aligned.
444  // Regular functions just need the basic required instruction alignment.
445  MF.setAlignment(MFI->isEntryFunction() ? Align(256) : Align(4));
446 
448 
449  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
451  // FIXME: This should be an explicit check for Mesa.
452  if (!STM.isAmdHsaOS() && !STM.isAmdPalOS()) {
453  MCSectionELF *ConfigSection =
454  Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
455  OutStreamer->SwitchSection(ConfigSection);
456  }
457 
458  if (MFI->isModuleEntryFunction()) {
459  getSIProgramInfo(CurrentProgramInfo, MF);
460  }
461 
462  if (STM.isAmdPalOS()) {
463  if (MFI->isEntryFunction())
464  EmitPALMetadata(MF, CurrentProgramInfo);
465  else if (MFI->isModuleEntryFunction())
466  emitPALFunctionMetadata(MF);
467  } else if (!STM.isAmdHsaOS()) {
468  EmitProgramInfoSI(MF, CurrentProgramInfo);
469  }
470 
471  DumpCodeInstEmitter = nullptr;
472  if (STM.dumpCode()) {
473  // For -dumpcode, get the assembler out of the streamer, even if it does
474  // not really want to let us have it. This only works with -filetype=obj.
475  bool SaveFlag = OutStreamer->getUseAssemblerInfoForParsing();
476  OutStreamer->setUseAssemblerInfoForParsing(true);
477  MCAssembler *Assembler = OutStreamer->getAssemblerPtr();
478  OutStreamer->setUseAssemblerInfoForParsing(SaveFlag);
479  if (Assembler)
480  DumpCodeInstEmitter = Assembler->getEmitterPtr();
481  }
482 
483  DisasmLines.clear();
484  HexLines.clear();
485  DisasmLineMaxLen = 0;
486 
488 
489  if (isVerbose()) {
490  MCSectionELF *CommentSection =
491  Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
492  OutStreamer->SwitchSection(CommentSection);
493 
494  if (!MFI->isEntryFunction()) {
495  OutStreamer->emitRawComment(" Function info:", false);
497  ResourceUsage->getResourceInfo(&MF.getFunction());
498  emitCommonFunctionComments(
499  Info.NumVGPR,
500  STM.hasMAIInsts() ? Info.NumAGPR : Optional<uint32_t>(),
501  Info.getTotalNumVGPRs(STM),
502  Info.getTotalNumSGPRs(MF.getSubtarget<GCNSubtarget>()),
503  Info.PrivateSegmentSize,
504  getFunctionCodeSize(MF), MFI);
505  return false;
506  }
507 
508  OutStreamer->emitRawComment(" Kernel info:", false);
509  emitCommonFunctionComments(CurrentProgramInfo.NumArchVGPR,
510  STM.hasMAIInsts()
511  ? CurrentProgramInfo.NumAccVGPR
512  : Optional<uint32_t>(),
513  CurrentProgramInfo.NumVGPR,
514  CurrentProgramInfo.NumSGPR,
515  CurrentProgramInfo.ScratchSize,
516  getFunctionCodeSize(MF), MFI);
517 
518  OutStreamer->emitRawComment(
519  " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false);
520  OutStreamer->emitRawComment(
521  " IeeeMode: " + Twine(CurrentProgramInfo.IEEEMode), false);
522  OutStreamer->emitRawComment(
523  " LDSByteSize: " + Twine(CurrentProgramInfo.LDSSize) +
524  " bytes/workgroup (compile time only)", false);
525 
526  OutStreamer->emitRawComment(
527  " SGPRBlocks: " + Twine(CurrentProgramInfo.SGPRBlocks), false);
528  OutStreamer->emitRawComment(
529  " VGPRBlocks: " + Twine(CurrentProgramInfo.VGPRBlocks), false);
530 
531  OutStreamer->emitRawComment(
532  " NumSGPRsForWavesPerEU: " +
533  Twine(CurrentProgramInfo.NumSGPRsForWavesPerEU), false);
534  OutStreamer->emitRawComment(
535  " NumVGPRsForWavesPerEU: " +
536  Twine(CurrentProgramInfo.NumVGPRsForWavesPerEU), false);
537 
538  if (STM.hasGFX90AInsts())
539  OutStreamer->emitRawComment(
540  " AccumOffset: " +
541  Twine((CurrentProgramInfo.AccumOffset + 1) * 4), false);
542 
543  OutStreamer->emitRawComment(
544  " Occupancy: " +
545  Twine(CurrentProgramInfo.Occupancy), false);
546 
547  OutStreamer->emitRawComment(
548  " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
549 
550  OutStreamer->emitRawComment(
551  " COMPUTE_PGM_RSRC2:SCRATCH_EN: " +
552  Twine(G_00B84C_SCRATCH_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
553  OutStreamer->emitRawComment(
554  " COMPUTE_PGM_RSRC2:USER_SGPR: " +
555  Twine(G_00B84C_USER_SGPR(CurrentProgramInfo.ComputePGMRSrc2)), false);
556  OutStreamer->emitRawComment(
557  " COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
558  Twine(G_00B84C_TRAP_HANDLER(CurrentProgramInfo.ComputePGMRSrc2)), false);
559  OutStreamer->emitRawComment(
560  " COMPUTE_PGM_RSRC2:TGID_X_EN: " +
561  Twine(G_00B84C_TGID_X_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
562  OutStreamer->emitRawComment(
563  " COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
564  Twine(G_00B84C_TGID_Y_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
565  OutStreamer->emitRawComment(
566  " COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
567  Twine(G_00B84C_TGID_Z_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
568  OutStreamer->emitRawComment(
569  " COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
570  Twine(G_00B84C_TIDIG_COMP_CNT(CurrentProgramInfo.ComputePGMRSrc2)),
571  false);
572 
573  assert(STM.hasGFX90AInsts() ||
574  CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
575  if (STM.hasGFX90AInsts()) {
576  OutStreamer->emitRawComment(
577  " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
578  Twine((AMDHSA_BITS_GET(CurrentProgramInfo.ComputePGMRSrc3GFX90A,
579  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET))),
580  false);
581  OutStreamer->emitRawComment(
582  " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
583  Twine((AMDHSA_BITS_GET(CurrentProgramInfo.ComputePGMRSrc3GFX90A,
584  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT))),
585  false);
586  }
587  }
588 
589  if (DumpCodeInstEmitter) {
590 
591  OutStreamer->SwitchSection(
592  Context.getELFSection(".AMDGPU.disasm", ELF::SHT_PROGBITS, 0));
593 
594  for (size_t i = 0; i < DisasmLines.size(); ++i) {
595  std::string Comment = "\n";
596  if (!HexLines[i].empty()) {
597  Comment = std::string(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
598  Comment += " ; " + HexLines[i] + "\n";
599  }
600 
601  OutStreamer->emitBytes(StringRef(DisasmLines[i]));
602  OutStreamer->emitBytes(StringRef(Comment));
603  }
604  }
605 
606  return false;
607 }
608 
609 // TODO: Fold this into emitFunctionBodyStart.
610 void AMDGPUAsmPrinter::initializeTargetID(const Module &M) {
611  // In the beginning all features are either 'Any' or 'NotSupported',
612  // depending on global target features. This will cover empty modules.
614  *getGlobalSTI(), getGlobalSTI()->getFeatureString());
615 
616  // If module is empty, we are done.
617  if (M.empty())
618  return;
619 
620  // If module is not empty, need to find first 'Off' or 'On' feature
621  // setting per feature from functions in module.
622  for (auto &F : M) {
623  auto &TSTargetID = getTargetStreamer()->getTargetID();
624  if ((!TSTargetID->isXnackSupported() || TSTargetID->isXnackOnOrOff()) &&
625  (!TSTargetID->isSramEccSupported() || TSTargetID->isSramEccOnOrOff()))
626  break;
627 
628  const GCNSubtarget &STM = TM.getSubtarget<GCNSubtarget>(F);
629  const IsaInfo::AMDGPUTargetID &STMTargetID = STM.getTargetID();
630  if (TSTargetID->isXnackSupported())
631  if (TSTargetID->getXnackSetting() == IsaInfo::TargetIDSetting::Any)
632  TSTargetID->setXnackSetting(STMTargetID.getXnackSetting());
633  if (TSTargetID->isSramEccSupported())
634  if (TSTargetID->getSramEccSetting() == IsaInfo::TargetIDSetting::Any)
635  TSTargetID->setSramEccSetting(STMTargetID.getSramEccSetting());
636  }
637 }
638 
639 uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(const MachineFunction &MF) const {
640  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
641  const SIInstrInfo *TII = STM.getInstrInfo();
642 
643  uint64_t CodeSize = 0;
644 
645  for (const MachineBasicBlock &MBB : MF) {
646  for (const MachineInstr &MI : MBB) {
647  // TODO: CodeSize should account for multiple functions.
648 
649  // TODO: Should we count size of debug info?
650  if (MI.isDebugInstr())
651  continue;
652 
653  CodeSize += TII->getInstSizeInBytes(MI);
654  }
655  }
656 
657  return CodeSize;
658 }
659 
660 void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
661  const MachineFunction &MF) {
663  ResourceUsage->getResourceInfo(&MF.getFunction());
664  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
665 
666  ProgInfo.NumArchVGPR = Info.NumVGPR;
667  ProgInfo.NumAccVGPR = Info.NumAGPR;
668  ProgInfo.NumVGPR = Info.getTotalNumVGPRs(STM);
669  ProgInfo.AccumOffset = alignTo(std::max(1, Info.NumVGPR), 4) / 4 - 1;
670  ProgInfo.TgSplit = STM.isTgSplitEnabled();
671  ProgInfo.NumSGPR = Info.NumExplicitSGPR;
672  ProgInfo.ScratchSize = Info.PrivateSegmentSize;
673  ProgInfo.VCCUsed = Info.UsesVCC;
674  ProgInfo.FlatUsed = Info.UsesFlatScratch;
675  ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion;
676 
677  const uint64_t MaxScratchPerWorkitem =
679  if (ProgInfo.ScratchSize > MaxScratchPerWorkitem) {
680  DiagnosticInfoStackSize DiagStackSize(MF.getFunction(),
681  ProgInfo.ScratchSize,
682  MaxScratchPerWorkitem, DS_Error);
683  MF.getFunction().getContext().diagnose(DiagStackSize);
684  }
685 
687 
688  // The calculations related to SGPR/VGPR blocks are
689  // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
690  // unified.
691  unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
692  &STM, ProgInfo.VCCUsed, ProgInfo.FlatUsed);
693 
694  // Check the addressable register limit before we add ExtraSGPRs.
696  !STM.hasSGPRInitBug()) {
697  unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
698  if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
699  // This can happen due to a compiler bug or when using inline asm.
702  MF.getFunction(), "addressable scalar registers", ProgInfo.NumSGPR,
703  MaxAddressableNumSGPRs, DS_Error, DK_ResourceLimit);
704  Ctx.diagnose(Diag);
705  ProgInfo.NumSGPR = MaxAddressableNumSGPRs - 1;
706  }
707  }
708 
709  // Account for extra SGPRs and VGPRs reserved for debugger use.
710  ProgInfo.NumSGPR += ExtraSGPRs;
711 
712  const Function &F = MF.getFunction();
713 
714  // Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave
715  // dispatch registers are function args.
716  unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
717 
718  if (isShader(F.getCallingConv())) {
719  // FIXME: We should be using the number of registers determined during
720  // calling convention lowering to legalize the types.
721  const DataLayout &DL = F.getParent()->getDataLayout();
722  for (auto &Arg : F.args()) {
723  unsigned NumRegs = (DL.getTypeSizeInBits(Arg.getType()) + 31) / 32;
724  if (Arg.hasAttribute(Attribute::InReg))
725  WaveDispatchNumSGPR += NumRegs;
726  else
727  WaveDispatchNumVGPR += NumRegs;
728  }
729  ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR);
730  ProgInfo.NumVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
731  }
732 
733  // Adjust number of registers used to meet default/requested minimum/maximum
734  // number of waves per execution unit request.
735  ProgInfo.NumSGPRsForWavesPerEU = std::max(
736  std::max(ProgInfo.NumSGPR, 1u), STM.getMinNumSGPRs(MFI->getMaxWavesPerEU()));
737  ProgInfo.NumVGPRsForWavesPerEU = std::max(
738  std::max(ProgInfo.NumVGPR, 1u), STM.getMinNumVGPRs(MFI->getMaxWavesPerEU()));
739 
741  STM.hasSGPRInitBug()) {
742  unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
743  if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
744  // This can happen due to a compiler bug or when using inline asm to use
745  // the registers which are usually reserved for vcc etc.
747  DiagnosticInfoResourceLimit Diag(MF.getFunction(), "scalar registers",
748  ProgInfo.NumSGPR, MaxAddressableNumSGPRs,
750  Ctx.diagnose(Diag);
751  ProgInfo.NumSGPR = MaxAddressableNumSGPRs;
752  ProgInfo.NumSGPRsForWavesPerEU = MaxAddressableNumSGPRs;
753  }
754  }
755 
756  if (STM.hasSGPRInitBug()) {
757  ProgInfo.NumSGPR =
759  ProgInfo.NumSGPRsForWavesPerEU =
761  }
762 
763  if (MFI->getNumUserSGPRs() > STM.getMaxNumUserSGPRs()) {
765  DiagnosticInfoResourceLimit Diag(MF.getFunction(), "user SGPRs",
766  MFI->getNumUserSGPRs(),
768  Ctx.diagnose(Diag);
769  }
770 
771  if (MFI->getLDSSize() > static_cast<unsigned>(STM.getLocalMemorySize())) {
773  DiagnosticInfoResourceLimit Diag(MF.getFunction(), "local memory",
774  MFI->getLDSSize(),
776  Ctx.diagnose(Diag);
777  }
778 
780  &STM, ProgInfo.NumSGPRsForWavesPerEU);
782  &STM, ProgInfo.NumVGPRsForWavesPerEU);
783 
784  const SIModeRegisterDefaults Mode = MFI->getMode();
785 
786  // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
787  // register.
788  ProgInfo.FloatMode = getFPMode(Mode);
789 
790  ProgInfo.IEEEMode = Mode.IEEE;
791 
792 // Make the clamp modifier return 0 on NaN input.
793  ProgInfo.DX10Clamp = Mode.DX10Clamp;
794 
795  unsigned LDSAlignShift;
797  // LDS is allocated in 64 dword blocks.
798  LDSAlignShift = 8;
799  } else {
800  // LDS is allocated in 128 dword blocks.
801  LDSAlignShift = 9;
802  }
803 
804  unsigned LDSSpillSize =
806 
807  ProgInfo.LDSSize = MFI->getLDSSize() + LDSSpillSize;
808  ProgInfo.LDSBlocks =
809  alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
810 
811  // Scratch is allocated in 256 dword blocks.
812  unsigned ScratchAlignShift = 10;
813  // We need to program the hardware with the amount of scratch memory that
814  // is used by the entire wave. ProgInfo.ScratchSize is the amount of
815  // scratch memory used per thread.
816  ProgInfo.ScratchBlocks =
817  alignTo(ProgInfo.ScratchSize * STM.getWavefrontSize(),
818  1ULL << ScratchAlignShift) >>
819  ScratchAlignShift;
820 
821  if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
822  ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
823  ProgInfo.MemOrdered = 1;
824  }
825 
826  // 0 = X, 1 = XY, 2 = XYZ
827  unsigned TIDIGCompCnt = 0;
828  if (MFI->hasWorkItemIDZ())
829  TIDIGCompCnt = 2;
830  else if (MFI->hasWorkItemIDY())
831  TIDIGCompCnt = 1;
832 
833  ProgInfo.ComputePGMRSrc2 =
834  S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
836  // For AMDHSA, TRAP_HANDLER must be zero, as it is populated by the CP.
842  S_00B84C_TIDIG_COMP_CNT(TIDIGCompCnt) |
844  // For AMDHSA, LDS_SIZE must be zero, as it is populated by the CP.
845  S_00B84C_LDS_SIZE(STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks) |
846  S_00B84C_EXCP_EN(0);
847 
848  if (STM.hasGFX90AInsts()) {
850  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
851  ProgInfo.AccumOffset);
853  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
854  ProgInfo.TgSplit);
855  }
856 
857  ProgInfo.Occupancy = STM.computeOccupancy(MF.getFunction(), ProgInfo.LDSSize,
858  ProgInfo.NumSGPRsForWavesPerEU,
859  ProgInfo.NumVGPRsForWavesPerEU);
860 }
861 
862 static unsigned getRsrcReg(CallingConv::ID CallConv) {
863  switch (CallConv) {
864  default: LLVM_FALLTHROUGH;
872  }
873 }
874 
875 void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
876  const SIProgramInfo &CurrentProgramInfo) {
878  unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
879 
882 
883  OutStreamer->emitInt32(CurrentProgramInfo.getComputePGMRSrc1());
884 
886  OutStreamer->emitInt32(CurrentProgramInfo.ComputePGMRSrc2);
887 
889  OutStreamer->emitInt32(S_00B860_WAVESIZE(CurrentProgramInfo.ScratchBlocks));
890 
891  // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
892  // 0" comment but I don't see a corresponding field in the register spec.
893  } else {
894  OutStreamer->emitInt32(RsrcReg);
895  OutStreamer->emitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
896  S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4);
898  OutStreamer->emitIntValue(
899  S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
900  }
901 
904  OutStreamer->emitInt32(
905  S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks));
907  OutStreamer->emitInt32(MFI->getPSInputEnable());
909  OutStreamer->emitInt32(MFI->getPSInputAddr());
910  }
911 
912  OutStreamer->emitInt32(R_SPILLED_SGPRS);
913  OutStreamer->emitInt32(MFI->getNumSpilledSGPRs());
914  OutStreamer->emitInt32(R_SPILLED_VGPRS);
915  OutStreamer->emitInt32(MFI->getNumSpilledVGPRs());
916 }
917 
918 // This is the equivalent of EmitProgramInfoSI above, but for when the OS type
919 // is AMDPAL. It stores each compute/SPI register setting and other PAL
920 // metadata items into the PALMD::Metadata, combining with any provided by the
921 // frontend as LLVM metadata. Once all functions are written, the PAL metadata
922 // is then written as a single block in the .note section.
923 void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
924  const SIProgramInfo &CurrentProgramInfo) {
926  auto CC = MF.getFunction().getCallingConv();
927  auto MD = getTargetStreamer()->getPALMetadata();
928 
929  MD->setEntryPoint(CC, MF.getFunction().getName());
930  MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU);
931  MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU);
932  MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC));
933  if (AMDGPU::isCompute(CC)) {
934  MD->setRsrc2(CC, CurrentProgramInfo.ComputePGMRSrc2);
935  } else {
936  if (CurrentProgramInfo.ScratchBlocks > 0)
937  MD->setRsrc2(CC, S_00B84C_SCRATCH_EN(1));
938  }
939  // ScratchSize is in bytes, 16 aligned.
940  MD->setScratchSize(CC, alignTo(CurrentProgramInfo.ScratchSize, 16));
942  MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks));
943  MD->setSpiPsInputEna(MFI->getPSInputEnable());
944  MD->setSpiPsInputAddr(MFI->getPSInputAddr());
945  }
946 
947  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
948  if (STM.isWave32())
949  MD->setWave32(MF.getFunction().getCallingConv());
950 }
951 
952 void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
953  auto *MD = getTargetStreamer()->getPALMetadata();
954  const MachineFrameInfo &MFI = MF.getFrameInfo();
955  MD->setFunctionScratchSize(MF, MFI.getStackSize());
956 
957  // Set compute registers
958  MD->setRsrc1(CallingConv::AMDGPU_CS,
959  CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS));
960  MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.ComputePGMRSrc2);
961 
962  // Set optional info
963  MD->setFunctionLdsSize(MF, CurrentProgramInfo.LDSSize);
964  MD->setFunctionNumUsedVgprs(MF, CurrentProgramInfo.NumVGPRsForWavesPerEU);
965  MD->setFunctionNumUsedSgprs(MF, CurrentProgramInfo.NumSGPRsForWavesPerEU);
966 }
967 
968 // This is supposed to be log2(Size)
970  switch (Size) {
971  case 4:
972  return AMD_ELEMENT_4_BYTES;
973  case 8:
974  return AMD_ELEMENT_8_BYTES;
975  case 16:
976  return AMD_ELEMENT_16_BYTES;
977  default:
978  llvm_unreachable("invalid private_element_size");
979  }
980 }
981 
982 void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
983  const SIProgramInfo &CurrentProgramInfo,
984  const MachineFunction &MF) const {
985  const Function &F = MF.getFunction();
986  assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
987  F.getCallingConv() == CallingConv::SPIR_KERNEL);
988 
990  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
991 
993 
995  CurrentProgramInfo.getComputePGMRSrc1() |
996  (CurrentProgramInfo.ComputePGMRSrc2 << 32);
998 
999  if (CurrentProgramInfo.DynamicCallStack)
1001 
1004  getElementByteSizeValue(STM.getMaxPrivateElementSize(true)));
1005 
1006  if (MFI->hasPrivateSegmentBuffer()) {
1007  Out.code_properties |=
1009  }
1010 
1011  if (MFI->hasDispatchPtr())
1013 
1014  if (MFI->hasQueuePtr())
1016 
1017  if (MFI->hasKernargSegmentPtr())
1019 
1020  if (MFI->hasDispatchID())
1022 
1023  if (MFI->hasFlatScratchInit())
1025 
1026  if (MFI->hasDispatchPtr())
1028 
1029  if (STM.isXNACKEnabled())
1031 
1032  Align MaxKernArgAlign;
1033  Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
1034  Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
1035  Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
1036  Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
1037  Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
1038 
1039  // kernarg_segment_alignment is specified as log of the alignment.
1040  // The minimum alignment is 16.
1041  Out.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
1042 }
1043 
1045  const char *ExtraCode, raw_ostream &O) {
1046  // First try the generic code, which knows about modifiers like 'c' and 'n'.
1047  if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O))
1048  return false;
1049 
1050  if (ExtraCode && ExtraCode[0]) {
1051  if (ExtraCode[1] != 0)
1052  return true; // Unknown modifier.
1053 
1054  switch (ExtraCode[0]) {
1055  case 'r':
1056  break;
1057  default:
1058  return true;
1059  }
1060  }
1061 
1062  // TODO: Should be able to support other operand types like globals.
1063  const MachineOperand &MO = MI->getOperand(OpNo);
1064  if (MO.isReg()) {
1067  return false;
1068  } else if (MO.isImm()) {
1069  int64_t Val = MO.getImm();
1070  if (AMDGPU::isInlinableIntLiteral(Val)) {
1071  O << Val;
1072  } else if (isUInt<16>(Val)) {
1073  O << format("0x%" PRIx16, static_cast<uint16_t>(Val));
1074  } else if (isUInt<32>(Val)) {
1075  O << format("0x%" PRIx32, static_cast<uint32_t>(Val));
1076  } else {
1077  O << format("0x%" PRIx64, static_cast<uint64_t>(Val));
1078  }
1079  return false;
1080  }
1081  return true;
1082 }
1083 
1088 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm::AMDGPUAsmPrinter::emitBasicBlockStart
void emitBasicBlockStart(const MachineBasicBlock &MBB) override
Targets can override this to emit stuff at the start of a basic block.
Definition: AMDGPUAsmPrinter.cpp:285
llvm::MCTargetStreamer::getStreamer
MCStreamer & getStreamer()
Definition: MCStreamer.h:99
S_00B84C_TGID_Y_EN
#define S_00B84C_TGID_Y_EN(x)
Definition: SIDefines.h:822
i
i
Definition: README.txt:29
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:148
llvm::AMDGPU::isHsaAbiVersion3
bool isHsaAbiVersion3(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:114
getFPMode
static uint32_t getFPMode(AMDGPU::SIModeRegisterDefaults Mode)
Definition: AMDGPUAsmPrinter.cpp:63
FP_ROUND_ROUND_TO_NEAREST
#define FP_ROUND_ROUND_TO_NEAREST
Definition: SIDefines.h:886
llvm::AMDGPUTargetStreamer::getTargetID
const Optional< AMDGPU::IsaInfo::AMDGPUTargetID > & getTargetID() const
Definition: AMDGPUTargetStreamer.h:97
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:103
getRsrcReg
static unsigned getRsrcReg(CallingConv::ID CallConv)
Definition: AMDGPUAsmPrinter.cpp:862
AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR
Definition: AMDKernelCodeT.h:95
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::AMDGPUPALMetadata::readFromIR
void readFromIR(Module &M)
Definition: AMDGPUPALMetadata.cpp:31
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
R_00B128_SPI_SHADER_PGM_RSRC1_VS
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS
Definition: SIDefines.h:785
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
AMD_ELEMENT_4_BYTES
@ AMD_ELEMENT_4_BYTES
Definition: AMDKernelCodeT.h:55
llvm::AMDGPU::getIsaVersion
IsaVersion getIsaVersion(StringRef GPU)
Definition: TargetParser.cpp:189
SIMachineFunctionInfo.h
llvm::SIMachineFunctionInfo::getLDSWaveSpillSize
unsigned getLDSWaveSpillSize() const
Definition: SIMachineFunctionInfo.h:900
llvm::MCContext
Context object for machine code objects.
Definition: MCContext.h:72
llvm::Function
Definition: Function.h:61
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::setXnackSetting
void setXnackSetting(TargetIDSetting NewXnackSetting)
Sets xnack setting to NewXnackSetting.
Definition: AMDGPUBaseInfo.h:120
llvm::SIMachineFunctionInfo::getNumSpilledSGPRs
unsigned getNumSpilledSGPRs() const
Definition: SIMachineFunctionInfo.h:808
AMDGPUHSAMetadataStreamer.h
AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK
@ AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK
Definition: AMDKernelCodeT.h:184
llvm::CallingConv::AMDGPU_HS
@ AMDGPU_HS
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:223
llvm::AMDGPUTargetStreamer::EmitDirectiveAMDGCNTarget
virtual void EmitDirectiveAMDGCNTarget()=0
MCSectionELF.h
FP_DENORM_MODE_DP
#define FP_DENORM_MODE_DP(x)
Definition: SIDefines.h:905
llvm::GlobalObject::getAlign
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:80
amd_kernel_code_t::compute_pgm_resource_registers
uint64_t compute_pgm_resource_registers
Shader program settings for CS.
Definition: AMDKernelCodeT.h:558
llvm::AMDGPUAsmPrinter::AMDGPUAsmPrinter
AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
Definition: AMDGPUAsmPrinter.cpp:83
llvm::SIProgramInfo::WgpMode
uint32_t WgpMode
Definition: SIProgramInfo.h:35
llvm::SIProgramInfo::NumSGPR
uint32_t NumSGPR
Definition: SIProgramInfo.h:51
llvm::AMDGPUAsmPrinter::emitFunctionBodyStart
void emitFunctionBodyStart() override
Targets can override this to emit stuff before the first basic block in the function.
Definition: AMDGPUAsmPrinter.cpp:176
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::CallingConv::AMDGPU_CS
@ AMDGPU_CS
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:213
llvm::AMDGPUSubtarget::SEA_ISLANDS
@ SEA_ISLANDS
Definition: AMDGPUSubtarget.h:38
llvm::AsmPrinter::getNameWithPrefix
void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const
Definition: AsmPrinter.cpp:481
MCAssembler.h
llvm::AMDGPUAsmPrinter::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
Record analysis usage.
Definition: AMDGPUAsmPrinter.cpp:1084
llvm::AsmPrinter::doFinalization
bool doFinalization(Module &M) override
Shut down the asmprinter.
Definition: AsmPrinter.cpp:1707
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:124
R_00B528_SPI_SHADER_PGM_RSRC1_LS
#define R_00B528_SPI_SHADER_PGM_RSRC1_LS
Definition: SIDefines.h:807
llvm::AMDGPUPALMetadata::setEntryPoint
void setEntryPoint(unsigned CC, StringRef Name)
Definition: AMDGPUPALMetadata.cpp:188
R_0286CC_SPI_PS_INPUT_ENA
#define R_0286CC_SPI_PS_INPUT_ENA
Definition: SIDefines.h:846
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:321
amd_element_byte_size_t
amd_element_byte_size_t
The values used to define the number of bytes to use for the swizzle element size.
Definition: AMDKernelCodeT.h:53
AMDGPUAsmPrinter.h
llvm::MCSymbol::isDefined
bool isDefined() const
isDefined - Check if this symbol is defined (i.e., it has an address).
Definition: MCSymbol.h:243
amd_kernel_code_t::workgroup_group_segment_byte_size
uint32_t workgroup_group_segment_byte_size
The amount of group segment memory required by a work-group in bytes.
Definition: AMDKernelCodeT.h:574
llvm::GCNSubtarget::isTrapHandlerEnabled
bool isTrapHandlerEnabled() const
Definition: GCNSubtarget.h:528
llvm::AMDGPUMachineFunction::getLDSSize
unsigned getLDSSize() const
Definition: AMDGPUMachineFunction.h:70
llvm::AMDGPU::IsaInfo::getNumExtraSGPRs
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
Definition: AMDGPUBaseInfo.cpp:657
llvm::AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough
bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const override
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
Definition: AMDGPUAsmPrinter.cpp:162
AMD_HSA_BITS_SET
#define AMD_HSA_BITS_SET(dst, mask, val)
Definition: AMDKernelCodeT.h:43
llvm::AMDGPU::HSAMD::MetadataStreamerV3
Definition: AMDGPUHSAMetadataStreamer.h:58
llvm::AMDGPUTargetStreamer::EmitDirectiveHSACodeObjectVersion
virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor)=0
llvm::SIMachineFunctionInfo::getPSInputEnable
unsigned getPSInputEnable() const
Definition: SIMachineFunctionInfo.h:828
llvm::Optional< uint32_t >
llvm::AMDGPU::SIModeRegisterDefaults
Definition: AMDGPUBaseInfo.h:911
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
S_00B84C_USER_SGPR
#define S_00B84C_USER_SGPR(x)
Definition: SIDefines.h:813
llvm::AMDGPUAsmPrinter
Definition: AMDGPUAsmPrinter.h:40
llvm::SIProgramInfo::NumVGPR
uint32_t NumVGPR
Definition: SIProgramInfo.h:46
AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT
@ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT
Definition: AMDKernelCodeT.h:107
llvm::MachineBasicBlock::back
MachineInstr & back()
Definition: MachineBasicBlock.h:248
llvm::AMDGPUTargetStreamer::EmitAMDKernelCodeT
virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header)=0
llvm::MCSectionELF
This represents a section on linux, lots of unix variants and some bare metal systems.
Definition: MCSectionELF.h:28
llvm::SIProgramInfo::NumSGPRsForWavesPerEU
uint32_t NumSGPRsForWavesPerEU
Definition: SIProgramInfo.h:56
llvm::MCObjectFileInfo::getContext
MCContext & getContext() const
Definition: MCObjectFileInfo.h:235
llvm::amdhsa::kernel_descriptor_t::compute_pgm_rsrc2
uint32_t compute_pgm_rsrc2
Definition: AMDHSAKernelDescriptor.h:174
llvm::SIProgramInfo::LDSSize
uint32_t LDSSize
Definition: SIProgramInfo.h:52
llvm::SIProgramInfo::AccumOffset
uint32_t AccumOffset
Definition: SIProgramInfo.h:49
R_0286D0_SPI_PS_INPUT_ADDR
#define R_0286D0_SPI_PS_INPUT_ADDR
Definition: SIDefines.h:847
llvm::AMDGPUSubtarget::getKernArgSegmentSize
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
Definition: AMDGPUSubtarget.cpp:683
llvm::AMDGPUTargetStreamer::initializeTargetID
void initializeTargetID(const MCSubtargetInfo &STI)
Definition: AMDGPUTargetStreamer.h:103
llvm::AsmPrinter::emitGlobalVariable
virtual void emitGlobalVariable(const GlobalVariable *GV)
Emit the specified global variable to the .s file.
Definition: AsmPrinter.cpp:508
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::GlobalVariable::hasInitializer
bool hasInitializer() const
Definitions have initializers, declarations don't.
Definition: GlobalVariable.h:92
llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:216
AMDHSAKernelDescriptor.h
AMD_ELEMENT_8_BYTES
@ AMD_ELEMENT_8_BYTES
Definition: AMDKernelCodeT.h:56
llvm::GCNSubtarget::getInstrInfo
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:213
S_00B84C_TIDIG_COMP_CNT
#define S_00B84C_TIDIG_COMP_CNT(x)
Definition: SIDefines.h:831
llvm::ELF::SHT_PROGBITS
@ SHT_PROGBITS
Definition: ELF.h:910
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::AMDGPU::IsaVersion
Instruction set architecture version.
Definition: TargetParser.h:105
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:206
R600AsmPrinter.h
llvm::AMDGPUSubtarget::isAmdHsaOS
bool isAmdHsaOS() const
Definition: AMDGPUSubtarget.h:118
S_00B84C_SCRATCH_EN
#define S_00B84C_SCRATCH_EN(x)
Definition: SIDefines.h:810
llvm::AMDGPU::isGFX90A
bool isGFX90A(const MCSubtargetInfo &STI)
Definition: AMDGPUBaseInfo.cpp:1473
llvm::SIProgramInfo::MemOrdered
uint32_t MemOrdered
Definition: SIProgramInfo.h:36
S_00B84C_EXCP_EN_MSB
#define S_00B84C_EXCP_EN_MSB(x)
Definition: SIDefines.h:835
llvm::AMDGPUMachineFunction::needsWaveLimiter
bool needsWaveLimiter() const
Definition: AMDGPUMachineFunction.h:92
llvm::AMDGPU::IsaInfo::AMDGPUTargetID
Definition: AMDGPUBaseInfo.h:85
TargetMachine.h
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:228
llvm::AMDGPU::isShader
bool isShader(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1358
llvm::AsmPrinter::emitLinkage
virtual void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const
This emits linkage information about GVSym based on GV, if this is supported by the target.
Definition: AsmPrinter.cpp:441
llvm::SIProgramInfo::NumArchVGPR
uint32_t NumArchVGPR
Definition: SIProgramInfo.h:47
llvm::SIProgramInfo::ComputePGMRSrc2
uint64_t ComputePGMRSrc2
Definition: SIProgramInfo.h:43
GCNSubtarget.h
S_00B84C_TGID_Z_EN
#define S_00B84C_TGID_Z_EN(x)
Definition: SIDefines.h:825
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:537
llvm::MachineFunction::setAlignment
void setAlignment(Align A)
setAlignment - Set the alignment of the function.
Definition: MachineFunction.h:677
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:724
S_00B02C_EXTRA_LDS_SIZE
#define S_00B02C_EXTRA_LDS_SIZE(x)
Definition: SIDefines.h:784
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
Definition: AMDKernelCodeT.h:87
G_00B84C_TRAP_HANDLER
#define G_00B84C_TRAP_HANDLER(x)
Definition: SIDefines.h:817
llvm::getTheAMDGPUTarget
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
Definition: AMDGPUTargetInfo.cpp:20
llvm::AsmPrinter::OutStreamer
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:97
llvm::SIProgramInfo::ScratchSize
uint64_t ScratchSize
Definition: SIProgramInfo.h:37
AMDGPUTargetInfo.h
llvm::Log2
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:207
llvm::MCSubtargetInfo::getTargetTriple
const Triple & getTargetTriple() const
Definition: MCSubtargetInfo.h:107
MCContext.h
llvm::SIProgramInfo::NumVGPRsForWavesPerEU
uint32_t NumVGPRsForWavesPerEU
Definition: SIProgramInfo.h:59
llvm::SIMachineFunctionInfo::hasWorkGroupIDZ
bool hasWorkGroupIDZ() const
Definition: SIMachineFunctionInfo.h:659
llvm::AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
Definition: AMDGPUBaseInfo.h:74
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::AsmPrinter::SetupMachineFunction
virtual void SetupMachineFunction(MachineFunction &MF)
This should be called when a new MachineFunction is being processed from runOnMachineFunction.
Definition: AsmPrinter.cpp:1962
llvm::AsmPrinter::emitFunctionEntryLabel
virtual void emitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
Definition: AsmPrinter.cpp:843
llvm::SIProgramInfo::LDSBlocks
uint32_t LDSBlocks
Definition: SIProgramInfo.h:40
llvm::amdhsa::kernel_descriptor_t::group_segment_fixed_size
uint32_t group_segment_fixed_size
Definition: AMDHSAKernelDescriptor.h:166
llvm::DiagnosticInfoResourceLimit
Diagnostic information for stack size etc.
Definition: DiagnosticInfo.h:180
llvm::AsmPrinter::isBlockOnlyReachableByFallthrough
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
Definition: AsmPrinter.cpp:3372
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::AMDGPUResourceUsageAnalysis::getResourceInfo
const SIFunctionResourceInfo & getResourceInfo(const Function *F) const
Definition: AMDGPUResourceUsageAnalysis.h:63
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
R_00B228_SPI_SHADER_PGM_RSRC1_GS
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS
Definition: SIDefines.h:790
llvm::AMDGPU
Definition: AMDGPUMetadataVerifier.h:22
llvm::GCNSubtarget::isWave32
bool isWave32() const
Definition: GCNSubtarget.h:1136
llvm::GCNSubtarget::hasSGPRInitBug
bool hasSGPRInitBug() const
Definition: GCNSubtarget.h:897
G_00B84C_TGID_Y_EN
#define G_00B84C_TGID_Y_EN(x)
Definition: SIDefines.h:823
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
amd_kernel_code_t::workitem_private_segment_byte_size
uint32_t workitem_private_segment_byte_size
The amount of memory required for the combined private, spill and arg segments for a work-item in byt...
Definition: AMDKernelCodeT.h:568
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
getElementByteSizeValue
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)
Definition: AMDGPUAsmPrinter.cpp:969
llvm::SIProgramInfo::ComputePGMRSrc3GFX90A
uint64_t ComputePGMRSrc3GFX90A
Definition: SIProgramInfo.h:44
llvm::AMDGPUAsmPrinter::PrintAsmOperand
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &O) override
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
Definition: AMDGPUAsmPrinter.cpp:1044
llvm::SIProgramInfo::DynamicCallStack
bool DynamicCallStack
Definition: SIProgramInfo.h:66
AMDHSA_BITS_GET
#define AMDHSA_BITS_GET(SRC, MSK)
Definition: AMDHSAKernelDescriptor.h:37
llvm::AMDGPUAsmPrinter::emitStartOfAsmFile
void emitStartOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the start of their fi...
Definition: AMDGPUAsmPrinter.cpp:111
llvm::MachineFrameInfo::getStackSize
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Definition: MachineFrameInfo.h:553
llvm::amdhsa::kernel_descriptor_t::kernel_code_properties
uint16_t kernel_code_properties
Definition: AMDHSAKernelDescriptor.h:175
llvm::IndexedInstrProf::Version
const uint64_t Version
Definition: InstrProf.h:991
llvm::MCAssembler::getEmitterPtr
MCCodeEmitter * getEmitterPtr() const
Definition: MCAssembler.h:296
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::MCSymbol::getName
StringRef getName() const
getName - Get the symbol name.
Definition: MCSymbol.h:198
llvm::GCNSubtarget::isTgSplitEnabled
bool isTgSplitEnabled() const
Definition: GCNSubtarget.h:536
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::CallingConv::AMDGPU_GS
@ AMDGPU_GS
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:207
llvm::StringRef::str
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:245
llvm::GCNSubtarget::dumpCode
bool dumpCode() const
Definition: GCNSubtarget.h:446
llvm::AMDGPUSubtarget::getLocalMemorySize
unsigned getLocalMemorySize() const
Definition: AMDGPUSubtarget.h:204
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::AsmPrinter::emitBasicBlockStart
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
Definition: AsmPrinter.cpp:3244
llvm::AMDGPU::isGFX10Plus
bool isGFX10Plus(const MCSubtargetInfo &STI)
Definition: AMDGPUBaseInfo.cpp:1455
llvm::amdhsa::kernel_descriptor_t::compute_pgm_rsrc3
uint32_t compute_pgm_rsrc3
Definition: AMDHSAKernelDescriptor.h:172
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::AMDGPUMachineFunction::getMode
AMDGPU::SIModeRegisterDefaults getMode() const
Definition: AMDGPUMachineFunction.h:74
llvm::SmallString< 128 >
llvm::AMDGPU::isHsaAbiVersion2
bool isHsaAbiVersion2(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:108
llvm::DK_ResourceLimit
@ DK_ResourceLimit
Definition: DiagnosticInfo.h:57
llvm::AMDGPUTargetStreamer::EmitAmdhsaKernelDescriptor
virtual void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr)=0
llvm::CallingConv::AMDGPU_LS
@ AMDGPU_LS
Calling convention used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:231
llvm::AMDGPUSubtarget::getWavefrontSize
unsigned getWavefrontSize() const
Definition: AMDGPUSubtarget.h:196
S_00B84C_TGID_X_EN
#define S_00B84C_TGID_X_EN(x)
Definition: SIDefines.h:819
llvm::Triple::AMDHSA
@ AMDHSA
Definition: Triple.h:190
R_00B84C_COMPUTE_PGM_RSRC2
#define R_00B84C_COMPUTE_PGM_RSRC2
Definition: SIDefines.h:809
llvm::GCNSubtarget::MaxWaveScratchSize
static const unsigned MaxWaveScratchSize
Definition: GCNSubtarget.h:204
AMD_CODE_PROPERTY_IS_PTR64
@ AMD_CODE_PROPERTY_IS_PTR64
Definition: AMDKernelCodeT.h:172
R_0286E8_SPI_TMPRING_SIZE
#define R_0286E8_SPI_TMPRING_SIZE
Definition: SIDefines.h:910
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:626
llvm::Optional::getValueOr
constexpr T getValueOr(U &&value) const LLVM_LVALUE_FUNCTION
Definition: Optional.h:297
llvm::amdhsa::kernel_descriptor_t::private_segment_fixed_size
uint32_t private_segment_fixed_size
Definition: AMDHSAKernelDescriptor.h:167
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:192
llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition: GlobalVariable.h:136
AMDGPUResourceUsageAnalysis.h
Analyzes how many registers and other resources are used by functions.
llvm::AMDGPU::isInlinableIntLiteral
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
Definition: AMDGPUBaseInfo.h:836
llvm::MCAssembler
Definition: MCAssembler.h:60
llvm::SIProgramInfo::FloatMode
uint32_t FloatMode
Definition: SIProgramInfo.h:30
AMD_ELEMENT_16_BYTES
@ AMD_ELEMENT_16_BYTES
Definition: AMDKernelCodeT.h:57
AMDGPUTargetStreamer.h
llvm::isUInt< 16 >
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:408
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:321
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
uint64_t
llvm::GlobalValue::getVisibility
VisibilityTypes getVisibility() const
Definition: GlobalValue.h:229
llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:239
R_SPILLED_VGPRS
#define R_SPILLED_VGPRS
Definition: SIDefines.h:923
llvm::SIMachineFunctionInfo::hasWorkGroupInfo
bool hasWorkGroupInfo() const
Definition: SIMachineFunctionInfo.h:663
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:572
llvm::Triple::getOS
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:316
LLVM_EXTERNAL_VISIBILITY
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:132
llvm::TargetMachine::getMCSubtargetInfo
const MCSubtargetInfo * getMCSubtargetInfo() const
Definition: TargetMachine.h:211
llvm::SIMachineFunctionInfo::getMaxWavesPerEU
unsigned getMaxWavesPerEU() const
Definition: SIMachineFunctionInfo.h:880
llvm::CallingConv::AMDGPU_VS
@ AMDGPU_VS
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
Definition: CallingConv.h:204
llvm::AMDGPUAsmPrinter::emitEndOfAsmFile
void emitEndOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the end of their file...
Definition: AMDGPUAsmPrinter.cpp:143
R_00B028_SPI_SHADER_PGM_RSRC1_PS
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS
Definition: SIDefines.h:776
llvm::SIMachineFunctionInfo::hasDispatchID
bool hasDispatchID() const
Definition: SIMachineFunctionInfo.h:643
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::AMDGPU::initDefaultAMDKernelCodeT
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:764
R_00B848_COMPUTE_PGM_RSRC1
#define R_00B848_COMPUTE_PGM_RSRC1
Definition: SIDefines.h:849
llvm::SIMachineFunctionInfo::getNumSpilledVGPRs
unsigned getNumSpilledVGPRs() const
Definition: SIMachineFunctionInfo.h:812
llvm::AMDGPUMachineFunction::isEntryFunction
bool isEntryFunction() const
Definition: AMDGPUMachineFunction.h:78
AMDGPUInstPrinter.h
llvm::AMDGPUTargetStreamer::EmitISAVersion
virtual bool EmitISAVersion()=0
llvm::AMDGPUAsmPrinter::getTargetStreamer
AMDGPUTargetStreamer * getTargetStreamer() const
Definition: AMDGPUAsmPrinter.cpp:105
llvm::MCSymbol::isVariable
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition: MCSymbol.h:293
R_00B428_SPI_SHADER_PGM_RSRC1_HS
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS
Definition: SIDefines.h:799
llvm::CallingConv::SPIR_KERNEL
@ SPIR_KERNEL
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:152
llvm::isUInt< 32 >
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:411
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:354
llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition: MachineFunction.cpp:541
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::AsmPrinter::getSymbol
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:486
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:79
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1609
llvm::AMDGPUAsmPrinter::DisasmLines
std::vector< std::string > DisasmLines
Definition: AMDGPUAsmPrinter.h:136
llvm::AMDGPUAsmPrinter::getGlobalSTI
const MCSubtargetInfo * getGlobalSTI() const
Definition: AMDGPUAsmPrinter.cpp:101
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:642
createAMDGPUAsmPrinterPass
static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)
Definition: AMDGPUAsmPrinter.cpp:71
llvm::getTheGCNTarget
Target & getTheGCNTarget()
The target for GCN GPUs.
Definition: AMDGPUTargetInfo.cpp:25
llvm::MCContext::reportError
void reportError(SMLoc L, const Twine &Msg)
Definition: MCContext.cpp:963
llvm::GCNSubtarget::getMinNumVGPRs
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1095
llvm::SIProgramInfo::VGPRBlocks
uint32_t VGPRBlocks
Definition: SIProgramInfo.h:27
llvm::GCNSubtarget::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs() const
Definition: GCNSubtarget.h:1019
llvm::AsmPrinter::MF
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:100
llvm::AsmPrinter::OutContext
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:92
llvm::SIProgramInfo::getPGMRSrc1
uint64_t getPGMRSrc1(CallingConv::ID CC) const
Definition: SIProgramInfo.cpp:31
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:262
llvm::GCNSubtarget::hasGFX90AInsts
bool hasGFX90AInsts() const
Definition: GCNSubtarget.h:965
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:360
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::AMDGPUMachineFunction::isMemoryBound
bool isMemoryBound() const
Definition: AMDGPUMachineFunction.h:88
llvm::AMDGPU::IsaInfo::getNumSGPRBlocks
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
Definition: AMDGPUBaseInfo.cpp:687
amd_kernel_code_t::kernarg_segment_alignment
uint8_t kernarg_segment_alignment
The maximum byte alignment of variables used by the kernel in the specified memory segment.
Definition: AMDKernelCodeT.h:634
amd_kernel_code_t::kernarg_segment_byte_size
uint64_t kernarg_segment_byte_size
The size in bytes of the kernarg segment that holds the values of the arguments to the kernel.
Definition: AMDKernelCodeT.h:583
llvm::getCPU
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
Definition: AVRTargetMachine.cpp:32
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1532
FP_DENORM_MODE_SP
#define FP_DENORM_MODE_SP(x)
Definition: SIDefines.h:904
llvm::MCSymbol::redefineIfPossible
void redefineIfPossible()
Prepare this symbol to be redefined.
Definition: MCSymbol.h:225
S_0286E8_WAVESIZE
#define S_0286E8_WAVESIZE(x)
Definition: SIDefines.h:911
llvm::SIMachineFunctionInfo::hasWorkGroupIDX
bool hasWorkGroupIDX() const
Definition: SIMachineFunctionInfo.h:651
llvm::MachineBasicBlock::getNumber
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
Definition: MachineBasicBlock.h:1056
llvm::AMDGPUTargetStreamer::EmitCodeEnd
virtual bool EmitCodeEnd(const MCSubtargetInfo &STI)=0
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AMDGPUAsmPrinter::emitFunctionBodyEnd
void emitFunctionBodyEnd() override
Targets can override this to emit stuff after the last basic block in the function.
Definition: AMDGPUAsmPrinter.cpp:221
AMDGPU.h
llvm::AsmPrinter::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
Record analysis usage.
Definition: AsmPrinter.cpp:260
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:489
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::AMDGPU::isCompute
bool isCompute(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1377
TargetLoweringObjectFile.h
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
uint32_t
llvm::AMDGPU::IsaInfo::getNumVGPRBlocks
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, Optional< bool > EnableWavefrontSize32)
Definition: AMDGPUBaseInfo.cpp:754
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
G_00B84C_SCRATCH_EN
#define G_00B84C_SCRATCH_EN(x)
Definition: SIDefines.h:811
llvm::DiagnosticInfoStackSize
Definition: DiagnosticInfo.h:217
llvm::AMDGPUTargetStreamer
Definition: AMDGPUTargetStreamer.h:34
amd_kernel_code_t
AMD Kernel Code Object (amd_kernel_code_t).
Definition: AMDKernelCodeT.h:526
R_00B328_SPI_SHADER_PGM_RSRC1_ES
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES
Definition: SIDefines.h:798
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::SIMachineFunctionInfo::hasPrivateSegmentBuffer
bool hasPrivateSegmentBuffer() const
Definition: SIMachineFunctionInfo.h:627
AMDHSA_BITS_SET
#define AMDHSA_BITS_SET(DST, MSK, VAL)
Definition: AMDHSAKernelDescriptor.h:42
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:273
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::AMDGPUAsmPrinter::DisasmLineMaxLen
size_t DisasmLineMaxLen
Definition: AMDGPUAsmPrinter.h:137
llvm::amdhsa::kernel_descriptor_t::kernarg_size
uint32_t kernarg_size
Definition: AMDHSAKernelDescriptor.h:168
G_00B84C_TGID_Z_EN
#define G_00B84C_TGID_Z_EN(x)
Definition: SIDefines.h:826
FP_ROUND_MODE_SP
#define FP_ROUND_MODE_SP(x)
Definition: SIDefines.h:893
llvm::GCNSubtarget::getMinNumSGPRs
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1025
FP_ROUND_MODE_DP
#define FP_ROUND_MODE_DP(x)
Definition: SIDefines.h:894
llvm::SIProgramInfo::FlatUsed
bool FlatUsed
Definition: SIProgramInfo.h:53
llvm::SIMachineFunctionInfo::getNumUserSGPRs
unsigned getNumUserSGPRs() const
Definition: SIMachineFunctionInfo.h:723
llvm::SIProgramInfo::IEEEMode
uint32_t IEEEMode
Definition: SIProgramInfo.h:34
llvm::SIProgramInfo::NumAccVGPR
uint32_t NumAccVGPR
Definition: SIProgramInfo.h:48
llvm::SIMachineFunctionInfo::hasWorkItemIDZ
bool hasWorkItemIDZ() const
Definition: SIMachineFunctionInfo.h:679
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::SIProgramInfo::Occupancy
uint32_t Occupancy
Definition: SIProgramInfo.h:62
llvm::AMDGPU::HSAMD::Kernel::Key::SymbolName
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
Definition: AMDGPUMetadata.h:381
llvm::LLVMContext::diagnose
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Definition: LLVMContext.cpp:228
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition: Twine.h:83
llvm::AMDGPUTargetStreamer::EmitAMDGPUSymbolType
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type)=0
llvm::Triple::OSType
OSType
Definition: Triple.h:164
S_00B84C_TG_SIZE_EN
#define S_00B84C_TG_SIZE_EN(x)
Definition: SIDefines.h:828
llvm::empty
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:254
std
Definition: BitVector.h:838
llvm::SIMachineFunctionInfo::getMaxFlatWorkGroupSize
unsigned getMaxFlatWorkGroupSize() const
Definition: SIMachineFunctionInfo.h:864
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:592
llvm::SIMachineFunctionInfo::hasDispatchPtr
bool hasDispatchPtr() const
Definition: SIMachineFunctionInfo.h:631
S_00B84C_EXCP_EN
#define S_00B84C_EXCP_EN(x)
Definition: SIDefines.h:842
uint16_t
llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS
@ VOLCANIC_ISLANDS
Definition: AMDGPUSubtarget.h:39
llvm::AMDGPUMachineFunction::isModuleEntryFunction
bool isModuleEntryFunction() const
Definition: AMDGPUMachineFunction.h:82
amd_kernel_code_t::code_properties
uint32_t code_properties
Code properties.
Definition: AMDKernelCodeT.h:562
llvm::GCNSubtarget::getTargetID
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition: GCNSubtarget.h:249
llvm::createR600AsmPrinterPass
AsmPrinter * createR600AsmPrinterPass(TargetMachine &TM, std::unique_ptr< MCStreamer > &&Streamer)
Definition: R600AsmPrinter.cpp:31
R_00B860_COMPUTE_TMPRING_SIZE
#define R_00B860_COMPUTE_TMPRING_SIZE
Definition: SIDefines.h:907
Success
#define Success
Definition: AArch64Disassembler.cpp:260
llvm::Triple::AMDPAL
@ AMDPAL
Definition: Triple.h:197
llvm::SIProgramInfo::TgSplit
uint32_t TgSplit
Definition: SIProgramInfo.h:50
DiagnosticInfo.h
llvm::SIMachineFunctionInfo::hasWorkItemIDY
bool hasWorkItemIDY() const
Definition: SIMachineFunctionInfo.h:675
llvm::amdhsa::kernel_descriptor_t
Definition: AMDHSAKernelDescriptor.h:165
llvm::AsmPrinter
This class is intended to be used as a driving class for all asm writers.
Definition: AsmPrinter.h:82
S_00B028_SGPRS
#define S_00B028_SGPRS(x)
Definition: SIDefines.h:778
llvm::SIProgramInfo::VCCUsed
bool VCCUsed
Definition: SIProgramInfo.h:69
llvm::AsmPrinter::emitVisibility
void emitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition=true) const
This emits visibility information about symbol, if this is supported by the target.
Definition: AsmPrinter.cpp:3332
llvm::GCNSubtarget::getMaxNumUserSGPRs
unsigned getMaxNumUserSGPRs() const
Definition: GCNSubtarget.h:784
llvm::AMDGPU::HSAMD::MetadataStreamerV2
Definition: AMDGPUHSAMetadataStreamer.h:142
llvm::AMDGPUTargetStreamer::EmitDirectiveHSACodeObjectISAV2
virtual void EmitDirectiveHSACodeObjectISAV2(uint32_t Major, uint32_t Minor, uint32_t Stepping, StringRef VendorName, StringRef ArchName)=0
llvm::MachineOperand::isImm
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Definition: MachineOperand.h:323
llvm::SIProgramInfo::getComputePGMRSrc1
uint64_t getComputePGMRSrc1() const
Compute the value of the ComputePGMRsrc1 register.
Definition: SIProgramInfo.cpp:23
llvm::AMDGPUAsmPrinter::getPassName
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
Definition: AMDGPUAsmPrinter.cpp:97
R_SPILLED_SGPRS
#define R_SPILLED_SGPRS
Definition: SIDefines.h:922
llvm::amdhsa::kernel_descriptor_t::compute_pgm_rsrc1
uint32_t compute_pgm_rsrc1
Definition: AMDHSAKernelDescriptor.h:173
llvm::AMDGPUAsmPrinter::doFinalization
bool doFinalization(Module &M) override
Shut down the asmprinter.
Definition: AMDGPUAsmPrinter.cpp:332
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE
@ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE
Definition: AMDKernelCodeT.h:163
llvm::GlobalValue::getAddressSpace
unsigned getAddressSpace() const
Definition: Globals.cpp:112
llvm::GCNSubtarget::getGeneration
Generation getGeneration() const
Definition: GCNSubtarget.h:264
llvm::SIInstrInfo
Definition: SIInstrInfo.h:38
amd_kernel_code_t::workitem_vgpr_count
uint16_t workitem_vgpr_count
Number of vector registers used by each work-item.
Definition: AMDKernelCodeT.h:599
llvm::SIProgramInfo
Track resource usage for kernels / entry functions.
Definition: SIProgramInfo.h:25
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:107
G_00B84C_USER_SGPR
#define G_00B84C_USER_SGPR(x)
Definition: SIDefines.h:814
llvm::CallingConv::AMDGPU_ES
@ AMDGPU_ES
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use.
Definition: CallingConv.h:236
llvm::GCNSubtarget::isCuModeEnabled
bool isCuModeEnabled() const
Definition: GCNSubtarget.h:540
G_00B84C_TIDIG_COMP_CNT
#define G_00B84C_TIDIG_COMP_CNT(x)
Definition: SIDefines.h:832
llvm::AsmPrinter::TM
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:85
llvm::TargetMachine::getSubtarget
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Definition: TargetMachine.h:166
llvm::TargetRegistry::RegisterAsmPrinter
static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)
RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.
Definition: TargetRegistry.h:903
S_00B028_VGPRS
#define S_00B028_VGPRS(x)
Definition: SIDefines.h:777
llvm::AsmPrinter::getObjFileLowering
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
Definition: AsmPrinter.cpp:224
MCStreamer.h
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
Definition: SIMachineFunctionInfo.h:335
AMDKernelCodeT.h
llvm::AsmPrinter::emitFunctionBody
void emitFunctionBody()
This method emits the body and trailer for a function.
Definition: AsmPrinter.cpp:1242
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID
Definition: AMDKernelCodeT.h:103
llvm::TargetMachine::getTargetTriple
const Triple & getTargetTriple() const
Definition: TargetMachine.h:128
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
S_00B84C_LDS_SIZE
#define S_00B84C_LDS_SIZE(x)
Definition: SIDefines.h:839
llvm::AMDGPUResourceUsageAnalysis
Definition: AMDGPUResourceUsageAnalysis.h:27
G_00B84C_TGID_X_EN
#define G_00B84C_TGID_X_EN(x)
Definition: SIDefines.h:820
llvm::SIMachineFunctionInfo::hasQueuePtr
bool hasQueuePtr() const
Definition: SIMachineFunctionInfo.h:635
llvm::SIMachineFunctionInfo::hasWorkGroupIDY
bool hasWorkGroupIDY() const
Definition: SIMachineFunctionInfo.h:655
llvm::MachineBasicBlock::empty
bool empty() const
Definition: MachineBasicBlock.h:240
llvm::AMDGPUAsmPrinter::emitFunctionEntryLabel
void emitFunctionEntryLabel() override
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
Definition: AMDGPUAsmPrinter.cpp:260
llvm::SIMachineFunctionInfo::hasKernargSegmentPtr
bool hasKernargSegmentPtr() const
Definition: SIMachineFunctionInfo.h:639
llvm::SIProgramInfo::DX10Clamp
uint32_t DX10Clamp
Definition: SIProgramInfo.h:32
llvm::SIMachineFunctionInfo::getPSInputAddr
unsigned getPSInputAddr() const
Definition: SIMachineFunctionInfo.h:824
llvm::SIProgramInfo::ScratchBlocks
uint32_t ScratchBlocks
Definition: SIProgramInfo.h:41
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:401
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:273
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR
Definition: AMDKernelCodeT.h:99
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::AMDGPUTargetStreamer::getPALMetadata
AMDGPUPALMetadata * getPALMetadata()
Definition: AMDGPUTargetStreamer.h:46
llvm::SIProgramInfo::SGPRBlocks
uint32_t SGPRBlocks
Definition: SIProgramInfo.h:28
llvm::AsmPrinter::isVerbose
bool isVerbose() const
Return true if assembly output should contain comments.
Definition: AsmPrinter.h:238
LLVMInitializeAMDGPUAsmPrinter
void LLVM_EXTERNAL_VISIBILITY LLVMInitializeAMDGPUAsmPrinter()
Definition: AMDGPUAsmPrinter.cpp:76
llvm::AMDGPU::isHsaAbiVersion3Or4
bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:126
llvm::AMDGPUAsmPrinter::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition: AMDGPUAsmPrinter.cpp:437
llvm::AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo
Definition: AMDGPUResourceUsageAnalysis.h:32
llvm::AMDGPUAsmPrinter::HexLines
std::vector< std::string > HexLines
Definition: AMDGPUAsmPrinter.h:136
llvm::AMDGPUMachineFunction
Definition: AMDGPUMachineFunction.h:20
llvm::SystemZICMP::Any
@ Any
Definition: SystemZISelLowering.h:383
TargetRegistry.h
llvm::AMDGPUAsmPrinter::emitGlobalVariable
void emitGlobalVariable(const GlobalVariable *GV) override
Emit the specified global variable to the .s file.
Definition: AMDGPUAsmPrinter.cpp:297
R_00B02C_SPI_SHADER_PGM_RSRC2_PS
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS
Definition: SIDefines.h:783
llvm::MCSubtargetInfo
Generic base class for all target subtargets.
Definition: MCSubtargetInfo.h:75
AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED
@ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED
Definition: AMDKernelCodeT.h:193
llvm::AsmPrinter::getFunctionNumber
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:220
llvm::GCNSubtarget::computeOccupancy
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
Definition: AMDGPUSubtarget.cpp:806
amd_kernel_code_t::wavefront_sgpr_count
uint16_t wavefront_sgpr_count
Number of scalar registers used by a wavefront.
Definition: AMDKernelCodeT.h:595
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR
Definition: AMDKernelCodeT.h:91
llvm::CallingConv::AMDGPU_PS
@ AMDGPU_PS
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:210
llvm::DS_Error
@ DS_Error
Definition: DiagnosticInfo.h:45
llvm::SIMachineFunctionInfo::hasFlatScratchInit
bool hasFlatScratchInit() const
Definition: SIMachineFunctionInfo.h:647
S_00B84C_TRAP_HANDLER
#define S_00B84C_TRAP_HANDLER(x)
Definition: SIDefines.h:816
llvm::AsmPrinter::PrintAsmOperand
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
Definition: AsmPrinterInlineAsm.cpp:599
llvm::AMDGPU::HSAMD::MetadataStreamerV4
Definition: AMDGPUHSAMetadataStreamer.h:128
llvm::AMDGPUInstPrinter::printRegOperand
static void printRegOperand(unsigned RegNo, raw_ostream &O, const MCRegisterInfo &MRI)
Definition: AMDGPUInstPrinter.cpp:341
llvm::ELF::STT_AMDGPU_HSA_KERNEL
@ STT_AMDGPU_HSA_KERNEL
Definition: ELF.h:1164
llvm::AMDGPUSubtarget::isAmdPalOS
bool isAmdPalOS() const
Definition: AMDGPUSubtarget.h:122
llvm::GCNSubtarget::hasMAIInsts
bool hasMAIInsts() const
Definition: GCNSubtarget.h:700
AMDGPUBaseInfo.h
S_00B860_WAVESIZE
#define S_00B860_WAVESIZE(x)
Definition: SIDefines.h:908