LLVM  15.0.0git
AMDGPUAsmPrinter.cpp
Go to the documentation of this file.
1 //===-- AMDGPUAsmPrinter.cpp - AMDGPU assembly printer --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 ///
11 /// The AMDGPUAsmPrinter is used to print both assembly string and also binary
12 /// code. When passed an MCAsmStreamer it prints assembly and when passed
13 /// an MCObjectStreamer it outputs binary code.
14 //
15 //===----------------------------------------------------------------------===//
16 //
17 
18 #include "AMDGPUAsmPrinter.h"
19 #include "AMDGPU.h"
22 #include "AMDKernelCodeT.h"
23 #include "GCNSubtarget.h"
26 #include "R600AsmPrinter.h"
27 #include "SIMachineFunctionInfo.h"
29 #include "Utils/AMDGPUBaseInfo.h"
30 #include "llvm/BinaryFormat/ELF.h"
32 #include "llvm/IR/DiagnosticInfo.h"
33 #include "llvm/MC/MCAssembler.h"
34 #include "llvm/MC/MCContext.h"
35 #include "llvm/MC/MCSectionELF.h"
36 #include "llvm/MC/MCStreamer.h"
37 #include "llvm/MC/TargetRegistry.h"
42 
43 using namespace llvm;
44 using namespace llvm::AMDGPU;
45 
46 // This should get the default rounding mode from the kernel. We just set the
47 // default here, but this could change if the OpenCL rounding mode pragmas are
48 // used.
49 //
50 // The denormal mode here should match what is reported by the OpenCL runtime
51 // for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
52 // can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
53 //
54 // AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
55 // precision, and leaves single precision to flush all and does not report
56 // CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
57 // CL_FP_DENORM for both.
58 //
59 // FIXME: It seems some instructions do not support single precision denormals
60 // regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
61 // and sin_f32, cos_f32 on most parts).
62 
63 // We want to use these instructions, and using fp32 denormals also causes
64 // instructions to run at the double precision rate for the device so it's
65 // probably best to just report no single precision denormals.
66 static uint32_t getFPMode(AMDGPU::SIModeRegisterDefaults Mode) {
69  FP_DENORM_MODE_SP(Mode.fpDenormModeSPValue()) |
70  FP_DENORM_MODE_DP(Mode.fpDenormModeDPValue());
71 }
72 
73 static AsmPrinter *
75  std::unique_ptr<MCStreamer> &&Streamer) {
76  return new AMDGPUAsmPrinter(tm, std::move(Streamer));
77 }
78 
84 }
85 
87  std::unique_ptr<MCStreamer> Streamer)
88  : AsmPrinter(TM, std::move(Streamer)) {
91  HSAMetadataStream.reset(new HSAMD::MetadataStreamerV2());
92  } else if (isHsaAbiVersion3(getGlobalSTI())) {
93  HSAMetadataStream.reset(new HSAMD::MetadataStreamerV3());
94  } else if (isHsaAbiVersion5(getGlobalSTI())) {
95  HSAMetadataStream.reset(new HSAMD::MetadataStreamerV5());
96  } else {
97  HSAMetadataStream.reset(new HSAMD::MetadataStreamerV4());
98  }
99  }
100 }
101 
103  return "AMDGPU Assembly Printer";
104 }
105 
107  return TM.getMCSubtargetInfo();
108 }
109 
111  if (!OutStreamer)
112  return nullptr;
113  return static_cast<AMDGPUTargetStreamer*>(OutStreamer->getTargetStreamer());
114 }
115 
118 }
119 
120 void AMDGPUAsmPrinter::initTargetStreamer(Module &M) {
122 
123  // TODO: Which one is called first, emitStartOfAsmFile or
124  // emitFunctionBodyStart?
125  if (getTargetStreamer() && !getTargetStreamer()->getTargetID())
126  initializeTargetID(M);
127 
128  if (TM.getTargetTriple().getOS() != Triple::AMDHSA &&
130  return;
131 
134 
136  HSAMetadataStream->begin(M, *getTargetStreamer()->getTargetID());
137 
140 
142  return;
143 
144  // HSA emits NT_AMD_HSA_CODE_OBJECT_VERSION for code objects v2.
147 
148  // HSA and PAL emit NT_AMD_HSA_ISA_VERSION for code objects v2.
151  Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
152 }
153 
155  // Init target streamer if it has not yet happened
157  initTargetStreamer(M);
158 
159  // Following code requires TargetStreamer to be present.
160  if (!getTargetStreamer())
161  return;
162 
163  if (TM.getTargetTriple().getOS() != Triple::AMDHSA ||
166 
167  // Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA).
168  // Emit HSA Metadata (NT_AMD_HSA_METADATA).
169  if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
170  HSAMetadataStream->end();
171  bool Success = HSAMetadataStream->emitTo(*getTargetStreamer());
172  (void)Success;
173  assert(Success && "Malformed HSA Metadata");
174  }
175 }
176 
178  const MachineBasicBlock *MBB) const {
180  return false;
181 
182  if (MBB->empty())
183  return true;
184 
185  // If this is a block implementing a long branch, an expression relative to
186  // the start of the block is needed.
187  // XXX - Is there a smarter way to check this?
188  return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64);
189 }
190 
193  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
194  const Function &F = MF->getFunction();
195 
196  // TODO: Which one is called first, emitStartOfAsmFile or
197  // emitFunctionBodyStart?
199  initializeTargetID(*F.getParent());
200 
201  const auto &FunctionTargetID = STM.getTargetID();
202  // Make sure function's xnack settings are compatible with module's
203  // xnack settings.
204  if (FunctionTargetID.isXnackSupported() &&
205  FunctionTargetID.getXnackSetting() != IsaInfo::TargetIDSetting::Any &&
206  FunctionTargetID.getXnackSetting() != getTargetStreamer()->getTargetID()->getXnackSetting()) {
207  OutContext.reportError({}, "xnack setting of '" + Twine(MF->getName()) +
208  "' function does not match module xnack setting");
209  return;
210  }
211  // Make sure function's sramecc settings are compatible with module's
212  // sramecc settings.
213  if (FunctionTargetID.isSramEccSupported() &&
214  FunctionTargetID.getSramEccSetting() != IsaInfo::TargetIDSetting::Any &&
215  FunctionTargetID.getSramEccSetting() != getTargetStreamer()->getTargetID()->getSramEccSetting()) {
216  OutContext.reportError({}, "sramecc setting of '" + Twine(MF->getName()) +
217  "' function does not match module sramecc setting");
218  return;
219  }
220 
221  if (!MFI.isEntryFunction())
222  return;
223 
224  if ((STM.isMesaKernel(F) || isHsaAbiVersion2(getGlobalSTI())) &&
225  (F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
226  F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
227  amd_kernel_code_t KernelCode;
228  getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
229  getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
230  }
231 
232  if (STM.isAmdHsaOS())
233  HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
234 }
235 
238  if (!MFI.isEntryFunction())
239  return;
240 
241  if (TM.getTargetTriple().getOS() != Triple::AMDHSA ||
243  return;
244 
245  auto &Streamer = getTargetStreamer()->getStreamer();
246  auto &Context = Streamer.getContext();
247  auto &ObjectFileInfo = *Context.getObjectFileInfo();
248  auto &ReadOnlySection = *ObjectFileInfo.getReadOnlySection();
249 
250  Streamer.PushSection();
251  Streamer.SwitchSection(&ReadOnlySection);
252 
253  // CP microcode requires the kernel descriptor to be allocated on 64 byte
254  // alignment.
255  Streamer.emitValueToAlignment(64, 0, 1, 0);
256  if (ReadOnlySection.getAlignment() < 64)
257  ReadOnlySection.setAlignment(Align(64));
258 
259  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
260 
261  SmallString<128> KernelName;
262  getNameWithPrefix(KernelName, &MF->getFunction());
264  STM, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
265  CurrentProgramInfo.NumVGPRsForWavesPerEU,
266  CurrentProgramInfo.NumSGPRsForWavesPerEU -
268  CurrentProgramInfo.VCCUsed,
269  CurrentProgramInfo.FlatUsed),
270  CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed);
271 
272  Streamer.PopSection();
273 }
274 
276  if (TM.getTargetTriple().getOS() == Triple::AMDHSA &&
279  return;
280  }
281 
283  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
284  if (MFI->isEntryFunction() && STM.isAmdHsaOrMesa(MF->getFunction())) {
289  }
290  if (DumpCodeInstEmitter) {
291  // Disassemble function name label to text.
292  DisasmLines.push_back(MF->getName().str() + ":");
294  HexLines.push_back("");
295  }
296 
298 }
299 
301  if (DumpCodeInstEmitter && !isBlockOnlyReachableByFallthrough(&MBB)) {
302  // Write a line for the basic block label if it is not only fallthrough.
303  DisasmLines.push_back(
304  (Twine("BB") + Twine(getFunctionNumber())
305  + "_" + Twine(MBB.getNumber()) + ":").str());
307  HexLines.push_back("");
308  }
310 }
311 
314  if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
316  Twine(GV->getName()) +
317  ": unsupported initializer for address space");
318  return;
319  }
320 
321  // LDS variables aren't emitted in HSA or PAL yet.
322  const Triple::OSType OS = TM.getTargetTriple().getOS();
323  if (OS == Triple::AMDHSA || OS == Triple::AMDPAL)
324  return;
325 
326  MCSymbol *GVSym = getSymbol(GV);
327 
328  GVSym->redefineIfPossible();
329  if (GVSym->isDefined() || GVSym->isVariable())
330  report_fatal_error("symbol '" + Twine(GVSym->getName()) +
331  "' is already defined");
332 
333  const DataLayout &DL = GV->getParent()->getDataLayout();
334  uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
335  Align Alignment = GV->getAlign().getValueOr(Align(4));
336 
337  emitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
338  emitLinkage(GV, GVSym);
339  if (auto TS = getTargetStreamer())
340  TS->emitAMDGPULDS(GVSym, Size, Alignment);
341  return;
342  }
343 
345 }
346 
348  // Pad with s_code_end to help tools and guard against instruction prefetch
349  // causing stale data in caches. Arguably this should be done by the linker,
350  // which is why this isn't done for Mesa.
351  const MCSubtargetInfo &STI = *getGlobalSTI();
352  if ((AMDGPU::isGFX10Plus(STI) || AMDGPU::isGFX90A(STI)) &&
353  (STI.getTargetTriple().getOS() == Triple::AMDHSA ||
354  STI.getTargetTriple().getOS() == Triple::AMDPAL)) {
355  OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
357  }
358 
360 }
361 
362 // Print comments that apply to both callable functions and entry points.
363 void AMDGPUAsmPrinter::emitCommonFunctionComments(
364  uint32_t NumVGPR,
365  Optional<uint32_t> NumAGPR,
366  uint32_t TotalNumVGPR,
367  uint32_t NumSGPR,
368  uint64_t ScratchSize,
369  uint64_t CodeSize,
370  const AMDGPUMachineFunction *MFI) {
371  OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);
372  OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false);
373  OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false);
374  if (NumAGPR) {
375  OutStreamer->emitRawComment(" NumAgprs: " + Twine(*NumAGPR), false);
376  OutStreamer->emitRawComment(" TotalNumVgprs: " + Twine(TotalNumVGPR),
377  false);
378  }
379  OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false);
380  OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()),
381  false);
382 }
383 
384 uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
385  const MachineFunction &MF) const {
387  uint16_t KernelCodeProperties = 0;
388 
389  if (MFI.hasPrivateSegmentBuffer()) {
390  KernelCodeProperties |=
391  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
392  }
393  if (MFI.hasDispatchPtr()) {
394  KernelCodeProperties |=
395  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
396  }
397  if (MFI.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) {
398  KernelCodeProperties |=
399  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
400  }
401  if (MFI.hasKernargSegmentPtr()) {
402  KernelCodeProperties |=
403  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
404  }
405  if (MFI.hasDispatchID()) {
406  KernelCodeProperties |=
407  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
408  }
409  if (MFI.hasFlatScratchInit()) {
410  KernelCodeProperties |=
411  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
412  }
414  KernelCodeProperties |=
415  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
416  }
417 
418  return KernelCodeProperties;
419 }
420 
421 amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
422  const MachineFunction &MF,
423  const SIProgramInfo &PI) const {
424  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
425  const Function &F = MF.getFunction();
426 
427  amdhsa::kernel_descriptor_t KernelDescriptor;
428  memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor));
429 
433 
434  KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
435  KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
436 
437  Align MaxKernArgAlign;
438  KernelDescriptor.kernarg_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
439 
440  KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1();
441  KernelDescriptor.compute_pgm_rsrc2 = PI.ComputePGMRSrc2;
442  KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
443 
444  assert(STM.hasGFX90AInsts() || CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
445  if (STM.hasGFX90AInsts())
446  KernelDescriptor.compute_pgm_rsrc3 =
447  CurrentProgramInfo.ComputePGMRSrc3GFX90A;
448 
449  return KernelDescriptor;
450 }
451 
453  // Init target streamer lazily on the first function so that previous passes
454  // can set metadata.
456  initTargetStreamer(*MF.getFunction().getParent());
457 
458  ResourceUsage = &getAnalysis<AMDGPUResourceUsageAnalysis>();
459  CurrentProgramInfo = SIProgramInfo();
460 
462 
463  // The starting address of all shader programs must be 256 bytes aligned.
464  // Regular functions just need the basic required instruction alignment.
465  MF.setAlignment(MFI->isEntryFunction() ? Align(256) : Align(4));
466 
468 
469  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
471  // FIXME: This should be an explicit check for Mesa.
472  if (!STM.isAmdHsaOS() && !STM.isAmdPalOS()) {
473  MCSectionELF *ConfigSection =
474  Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
475  OutStreamer->SwitchSection(ConfigSection);
476  }
477 
478  if (MFI->isModuleEntryFunction()) {
479  getSIProgramInfo(CurrentProgramInfo, MF);
480  }
481 
482  if (STM.isAmdPalOS()) {
483  if (MFI->isEntryFunction())
484  EmitPALMetadata(MF, CurrentProgramInfo);
485  else if (MFI->isModuleEntryFunction())
486  emitPALFunctionMetadata(MF);
487  } else if (!STM.isAmdHsaOS()) {
488  EmitProgramInfoSI(MF, CurrentProgramInfo);
489  }
490 
491  DumpCodeInstEmitter = nullptr;
492  if (STM.dumpCode()) {
493  // For -dumpcode, get the assembler out of the streamer, even if it does
494  // not really want to let us have it. This only works with -filetype=obj.
495  bool SaveFlag = OutStreamer->getUseAssemblerInfoForParsing();
496  OutStreamer->setUseAssemblerInfoForParsing(true);
497  MCAssembler *Assembler = OutStreamer->getAssemblerPtr();
498  OutStreamer->setUseAssemblerInfoForParsing(SaveFlag);
499  if (Assembler)
500  DumpCodeInstEmitter = Assembler->getEmitterPtr();
501  }
502 
503  DisasmLines.clear();
504  HexLines.clear();
505  DisasmLineMaxLen = 0;
506 
508 
509  if (isVerbose()) {
510  MCSectionELF *CommentSection =
511  Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
512  OutStreamer->SwitchSection(CommentSection);
513 
514  if (!MFI->isEntryFunction()) {
515  OutStreamer->emitRawComment(" Function info:", false);
517  ResourceUsage->getResourceInfo(&MF.getFunction());
518  emitCommonFunctionComments(
519  Info.NumVGPR,
520  STM.hasMAIInsts() ? Info.NumAGPR : Optional<uint32_t>(),
521  Info.getTotalNumVGPRs(STM),
522  Info.getTotalNumSGPRs(MF.getSubtarget<GCNSubtarget>()),
523  Info.PrivateSegmentSize,
524  getFunctionCodeSize(MF), MFI);
525  return false;
526  }
527 
528  OutStreamer->emitRawComment(" Kernel info:", false);
529  emitCommonFunctionComments(CurrentProgramInfo.NumArchVGPR,
530  STM.hasMAIInsts()
531  ? CurrentProgramInfo.NumAccVGPR
532  : Optional<uint32_t>(),
533  CurrentProgramInfo.NumVGPR,
534  CurrentProgramInfo.NumSGPR,
535  CurrentProgramInfo.ScratchSize,
536  getFunctionCodeSize(MF), MFI);
537 
538  OutStreamer->emitRawComment(
539  " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false);
540  OutStreamer->emitRawComment(
541  " IeeeMode: " + Twine(CurrentProgramInfo.IEEEMode), false);
542  OutStreamer->emitRawComment(
543  " LDSByteSize: " + Twine(CurrentProgramInfo.LDSSize) +
544  " bytes/workgroup (compile time only)", false);
545 
546  OutStreamer->emitRawComment(
547  " SGPRBlocks: " + Twine(CurrentProgramInfo.SGPRBlocks), false);
548  OutStreamer->emitRawComment(
549  " VGPRBlocks: " + Twine(CurrentProgramInfo.VGPRBlocks), false);
550 
551  OutStreamer->emitRawComment(
552  " NumSGPRsForWavesPerEU: " +
553  Twine(CurrentProgramInfo.NumSGPRsForWavesPerEU), false);
554  OutStreamer->emitRawComment(
555  " NumVGPRsForWavesPerEU: " +
556  Twine(CurrentProgramInfo.NumVGPRsForWavesPerEU), false);
557 
558  if (STM.hasGFX90AInsts())
559  OutStreamer->emitRawComment(
560  " AccumOffset: " +
561  Twine((CurrentProgramInfo.AccumOffset + 1) * 4), false);
562 
563  OutStreamer->emitRawComment(
564  " Occupancy: " +
565  Twine(CurrentProgramInfo.Occupancy), false);
566 
567  OutStreamer->emitRawComment(
568  " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
569 
570  OutStreamer->emitRawComment(
571  " COMPUTE_PGM_RSRC2:SCRATCH_EN: " +
572  Twine(G_00B84C_SCRATCH_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
573  OutStreamer->emitRawComment(
574  " COMPUTE_PGM_RSRC2:USER_SGPR: " +
575  Twine(G_00B84C_USER_SGPR(CurrentProgramInfo.ComputePGMRSrc2)), false);
576  OutStreamer->emitRawComment(
577  " COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
578  Twine(G_00B84C_TRAP_HANDLER(CurrentProgramInfo.ComputePGMRSrc2)), false);
579  OutStreamer->emitRawComment(
580  " COMPUTE_PGM_RSRC2:TGID_X_EN: " +
581  Twine(G_00B84C_TGID_X_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
582  OutStreamer->emitRawComment(
583  " COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
584  Twine(G_00B84C_TGID_Y_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
585  OutStreamer->emitRawComment(
586  " COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
587  Twine(G_00B84C_TGID_Z_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
588  OutStreamer->emitRawComment(
589  " COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
590  Twine(G_00B84C_TIDIG_COMP_CNT(CurrentProgramInfo.ComputePGMRSrc2)),
591  false);
592 
593  assert(STM.hasGFX90AInsts() ||
594  CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
595  if (STM.hasGFX90AInsts()) {
596  OutStreamer->emitRawComment(
597  " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
598  Twine((AMDHSA_BITS_GET(CurrentProgramInfo.ComputePGMRSrc3GFX90A,
599  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET))),
600  false);
601  OutStreamer->emitRawComment(
602  " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
603  Twine((AMDHSA_BITS_GET(CurrentProgramInfo.ComputePGMRSrc3GFX90A,
604  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT))),
605  false);
606  }
607  }
608 
609  if (DumpCodeInstEmitter) {
610 
611  OutStreamer->SwitchSection(
612  Context.getELFSection(".AMDGPU.disasm", ELF::SHT_PROGBITS, 0));
613 
614  for (size_t i = 0; i < DisasmLines.size(); ++i) {
615  std::string Comment = "\n";
616  if (!HexLines[i].empty()) {
617  Comment = std::string(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
618  Comment += " ; " + HexLines[i] + "\n";
619  }
620 
621  OutStreamer->emitBytes(StringRef(DisasmLines[i]));
622  OutStreamer->emitBytes(StringRef(Comment));
623  }
624  }
625 
626  return false;
627 }
628 
629 // TODO: Fold this into emitFunctionBodyStart.
630 void AMDGPUAsmPrinter::initializeTargetID(const Module &M) {
631  // In the beginning all features are either 'Any' or 'NotSupported',
632  // depending on global target features. This will cover empty modules.
634  *getGlobalSTI(), getGlobalSTI()->getFeatureString());
635 
636  // If module is empty, we are done.
637  if (M.empty())
638  return;
639 
640  // If module is not empty, need to find first 'Off' or 'On' feature
641  // setting per feature from functions in module.
642  for (auto &F : M) {
643  auto &TSTargetID = getTargetStreamer()->getTargetID();
644  if ((!TSTargetID->isXnackSupported() || TSTargetID->isXnackOnOrOff()) &&
645  (!TSTargetID->isSramEccSupported() || TSTargetID->isSramEccOnOrOff()))
646  break;
647 
648  const GCNSubtarget &STM = TM.getSubtarget<GCNSubtarget>(F);
649  const IsaInfo::AMDGPUTargetID &STMTargetID = STM.getTargetID();
650  if (TSTargetID->isXnackSupported())
651  if (TSTargetID->getXnackSetting() == IsaInfo::TargetIDSetting::Any)
652  TSTargetID->setXnackSetting(STMTargetID.getXnackSetting());
653  if (TSTargetID->isSramEccSupported())
654  if (TSTargetID->getSramEccSetting() == IsaInfo::TargetIDSetting::Any)
655  TSTargetID->setSramEccSetting(STMTargetID.getSramEccSetting());
656  }
657 }
658 
659 uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(const MachineFunction &MF) const {
660  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
661  const SIInstrInfo *TII = STM.getInstrInfo();
662 
663  uint64_t CodeSize = 0;
664 
665  for (const MachineBasicBlock &MBB : MF) {
666  for (const MachineInstr &MI : MBB) {
667  // TODO: CodeSize should account for multiple functions.
668 
669  // TODO: Should we count size of debug info?
670  if (MI.isDebugInstr())
671  continue;
672 
673  CodeSize += TII->getInstSizeInBytes(MI);
674  }
675  }
676 
677  return CodeSize;
678 }
679 
680 void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
681  const MachineFunction &MF) {
683  ResourceUsage->getResourceInfo(&MF.getFunction());
684  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
685 
686  ProgInfo.NumArchVGPR = Info.NumVGPR;
687  ProgInfo.NumAccVGPR = Info.NumAGPR;
688  ProgInfo.NumVGPR = Info.getTotalNumVGPRs(STM);
689  ProgInfo.AccumOffset = alignTo(std::max(1, Info.NumVGPR), 4) / 4 - 1;
690  ProgInfo.TgSplit = STM.isTgSplitEnabled();
691  ProgInfo.NumSGPR = Info.NumExplicitSGPR;
692  ProgInfo.ScratchSize = Info.PrivateSegmentSize;
693  ProgInfo.VCCUsed = Info.UsesVCC;
694  ProgInfo.FlatUsed = Info.UsesFlatScratch;
695  ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion;
696 
697  const uint64_t MaxScratchPerWorkitem =
699  if (ProgInfo.ScratchSize > MaxScratchPerWorkitem) {
700  DiagnosticInfoStackSize DiagStackSize(MF.getFunction(),
701  ProgInfo.ScratchSize,
702  MaxScratchPerWorkitem, DS_Error);
703  MF.getFunction().getContext().diagnose(DiagStackSize);
704  }
705 
707 
708  // The calculations related to SGPR/VGPR blocks are
709  // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
710  // unified.
711  unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
712  &STM, ProgInfo.VCCUsed, ProgInfo.FlatUsed);
713 
714  // Check the addressable register limit before we add ExtraSGPRs.
716  !STM.hasSGPRInitBug()) {
717  unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
718  if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
719  // This can happen due to a compiler bug or when using inline asm.
722  MF.getFunction(), "addressable scalar registers", ProgInfo.NumSGPR,
723  MaxAddressableNumSGPRs, DS_Error, DK_ResourceLimit);
724  Ctx.diagnose(Diag);
725  ProgInfo.NumSGPR = MaxAddressableNumSGPRs - 1;
726  }
727  }
728 
729  // Account for the extra SGPRs computed above from VCC and flat scratch usage.
730  ProgInfo.NumSGPR += ExtraSGPRs;
731 
732  const Function &F = MF.getFunction();
733 
734  // Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave
735  // dispatch registers are function args.
736  unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
737 
738  if (isShader(F.getCallingConv())) {
739  bool IsPixelShader =
740  F.getCallingConv() == CallingConv::AMDGPU_PS && !STM.isAmdHsaOS();
741 
742  // Calculate the number of VGPR registers based on the SPI input registers
743  uint32_t InputEna = 0;
744  uint32_t InputAddr = 0;
745  unsigned LastEna = 0;
746 
747  if (IsPixelShader) {
748  // Note for IsPixelShader:
749  // By this stage, all enabled inputs are tagged in InputAddr as well.
750  // We will use InputAddr to determine whether the input counts against the
751  // vgpr total and only use the InputEnable to determine the last input
752  // that is relevant - if extra arguments are used, then we have to honour
753  // the InputAddr for any intermediate non-enabled inputs.
754  InputEna = MFI->getPSInputEnable();
755  InputAddr = MFI->getPSInputAddr();
756 
757  // We only need to consider input args up to the last used arg.
758  assert((InputEna || InputAddr) &&
759  "PSInputAddr and PSInputEnable should "
760  "never both be 0 for AMDGPU_PS shaders");
761  // There are some rare circumstances where InputAddr is non-zero and
762  // InputEna can be set to 0. In this case we default to setting LastEna
763  // to 1.
764  LastEna = InputEna ? findLastSet(InputEna) + 1 : 1;
765  }
766 
767  // FIXME: We should be using the number of registers determined during
768  // calling convention lowering to legalize the types.
769  const DataLayout &DL = F.getParent()->getDataLayout();
770  unsigned PSArgCount = 0;
771  unsigned IntermediateVGPR = 0;
772  for (auto &Arg : F.args()) {
773  unsigned NumRegs = (DL.getTypeSizeInBits(Arg.getType()) + 31) / 32;
774  if (Arg.hasAttribute(Attribute::InReg)) {
775  WaveDispatchNumSGPR += NumRegs;
776  } else {
777  // If this is a PS shader and we're processing the PS Input args (first
778  // 16 VGPR), use the InputEna and InputAddr bits to define how many
779  // VGPRs are actually used.
780  // Any extra VGPR arguments are handled as normal arguments (and
781  // contribute to the VGPR count whether they're used or not).
782  if (IsPixelShader && PSArgCount < 16) {
783  if ((1 << PSArgCount) & InputAddr) {
784  if (PSArgCount < LastEna)
785  WaveDispatchNumVGPR += NumRegs;
786  else
787  IntermediateVGPR += NumRegs;
788  }
789  PSArgCount++;
790  } else {
791  // If there are extra arguments we have to include the allocation for
792  // the non-used (but enabled with InputAddr) input arguments
793  if (IntermediateVGPR) {
794  WaveDispatchNumVGPR += IntermediateVGPR;
795  IntermediateVGPR = 0;
796  }
797  WaveDispatchNumVGPR += NumRegs;
798  }
799  }
800  }
801  ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR);
802  ProgInfo.NumArchVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
803  ProgInfo.NumVGPR =
804  Info.getTotalNumVGPRs(STM, Info.NumAGPR, ProgInfo.NumArchVGPR);
805  }
806 
807  // Adjust number of registers used to meet default/requested minimum/maximum
808  // number of waves per execution unit request.
809  ProgInfo.NumSGPRsForWavesPerEU = std::max(
810  std::max(ProgInfo.NumSGPR, 1u), STM.getMinNumSGPRs(MFI->getMaxWavesPerEU()));
811  ProgInfo.NumVGPRsForWavesPerEU = std::max(
812  std::max(ProgInfo.NumVGPR, 1u), STM.getMinNumVGPRs(MFI->getMaxWavesPerEU()));
813 
815  STM.hasSGPRInitBug()) {
816  unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
817  if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
818  // This can happen due to a compiler bug or when using inline asm to use
819  // the registers which are usually reserved for vcc etc.
821  DiagnosticInfoResourceLimit Diag(MF.getFunction(), "scalar registers",
822  ProgInfo.NumSGPR, MaxAddressableNumSGPRs,
824  Ctx.diagnose(Diag);
825  ProgInfo.NumSGPR = MaxAddressableNumSGPRs;
826  ProgInfo.NumSGPRsForWavesPerEU = MaxAddressableNumSGPRs;
827  }
828  }
829 
830  if (STM.hasSGPRInitBug()) {
831  ProgInfo.NumSGPR =
833  ProgInfo.NumSGPRsForWavesPerEU =
835  }
836 
837  if (MFI->getNumUserSGPRs() > STM.getMaxNumUserSGPRs()) {
839  DiagnosticInfoResourceLimit Diag(MF.getFunction(), "user SGPRs",
840  MFI->getNumUserSGPRs(),
842  Ctx.diagnose(Diag);
843  }
844 
845  if (MFI->getLDSSize() > static_cast<unsigned>(STM.getLocalMemorySize())) {
847  DiagnosticInfoResourceLimit Diag(MF.getFunction(), "local memory",
848  MFI->getLDSSize(),
850  Ctx.diagnose(Diag);
851  }
852 
854  &STM, ProgInfo.NumSGPRsForWavesPerEU);
856  &STM, ProgInfo.NumVGPRsForWavesPerEU);
857 
858  const SIModeRegisterDefaults Mode = MFI->getMode();
859 
860  // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
861  // register.
862  ProgInfo.FloatMode = getFPMode(Mode);
863 
864  ProgInfo.IEEEMode = Mode.IEEE;
865 
866  // Make the clamp modifier return 0 on NaN input.
867  ProgInfo.DX10Clamp = Mode.DX10Clamp;
868 
869  unsigned LDSAlignShift;
871  // LDS is allocated in 64 dword blocks.
872  LDSAlignShift = 8;
873  } else {
874  // LDS is allocated in 128 dword blocks.
875  LDSAlignShift = 9;
876  }
877 
878  ProgInfo.LDSSize = MFI->getLDSSize();
879  ProgInfo.LDSBlocks =
880  alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
881 
882  // Scratch is allocated in 256 dword blocks.
883  unsigned ScratchAlignShift = 10;
884  // We need to program the hardware with the amount of scratch memory that
885  // is used by the entire wave. ProgInfo.ScratchSize is the amount of
886  // scratch memory used per thread.
887  ProgInfo.ScratchBlocks =
888  alignTo(ProgInfo.ScratchSize * STM.getWavefrontSize(),
889  1ULL << ScratchAlignShift) >>
890  ScratchAlignShift;
891 
892  if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
893  ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
894  ProgInfo.MemOrdered = 1;
895  }
896 
897  // 0 = X, 1 = XY, 2 = XYZ
898  unsigned TIDIGCompCnt = 0;
899  if (MFI->hasWorkItemIDZ())
900  TIDIGCompCnt = 2;
901  else if (MFI->hasWorkItemIDY())
902  TIDIGCompCnt = 1;
903 
904  ProgInfo.ComputePGMRSrc2 =
905  S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
907  // For AMDHSA, TRAP_HANDLER must be zero, as it is populated by the CP.
913  S_00B84C_TIDIG_COMP_CNT(TIDIGCompCnt) |
915  // For AMDHSA, LDS_SIZE must be zero, as it is populated by the CP.
916  S_00B84C_LDS_SIZE(STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks) |
917  S_00B84C_EXCP_EN(0);
918 
919  if (STM.hasGFX90AInsts()) {
921  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
922  ProgInfo.AccumOffset);
924  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
925  ProgInfo.TgSplit);
926  }
927 
928  ProgInfo.Occupancy = STM.computeOccupancy(MF.getFunction(), ProgInfo.LDSSize,
929  ProgInfo.NumSGPRsForWavesPerEU,
930  ProgInfo.NumVGPRsForWavesPerEU);
931 }
932 
933 static unsigned getRsrcReg(CallingConv::ID CallConv) {
934  switch (CallConv) {
935  default: LLVM_FALLTHROUGH;
943  }
944 }
945 
/// Streams the program-info words for \p MF through OutStreamer.
///
/// \param MF function whose calling convention selects the register id.
/// \param CurrentProgramInfo source of the RSRC values and of the
///        VGPR/SGPR/scratch/LDS block counts that are emitted.
///
/// All values are written as 4-byte integers (emitInt32, or emitIntValue
/// with a size of 4).
946 void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
947  const SIProgramInfo &CurrentProgramInfo) {
949  unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
950 
953 
// First branch: the compute PGM_RSRC1 and PGM_RSRC2 values, then the scratch
// block count encoded with S_00B860_WAVESIZE.
954  OutStreamer->emitInt32(CurrentProgramInfo.getComputePGMRSrc1());
955 
957  OutStreamer->emitInt32(CurrentProgramInfo.ComputePGMRSrc2);
958 
960  OutStreamer->emitInt32(S_00B860_WAVESIZE(CurrentProgramInfo.ScratchBlocks));
961 
962  // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
963  // 0" comment but I don't see a corresponding field in the register spec.
964  } else {
// Other branch: the convention-specific register id, the packed VGPR/SGPR
// block counts, then the scratch block count encoded with S_0286E8_WAVESIZE
// rather than S_00B860_WAVESIZE.
965  OutStreamer->emitInt32(RsrcReg);
966  OutStreamer->emitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
967  S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4);
969  OutStreamer->emitIntValue(
970  S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
971  }
972 
// Extra LDS size followed by the PS input enable and PS input address values.
975  OutStreamer->emitInt32(
976  S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks));
978  OutStreamer->emitInt32(MFI->getPSInputEnable());
980  OutStreamer->emitInt32(MFI->getPSInputAddr());
981  }
982 
// Spill statistics are written as tag/value pairs: an R_SPILLED_* tag
// followed by the matching count.
983  OutStreamer->emitInt32(R_SPILLED_SGPRS);
984  OutStreamer->emitInt32(MFI->getNumSpilledSGPRs());
985  OutStreamer->emitInt32(R_SPILLED_VGPRS);
986  OutStreamer->emitInt32(MFI->getNumSpilledVGPRs());
987 }
988 
989 // This is the equivalent of EmitProgramInfoSI above, but for when the OS type
990 // is AMDPAL. It stores each compute/SPI register setting and other PAL
991 // metadata items into the PALMD::Metadata, combining with any provided by the
992 // frontend as LLVM metadata. Once all functions are written, the PAL metadata
993 // is then written as a single block in the .note section.
//
// \p MF supplies the calling convention, the entry-point name and the
// subtarget; \p CurrentProgramInfo supplies the register counts, the RSRC
// values and the scratch/LDS sizes that are recorded.
994 void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
995  const SIProgramInfo &CurrentProgramInfo) {
997  auto CC = MF.getFunction().getCallingConv();
998  auto MD = getTargetStreamer()->getPALMetadata();
999 
// Most entries below are keyed by the function's calling convention CC.
1000  MD->setEntryPoint(CC, MF.getFunction().getName());
1001  MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU);
1002 
1003  // Only set AGPRs for supported devices
1004  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
1005  if (STM.hasMAIInsts()) {
1006  MD->setNumUsedAgprs(CC, CurrentProgramInfo.NumAccVGPR);
1007  }
1008 
1009  MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU);
1010  MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC));
// Compute conventions record ComputePGMRSrc2 as-is. Other conventions only
// record SCRATCH_EN, and only when at least one scratch block is in use.
1011  if (AMDGPU::isCompute(CC)) {
1012  MD->setRsrc2(CC, CurrentProgramInfo.ComputePGMRSrc2);
1013  } else {
1014  if (CurrentProgramInfo.ScratchBlocks > 0)
1015  MD->setRsrc2(CC, S_00B84C_SCRATCH_EN(1));
1016  }
1017  // ScratchSize is in bytes, 16 aligned.
1018  MD->setScratchSize(CC, alignTo(CurrentProgramInfo.ScratchSize, 16));
1020  MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks));
1021  MD->setSpiPsInputEna(MFI->getPSInputEnable());
1022  MD->setSpiPsInputAddr(MFI->getPSInputAddr());
1023  }
1024 
// Flag the calling convention as wave32 when the subtarget runs in wave32.
1025  if (STM.isWave32())
1026  MD->setWave32(MF.getFunction().getCallingConv());
1027 }
1028 
1029 void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
1030  auto *MD = getTargetStreamer()->getPALMetadata();
1031  const MachineFrameInfo &MFI = MF.getFrameInfo();
1032  MD->setFunctionScratchSize(MF, MFI.getStackSize());
1033 
1034  // Set compute registers
1035  MD->setRsrc1(CallingConv::AMDGPU_CS,
1036  CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS));
1037  MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.ComputePGMRSrc2);
1038 
1039  // Set optional info
1040  MD->setFunctionLdsSize(MF, CurrentProgramInfo.LDSSize);
1041  MD->setFunctionNumUsedVgprs(MF, CurrentProgramInfo.NumVGPRsForWavesPerEU);
1042  MD->setFunctionNumUsedSgprs(MF, CurrentProgramInfo.NumSGPRsForWavesPerEU);
1043 }
1044 
// Translates an element size of 4, 8 or 16 bytes into the matching
// AMD_ELEMENT_*_BYTES enumerator. Any other size reaches llvm_unreachable
// with the message "invalid private_element_size".
1045 // This is supposed to be log2(Size)
1047  switch (Size) {
1048  case 4:
1049  return AMD_ELEMENT_4_BYTES;
1050  case 8:
1051  return AMD_ELEMENT_8_BYTES;
1052  case 16:
1053  return AMD_ELEMENT_16_BYTES;
1054  default:
1055  llvm_unreachable("invalid private_element_size");
1056  }
1057 }
1058 
/// Fills the kernel code header \p Out for the kernel \p MF.
///
/// \param Out header object that receives the computed fields.
/// \param CurrentProgramInfo source of the RSRC values, register counts and
///        scratch/LDS sizes copied into \p Out.
/// \param MF the kernel; its calling convention is asserted to be
///        AMDGPU_KERNEL or SPIR_KERNEL.
1059 void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
1060  const SIProgramInfo &CurrentProgramInfo,
1061  const MachineFunction &MF) const {
1062  const Function &F = MF.getFunction();
1063  assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
1064  F.getCallingConv() == CallingConv::SPIR_KERNEL);
1065 
1067  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
1068 
1070 
// The two RSRC values are packed into one value: RSRC1 is ORed with RSRC2
// shifted left by 32 bits.
1072  CurrentProgramInfo.getComputePGMRSrc1() |
1073  (CurrentProgramInfo.ComputePGMRSrc2 << 32);
1075 
1076  if (CurrentProgramInfo.DynamicCallStack)
1078 
1081  getElementByteSizeValue(STM.getMaxPrivateElementSize(true)));
1082 
// Each test below guards an update keyed on one feature query of the
// function info or the subtarget.
1083  if (MFI->hasPrivateSegmentBuffer()) {
1084  Out.code_properties |=
1086  }
1087 
1088  if (MFI->hasDispatchPtr())
1090 
// The queue pointer is only considered for code object versions below 5.
1091  if (MFI->hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5)
1093 
1094  if (MFI->hasKernargSegmentPtr())
1096 
1097  if (MFI->hasDispatchID())
1099 
1100  if (MFI->hasFlatScratchInit())
1102 
// NOTE(review): hasDispatchPtr() is tested a second time here (it is already
// tested above) -- confirm whether the repeat is intentional.
1103  if (MFI->hasDispatchPtr())
1105 
1106  if (STM.isXNACKEnabled())
1108 
// Sizes and register counts are copied over directly; the kernarg size query
// also reports the largest argument alignment through MaxKernArgAlign.
1109  Align MaxKernArgAlign;
1110  Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
1111  Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
1112  Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
1113  Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
1114  Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
1115 
1116  // kernarg_segment_alignment is specified as log of the alignment.
1117  // The minimum alignment is 16.
1118  // FIXME: The metadata treats the minimum as 4?
1119  Out.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
1120 }
1121 
1123  const char *ExtraCode, raw_ostream &O) {
// Return value convention used throughout: false means the operand was
// printed to O, true means it could not be handled.
1124  // First try the generic code, which knows about modifiers like 'c' and 'n'.
1125  if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O))
1126  return false;
1127 
// Beyond what the generic code handles, only the single-character modifier
// 'r' is accepted; it does not alter the printing below.
1128  if (ExtraCode && ExtraCode[0]) {
1129  if (ExtraCode[1] != 0)
1130  return true; // Unknown modifier.
1131 
1132  switch (ExtraCode[0]) {
1133  case 'r':
1134  break;
1135  default:
1136  return true;
1137  }
1138  }
1139 
1140  // TODO: Should be able to support other operand types like globals.
1141  const MachineOperand &MO = MI->getOperand(OpNo);
1142  if (MO.isReg()) {
1145  return false;
1146  } else if (MO.isImm()) {
// Inlinable integer literals are printed in decimal. Any other immediate is
// printed in hex, using the narrowest of the 16-, 32- and 64-bit formats
// whose unsigned range holds the value.
1147  int64_t Val = MO.getImm();
1148  if (AMDGPU::isInlinableIntLiteral(Val)) {
1149  O << Val;
1150  } else if (isUInt<16>(Val)) {
1151  O << format("0x%" PRIx16, static_cast<uint16_t>(Val));
1152  } else if (isUInt<32>(Val)) {
1153  O << format("0x%" PRIx32, static_cast<uint32_t>(Val));
1154  } else {
1155  O << format("0x%" PRIx64, static_cast<uint64_t>(Val));
1156  }
1157  return false;
1158  }
// Neither a register nor an immediate: report failure.
1159  return true;
1160 }
1161 
1166 }
llvm::AMDGPUAsmPrinter::emitBasicBlockStart
void emitBasicBlockStart(const MachineBasicBlock &MBB) override
Targets can override this to emit stuff at the start of a basic block.
Definition: AMDGPUAsmPrinter.cpp:300
llvm::MCTargetStreamer::getStreamer
MCStreamer & getStreamer()
Definition: MCStreamer.h:101
S_00B84C_TGID_Y_EN
#define S_00B84C_TGID_Y_EN(x)
Definition: SIDefines.h:946
i
i
Definition: README.txt:29
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:148
llvm::AMDGPU::isHsaAbiVersion3
bool isHsaAbiVersion3(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:125
getFPMode
static uint32_t getFPMode(AMDGPU::SIModeRegisterDefaults Mode)
Definition: AMDGPUAsmPrinter.cpp:66
FP_ROUND_ROUND_TO_NEAREST
#define FP_ROUND_ROUND_TO_NEAREST
Definition: SIDefines.h:1010
llvm::AMDGPUTargetStreamer::getTargetID
const Optional< AMDGPU::IsaInfo::AMDGPUTargetID > & getTargetID() const
Definition: AMDGPUTargetStreamer.h:97
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:104
getRsrcReg
static unsigned getRsrcReg(CallingConv::ID CallConv)
Definition: AMDGPUAsmPrinter.cpp:933
AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR
Definition: AMDKernelCodeT.h:95
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::AMDGPUPALMetadata::readFromIR
void readFromIR(Module &M)
Definition: AMDGPUPALMetadata.cpp:31
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
R_00B128_SPI_SHADER_PGM_RSRC1_VS
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS
Definition: SIDefines.h:909
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
AMD_ELEMENT_4_BYTES
@ AMD_ELEMENT_4_BYTES
Definition: AMDKernelCodeT.h:55
llvm::AMDGPU::getIsaVersion
IsaVersion getIsaVersion(StringRef GPU)
Definition: TargetParser.cpp:193
SIMachineFunctionInfo.h
llvm::MCContext
Context object for machine code objects.
Definition: MCContext.h:74
llvm::Function
Definition: Function.h:60
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::setXnackSetting
void setXnackSetting(TargetIDSetting NewXnackSetting)
Sets xnack setting to NewXnackSetting.
Definition: AMDGPUBaseInfo.h:140
llvm::SIMachineFunctionInfo::getNumSpilledSGPRs
unsigned getNumSpilledSGPRs() const
Definition: SIMachineFunctionInfo.h:840
AMDGPUHSAMetadataStreamer.h
AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK
@ AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK
Definition: AMDKernelCodeT.h:184
llvm::AMDGPUTargetStreamer::EmitDirectiveAMDGCNTarget
virtual void EmitDirectiveAMDGCNTarget()=0
MCSectionELF.h
FP_DENORM_MODE_DP
#define FP_DENORM_MODE_DP(x)
Definition: SIDefines.h:1029
llvm::GlobalObject::getAlign
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:79
amd_kernel_code_t::compute_pgm_resource_registers
uint64_t compute_pgm_resource_registers
Shader program settings for CS.
Definition: AMDKernelCodeT.h:558
llvm::AMDGPUAsmPrinter::AMDGPUAsmPrinter
AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
Definition: AMDGPUAsmPrinter.cpp:86
llvm::SIProgramInfo::WgpMode
uint32_t WgpMode
Definition: SIProgramInfo.h:35
llvm::SIProgramInfo::NumSGPR
uint32_t NumSGPR
Definition: SIProgramInfo.h:51
llvm::AMDGPUAsmPrinter::emitFunctionBodyStart
void emitFunctionBodyStart() override
Targets can override this to emit stuff before the first basic block in the function.
Definition: AMDGPUAsmPrinter.cpp:191
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::SystemZICMP::Any
@ Any
Definition: SystemZISelLowering.h:377
llvm::AMDGPUSubtarget::SEA_ISLANDS
@ SEA_ISLANDS
Definition: AMDGPUSubtarget.h:38
llvm::AsmPrinter::getNameWithPrefix
void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const
Definition: AsmPrinter.cpp:649
MCAssembler.h
llvm::AMDGPUAsmPrinter::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
Record analysis usage.
Definition: AMDGPUAsmPrinter.cpp:1162
llvm::AsmPrinter::doFinalization
bool doFinalization(Module &M) override
Shut down the asmprinter.
Definition: AsmPrinter.cpp:1903
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:125
R_00B528_SPI_SHADER_PGM_RSRC1_LS
#define R_00B528_SPI_SHADER_PGM_RSRC1_LS
Definition: SIDefines.h:931
llvm::AMDGPU::getAmdhsaCodeObjectVersion
unsigned getAmdhsaCodeObjectVersion()
Definition: AMDGPUBaseInfo.cpp:148
llvm::AMDGPUPALMetadata::setEntryPoint
void setEntryPoint(unsigned CC, StringRef Name)
Definition: AMDGPUPALMetadata.cpp:188
R_0286CC_SPI_PS_INPUT_ENA
#define R_0286CC_SPI_PS_INPUT_ENA
Definition: SIDefines.h:970
llvm::AMDGPUMachineFunction::getLDSSize
uint32_t getLDSSize() const
Definition: AMDGPUMachineFunction.h:74
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:319
amd_element_byte_size_t
amd_element_byte_size_t
The values used to define the number of bytes to use for the swizzle element size.
Definition: AMDKernelCodeT.h:53
AMDGPUAsmPrinter.h
llvm::MCSymbol::isDefined
bool isDefined() const
isDefined - Check if this symbol is defined (i.e., it has an address).
Definition: MCSymbol.h:243
amd_kernel_code_t::workgroup_group_segment_byte_size
uint32_t workgroup_group_segment_byte_size
The amount of group segment memory required by a work-group in bytes.
Definition: AMDKernelCodeT.h:574
llvm::GCNSubtarget::isTrapHandlerEnabled
bool isTrapHandlerEnabled() const
Definition: GCNSubtarget.h:529
llvm::AMDGPU::IsaInfo::getNumExtraSGPRs
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
Definition: AMDGPUBaseInfo.cpp:725
llvm::AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough
bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const override
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
Definition: AMDGPUAsmPrinter.cpp:177
AMD_HSA_BITS_SET
#define AMD_HSA_BITS_SET(dst, mask, val)
Definition: AMDKernelCodeT.h:43
llvm::AMDGPU::HSAMD::MetadataStreamerV3
Definition: AMDGPUHSAMetadataStreamer.h:64
llvm::AMDGPUTargetStreamer::EmitDirectiveHSACodeObjectVersion
virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor)=0
llvm::SIMachineFunctionInfo::getPSInputEnable
unsigned getPSInputEnable() const
Definition: SIMachineFunctionInfo.h:860
llvm::Optional< uint32_t >
llvm::AMDGPU::SIModeRegisterDefaults
Definition: AMDGPUBaseInfo.h:982
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
S_00B84C_USER_SGPR
#define S_00B84C_USER_SGPR(x)
Definition: SIDefines.h:937
llvm::AMDGPUAsmPrinter
Definition: AMDGPUAsmPrinter.h:40
llvm::SIProgramInfo::NumVGPR
uint32_t NumVGPR
Definition: SIProgramInfo.h:46
AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT
@ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT
Definition: AMDKernelCodeT.h:107
llvm::MachineBasicBlock::back
MachineInstr & back()
Definition: MachineBasicBlock.h:257
llvm::AMDGPUTargetStreamer::EmitAMDKernelCodeT
virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header)=0
llvm::MCSectionELF
This represents a section on linux, lots of unix variants and some bare metal systems.
Definition: MCSectionELF.h:26
TargetParser.h
llvm::SIProgramInfo::NumSGPRsForWavesPerEU
uint32_t NumSGPRsForWavesPerEU
Definition: SIProgramInfo.h:56
llvm::MCObjectFileInfo::getContext
MCContext & getContext() const
Definition: MCObjectFileInfo.h:244
llvm::amdhsa::kernel_descriptor_t::compute_pgm_rsrc2
uint32_t compute_pgm_rsrc2
Definition: AMDHSAKernelDescriptor.h:174
llvm::SIProgramInfo::LDSSize
uint32_t LDSSize
Definition: SIProgramInfo.h:52
llvm::ELF::STT_AMDGPU_HSA_KERNEL
@ STT_AMDGPU_HSA_KERNEL
Definition: ELF.h:1205
llvm::SIProgramInfo::AccumOffset
uint32_t AccumOffset
Definition: SIProgramInfo.h:49
R_0286D0_SPI_PS_INPUT_ADDR
#define R_0286D0_SPI_PS_INPUT_ADDR
Definition: SIDefines.h:971
llvm::AMDGPUSubtarget::getKernArgSegmentSize
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
Definition: AMDGPUSubtarget.cpp:547
llvm::AMDGPUTargetStreamer::initializeTargetID
void initializeTargetID(const MCSubtargetInfo &STI)
Definition: AMDGPUTargetStreamer.h:103
llvm::AsmPrinter::emitGlobalVariable
virtual void emitGlobalVariable(const GlobalVariable *GV)
Emit the specified global variable to the .s file.
Definition: AsmPrinter.cpp:676
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::GlobalVariable::hasInitializer
bool hasInitializer() const
Definitions have initializers, declarations don't.
Definition: GlobalVariable.h:91
AMDHSAKernelDescriptor.h
AMD_ELEMENT_8_BYTES
@ AMD_ELEMENT_8_BYTES
Definition: AMDKernelCodeT.h:56
llvm::CallingConv::AMDGPU_HS
@ AMDGPU_HS
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:223
llvm::GCNSubtarget::getInstrInfo
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:214
S_00B84C_TIDIG_COMP_CNT
#define S_00B84C_TIDIG_COMP_CNT(x)
Definition: SIDefines.h:955
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::AMDGPU::IsaVersion
Instruction set architecture version.
Definition: TargetParser.h:113
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:186
R600AsmPrinter.h
llvm::AMDGPUSubtarget::isAmdHsaOS
bool isAmdHsaOS() const
Definition: AMDGPUSubtarget.h:120
S_00B84C_SCRATCH_EN
#define S_00B84C_SCRATCH_EN(x)
Definition: SIDefines.h:934
llvm::CallingConv::AMDGPU_LS
@ AMDGPU_LS
Calling convention used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:231
llvm::AMDGPU::isGFX90A
bool isGFX90A(const MCSubtargetInfo &STI)
Definition: AMDGPUBaseInfo.cpp:1730
llvm::SIProgramInfo::MemOrdered
uint32_t MemOrdered
Definition: SIProgramInfo.h:36
S_00B84C_EXCP_EN_MSB
#define S_00B84C_EXCP_EN_MSB(x)
Definition: SIDefines.h:959
llvm::AMDGPUMachineFunction::needsWaveLimiter
bool needsWaveLimiter() const
Definition: AMDGPUMachineFunction.h:100
llvm::AMDGPU::IsaInfo::AMDGPUTargetID
Definition: AMDGPUBaseInfo.h:105
ELF.h
TargetMachine.h
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:241
llvm::AMDGPU::isShader
bool isShader(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1576
llvm::AsmPrinter::emitLinkage
virtual void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const
This emits linkage information about GVSym based on GV, if this is supported by the target.
Definition: AsmPrinter.cpp:609
llvm::SIProgramInfo::NumArchVGPR
uint32_t NumArchVGPR
Definition: SIProgramInfo.h:47
llvm::SIProgramInfo::ComputePGMRSrc2
uint64_t ComputePGMRSrc2
Definition: SIProgramInfo.h:43
GCNSubtarget.h
S_00B84C_TGID_Z_EN
#define S_00B84C_TGID_Z_EN(x)
Definition: SIDefines.h:949
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:546
llvm::MachineFunction::setAlignment
void setAlignment(Align A)
setAlignment - Set the alignment of the function.
Definition: MachineFunction.h:691
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:738
S_00B02C_EXTRA_LDS_SIZE
#define S_00B02C_EXTRA_LDS_SIZE(x)
Definition: SIDefines.h:908
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
Definition: AMDKernelCodeT.h:87
G_00B84C_TRAP_HANDLER
#define G_00B84C_TRAP_HANDLER(x)
Definition: SIDefines.h:941
llvm::getTheAMDGPUTarget
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
Definition: AMDGPUTargetInfo.cpp:20
llvm::AsmPrinter::OutStreamer
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:96
llvm::SIProgramInfo::ScratchSize
uint64_t ScratchSize
Definition: SIProgramInfo.h:37
AMDGPUTargetInfo.h
llvm::Log2
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:207
llvm::MCSubtargetInfo::getTargetTriple
const Triple & getTargetTriple() const
Definition: MCSubtargetInfo.h:108
MCContext.h
llvm::SIProgramInfo::NumVGPRsForWavesPerEU
uint32_t NumVGPRsForWavesPerEU
Definition: SIProgramInfo.h:59
llvm::SIMachineFunctionInfo::hasWorkGroupIDZ
bool hasWorkGroupIDZ() const
Definition: SIMachineFunctionInfo.h:695
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::AsmPrinter::SetupMachineFunction
virtual void SetupMachineFunction(MachineFunction &MF)
This should be called when a new MachineFunction is being processed from runOnMachineFunction.
Definition: AsmPrinter.cpp:2153
llvm::AsmPrinter::emitFunctionEntryLabel
virtual void emitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
Definition: AsmPrinter.cpp:1011
llvm::SIProgramInfo::LDSBlocks
uint32_t LDSBlocks
Definition: SIProgramInfo.h:40
llvm::Optional::getValueOr
constexpr T getValueOr(U &&value) const &
Definition: Optional.h:289
llvm::amdhsa::kernel_descriptor_t::group_segment_fixed_size
uint32_t group_segment_fixed_size
Definition: AMDHSAKernelDescriptor.h:166
llvm::DiagnosticInfoResourceLimit
Diagnostic information for stack size etc.
Definition: DiagnosticInfo.h:186
llvm::AsmPrinter::isBlockOnlyReachableByFallthrough
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
Definition: AsmPrinter.cpp:3577
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:127
llvm::AMDGPUResourceUsageAnalysis::getResourceInfo
const SIFunctionResourceInfo & getResourceInfo(const Function *F) const
Definition: AMDGPUResourceUsageAnalysis.h:67
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
R_00B228_SPI_SHADER_PGM_RSRC1_GS
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS
Definition: SIDefines.h:914
llvm::AMDGPU
Definition: AMDGPUMetadataVerifier.h:34
llvm::AMDGPUAsmPrinter::IsTargetStreamerInitialized
bool IsTargetStreamerInitialized
Definition: AMDGPUAsmPrinter.h:137
llvm::GCNSubtarget::isWave32
bool isWave32() const
Definition: GCNSubtarget.h:1220
llvm::GCNSubtarget::hasSGPRInitBug
bool hasSGPRInitBug() const
Definition: GCNSubtarget.h:920
G_00B84C_TGID_Y_EN
#define G_00B84C_TGID_Y_EN(x)
Definition: SIDefines.h:947
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:143
amd_kernel_code_t::workitem_private_segment_byte_size
uint32_t workitem_private_segment_byte_size
The amount of memory required for the combined private, spill and arg segments for a work-item in byt...
Definition: AMDKernelCodeT.h:568
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:54
getElementByteSizeValue
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)
Definition: AMDGPUAsmPrinter.cpp:1046
llvm::SIProgramInfo::ComputePGMRSrc3GFX90A
uint64_t ComputePGMRSrc3GFX90A
Definition: SIProgramInfo.h:44
llvm::AMDGPUAsmPrinter::PrintAsmOperand
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &O) override
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
Definition: AMDGPUAsmPrinter.cpp:1122
llvm::SIProgramInfo::DynamicCallStack
bool DynamicCallStack
Definition: SIProgramInfo.h:66
AMDHSA_BITS_GET
#define AMDHSA_BITS_GET(SRC, MSK)
Definition: AMDHSAKernelDescriptor.h:37
llvm::AMDGPUAsmPrinter::emitStartOfAsmFile
void emitStartOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the start of their fi...
Definition: AMDGPUAsmPrinter.cpp:116
llvm::MachineFrameInfo::getStackSize
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Definition: MachineFrameInfo.h:577
llvm::amdhsa::kernel_descriptor_t::kernel_code_properties
uint16_t kernel_code_properties
Definition: AMDHSAKernelDescriptor.h:175
llvm::MCAssembler::getEmitterPtr
MCCodeEmitter * getEmitterPtr() const
Definition: MCAssembler.h:325
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::MCSymbol::getName
StringRef getName() const
getName - Get the symbol name.
Definition: MCSymbol.h:198
llvm::GCNSubtarget::isTgSplitEnabled
bool isTgSplitEnabled() const
Definition: GCNSubtarget.h:537
Align
uint64_t Align
Definition: ELFObjHandler.cpp:81
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::StringRef::str
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:245
llvm::GCNSubtarget::dumpCode
bool dumpCode() const
Definition: GCNSubtarget.h:447
llvm::AMDGPUSubtarget::getLocalMemorySize
unsigned getLocalMemorySize() const
Definition: AMDGPUSubtarget.h:208
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::AsmPrinter::emitBasicBlockStart
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
Definition: AsmPrinter.cpp:3449
llvm::AMDGPU::isGFX10Plus
bool isGFX10Plus(const MCSubtargetInfo &STI)
Definition: AMDGPUBaseInfo.cpp:1690
llvm::amdhsa::kernel_descriptor_t::compute_pgm_rsrc3
uint32_t compute_pgm_rsrc3
Definition: AMDHSAKernelDescriptor.h:172
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::AMDGPUMachineFunction::getMode
AMDGPU::SIModeRegisterDefaults getMode() const
Definition: AMDGPUMachineFunction.h:82
llvm::SmallString< 128 >
llvm::AMDGPU::isHsaAbiVersion2
bool isHsaAbiVersion2(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:119
llvm::DK_ResourceLimit
@ DK_ResourceLimit
Definition: DiagnosticInfo.h:62
llvm::AMDGPUTargetStreamer::EmitAmdhsaKernelDescriptor
virtual void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr)=0
llvm::AMDGPUSubtarget::getWavefrontSize
unsigned getWavefrontSize() const
Definition: AMDGPUSubtarget.h:200
S_00B84C_TGID_X_EN
#define S_00B84C_TGID_X_EN(x)
Definition: SIDefines.h:943
llvm::Triple::AMDHSA
@ AMDHSA
Definition: Triple.h:199
R_00B84C_COMPUTE_PGM_RSRC2
#define R_00B84C_COMPUTE_PGM_RSRC2
Definition: SIDefines.h:933
llvm::GCNSubtarget::MaxWaveScratchSize
static const unsigned MaxWaveScratchSize
Definition: GCNSubtarget.h:205
AMD_CODE_PROPERTY_IS_PTR64
@ AMD_CODE_PROPERTY_IS_PTR64
Definition: AMDKernelCodeT.h:172
R_0286E8_SPI_TMPRING_SIZE
#define R_0286E8_SPI_TMPRING_SIZE
Definition: SIDefines.h:1034
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:640
llvm::amdhsa::kernel_descriptor_t::private_segment_fixed_size
uint32_t private_segment_fixed_size
Definition: AMDHSAKernelDescriptor.h:167
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:239
llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition: GlobalVariable.h:135
AMDGPUResourceUsageAnalysis.h
Analyzes how many registers and other resources are used by functions.
llvm::AMDGPU::isInlinableIntLiteral
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
Definition: AMDGPUBaseInfo.h:907
llvm::MCAssembler
Definition: MCAssembler.h:73
llvm::SIProgramInfo::FloatMode
uint32_t FloatMode
Definition: SIProgramInfo.h:30
AMD_ELEMENT_16_BYTES
@ AMD_ELEMENT_16_BYTES
Definition: AMDKernelCodeT.h:57
llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:216
AMDGPUTargetStreamer.h
llvm::isUInt< 16 >
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:408
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:320
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
uint64_t
llvm::GlobalValue::getVisibility
VisibilityTypes getVisibility() const
Definition: GlobalValue.h:228
llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:238
R_SPILLED_VGPRS
#define R_SPILLED_VGPRS
Definition: SIDefines.h:1047
llvm::SIMachineFunctionInfo::hasWorkGroupInfo
bool hasWorkGroupInfo() const
Definition: SIMachineFunctionInfo.h:699
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:577
llvm::Triple::getOS
OSType getOS() const
Get the parsed operating system type of this triple.
Definition: Triple.h:346
LLVM_EXTERNAL_VISIBILITY
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:126
llvm::TargetMachine::getMCSubtargetInfo
const MCSubtargetInfo * getMCSubtargetInfo() const
Definition: TargetMachine.h:209
llvm::SIMachineFunctionInfo::getMaxWavesPerEU
unsigned getMaxWavesPerEU() const
Definition: SIMachineFunctionInfo.h:912
llvm::AMDGPUAsmPrinter::emitEndOfAsmFile
void emitEndOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the end of their file...
Definition: AMDGPUAsmPrinter.cpp:154
R_00B028_SPI_SHADER_PGM_RSRC1_PS
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS
Definition: SIDefines.h:900
llvm::SIMachineFunctionInfo::hasDispatchID
bool hasDispatchID() const
Definition: SIMachineFunctionInfo.h:679
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::AMDGPU::initDefaultAMDKernelCodeT
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:833
R_00B848_COMPUTE_PGM_RSRC1
#define R_00B848_COMPUTE_PGM_RSRC1
Definition: SIDefines.h:973
llvm::SIMachineFunctionInfo::getNumSpilledVGPRs
unsigned getNumSpilledVGPRs() const
Definition: SIMachineFunctionInfo.h:844
llvm::AMDGPUMachineFunction::isEntryFunction
bool isEntryFunction() const
Definition: AMDGPUMachineFunction.h:86
AMDGPUInstPrinter.h
llvm::AMDGPUTargetStreamer::EmitISAVersion
virtual bool EmitISAVersion()=0
llvm::AMDGPUAsmPrinter::getTargetStreamer
AMDGPUTargetStreamer * getTargetStreamer() const
Definition: AMDGPUAsmPrinter.cpp:110
llvm::MCSymbol::isVariable
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition: MCSymbol.h:293
R_00B428_SPI_SHADER_PGM_RSRC1_HS
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS
Definition: SIDefines.h:923
llvm::isUInt< 32 >
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:411
llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition: MachineFunction.cpp:567
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::AsmPrinter::getSymbol
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:654
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1663
llvm::AMDGPUAsmPrinter::DisasmLines
std::vector< std::string > DisasmLines
Definition: AMDGPUAsmPrinter.h:135
llvm::AMDGPUAsmPrinter::getGlobalSTI
const MCSubtargetInfo * getGlobalSTI() const
Definition: AMDGPUAsmPrinter.cpp:106
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:656
createAMDGPUAsmPrinterPass
static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)
Definition: AMDGPUAsmPrinter.cpp:74
llvm::getTheGCNTarget
Target & getTheGCNTarget()
The target for GCN GPUs.
Definition: AMDGPUTargetInfo.cpp:25
llvm::MCContext::reportError
void reportError(SMLoc L, const Twine &Msg)
Definition: MCContext.cpp:1005
llvm::GCNSubtarget::getMinNumVGPRs
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1175
llvm::SIProgramInfo::VGPRBlocks
uint32_t VGPRBlocks
Definition: SIProgramInfo.h:27
llvm::GCNSubtarget::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs() const
Definition: GCNSubtarget.h:1099
llvm::AsmPrinter::MF
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:99
llvm::AsmPrinter::OutContext
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:91
llvm::SIProgramInfo::getPGMRSrc1
uint64_t getPGMRSrc1(CallingConv::ID CC) const
Definition: SIProgramInfo.cpp:31
llvm::findLastSet
T findLastSet(T Val, ZeroBehavior ZB=ZB_Max)
Get the index of the last set bit starting from the least significant bit.
Definition: MathExtras.h:280
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:262
llvm::GCNSubtarget::hasGFX90AInsts
bool hasGFX90AInsts() const
Definition: GCNSubtarget.h:1012
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:359
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::AMDGPUMachineFunction::isMemoryBound
bool isMemoryBound() const
Definition: AMDGPUMachineFunction.h:96
llvm::AMDGPU::HSAMD::MetadataStreamerV5
Definition: AMDGPUHSAMetadataStreamer.h:148
llvm::AMDGPU::IsaInfo::getNumSGPRBlocks
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
Definition: AMDGPUBaseInfo.cpp:756
amd_kernel_code_t::kernarg_segment_alignment
uint8_t kernarg_segment_alignment
The maximum byte alignment of variables used by the kernel in the specified memory segment.
Definition: AMDKernelCodeT.h:634
amd_kernel_code_t::kernarg_segment_byte_size
uint64_t kernarg_segment_byte_size
The size in bytes of the kernarg segment that holds the values of the arguments to the kernel.
Definition: AMDKernelCodeT.h:583
llvm::getCPU
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
Definition: AVRTargetMachine.cpp:32
llvm::MachineFunction
Definition: MachineFunction.h:241
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1586
FP_DENORM_MODE_SP
#define FP_DENORM_MODE_SP(x)
Definition: SIDefines.h:1028
llvm::MCSymbol::redefineIfPossible
void redefineIfPossible()
Prepare this symbol to be redefined.
Definition: MCSymbol.h:225
S_0286E8_WAVESIZE
#define S_0286E8_WAVESIZE(x)
Definition: SIDefines.h:1035
llvm::AMDGPU::isHsaAbiVersion5
bool isHsaAbiVersion5(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:137
llvm::SIMachineFunctionInfo::hasWorkGroupIDX
bool hasWorkGroupIDX() const
Definition: SIMachineFunctionInfo.h:687
llvm::MachineBasicBlock::getNumber
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
Definition: MachineBasicBlock.h:1078
llvm::AMDGPUTargetStreamer::EmitCodeEnd
virtual bool EmitCodeEnd(const MCSubtargetInfo &STI)=0
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AMDGPUAsmPrinter::emitFunctionBodyEnd
void emitFunctionBodyEnd() override
Targets can override this to emit stuff after the last basic block in the function.
Definition: AMDGPUAsmPrinter.cpp:236
AMDGPU.h
llvm::AsmPrinter::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
Record analysis usage.
Definition: AsmPrinter.cpp:423
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:491
llvm::AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
Definition: AMDGPUBaseInfo.h:94
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::AMDGPU::isCompute
bool isCompute(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1595
TargetLoweringObjectFile.h
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
uint32_t
llvm::AMDGPU::IsaInfo::getNumVGPRBlocks
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, Optional< bool > EnableWavefrontSize32)
Definition: AMDGPUBaseInfo.cpp:823
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
G_00B84C_SCRATCH_EN
#define G_00B84C_SCRATCH_EN(x)
Definition: SIDefines.h:935
llvm::CallingConv::SPIR_KERNEL
@ SPIR_KERNEL
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:152
llvm::DiagnosticInfoStackSize
Definition: DiagnosticInfo.h:223
llvm::AMDGPUTargetStreamer
Definition: AMDGPUTargetStreamer.h:34
amd_kernel_code_t
AMD Kernel Code Object (amd_kernel_code_t).
Definition: AMDKernelCodeT.h:526
R_00B328_SPI_SHADER_PGM_RSRC1_ES
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES
Definition: SIDefines.h:922
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::SIMachineFunctionInfo::hasPrivateSegmentBuffer
bool hasPrivateSegmentBuffer() const
Definition: SIMachineFunctionInfo.h:663
AMDHSA_BITS_SET
#define AMDHSA_BITS_SET(DST, MSK, VAL)
Definition: AMDHSAKernelDescriptor.h:42
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:280
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
llvm::AMDGPUAsmPrinter::DisasmLineMaxLen
size_t DisasmLineMaxLen
Definition: AMDGPUAsmPrinter.h:136
llvm::amdhsa::kernel_descriptor_t::kernarg_size
uint32_t kernarg_size
Definition: AMDHSAKernelDescriptor.h:168
G_00B84C_TGID_Z_EN
#define G_00B84C_TGID_Z_EN(x)
Definition: SIDefines.h:950
FP_ROUND_MODE_SP
#define FP_ROUND_MODE_SP(x)
Definition: SIDefines.h:1017
llvm::GCNSubtarget::getMinNumSGPRs
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1105
FP_ROUND_MODE_DP
#define FP_ROUND_MODE_DP(x)
Definition: SIDefines.h:1018
llvm::SIProgramInfo::FlatUsed
bool FlatUsed
Definition: SIProgramInfo.h:53
llvm::CallingConv::AMDGPU_GS
@ AMDGPU_GS
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:207
llvm::SIMachineFunctionInfo::getNumUserSGPRs
unsigned getNumUserSGPRs() const
Definition: SIMachineFunctionInfo.h:755
llvm::SIProgramInfo::IEEEMode
uint32_t IEEEMode
Definition: SIProgramInfo.h:34
llvm::SIProgramInfo::NumAccVGPR
uint32_t NumAccVGPR
Definition: SIProgramInfo.h:48
llvm::SIMachineFunctionInfo::hasWorkItemIDZ
bool hasWorkItemIDZ() const
Definition: SIMachineFunctionInfo.h:715
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::SIProgramInfo::Occupancy
uint32_t Occupancy
Definition: SIProgramInfo.h:62
llvm::AMDGPU::HSAMD::Kernel::Key::SymbolName
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
Definition: AMDGPUMetadata.h:386
llvm::LLVMContext::diagnose
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Definition: LLVMContext.cpp:243
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::AMDGPUTargetStreamer::EmitAMDGPUSymbolType
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type)=0
llvm::Triple::OSType
OSType
Definition: Triple.h:173
S_00B84C_TG_SIZE_EN
#define S_00B84C_TG_SIZE_EN(x)
Definition: SIDefines.h:952
llvm::empty
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:268
std
Definition: BitVector.h:851
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:606
llvm::SIMachineFunctionInfo::hasDispatchPtr
bool hasDispatchPtr() const
Definition: SIMachineFunctionInfo.h:667
S_00B84C_EXCP_EN
#define S_00B84C_EXCP_EN(x)
Definition: SIDefines.h:966
uint16_t
llvm::ELF::SHT_PROGBITS
@ SHT_PROGBITS
Definition: ELF.h:946
llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS
@ VOLCANIC_ISLANDS
Definition: AMDGPUSubtarget.h:39
llvm::AMDGPUMachineFunction::isModuleEntryFunction
bool isModuleEntryFunction() const
Definition: AMDGPUMachineFunction.h:90
MachineFrameInfo.h
amd_kernel_code_t::code_properties
uint32_t code_properties
Code properties.
Definition: AMDKernelCodeT.h:562
llvm::GCNSubtarget::getTargetID
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition: GCNSubtarget.h:250
llvm::createR600AsmPrinterPass
AsmPrinter * createR600AsmPrinterPass(TargetMachine &TM, std::unique_ptr< MCStreamer > &&Streamer)
Definition: R600AsmPrinter.cpp:31
R_00B860_COMPUTE_TMPRING_SIZE
#define R_00B860_COMPUTE_TMPRING_SIZE
Definition: SIDefines.h:1031
Success
#define Success
Definition: AArch64Disassembler.cpp:279
llvm::Triple::AMDPAL
@ AMDPAL
Definition: Triple.h:208
llvm::SIProgramInfo::TgSplit
uint32_t TgSplit
Definition: SIProgramInfo.h:50
DiagnosticInfo.h
llvm::SIMachineFunctionInfo::hasWorkItemIDY
bool hasWorkItemIDY() const
Definition: SIMachineFunctionInfo.h:711
llvm::amdhsa::kernel_descriptor_t
Definition: AMDHSAKernelDescriptor.h:165
llvm::AsmPrinter
This class is intended to be used as a driving class for all asm writers.
Definition: AsmPrinter.h:81
S_00B028_SGPRS
#define S_00B028_SGPRS(x)
Definition: SIDefines.h:902
llvm::SIProgramInfo::VCCUsed
bool VCCUsed
Definition: SIProgramInfo.h:69
llvm::AsmPrinter::emitVisibility
void emitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition=true) const
This emits visibility information about symbol, if this is supported by the target.
Definition: AsmPrinter.cpp:3537
llvm::GCNSubtarget::getMaxNumUserSGPRs
unsigned getMaxNumUserSGPRs() const
Definition: GCNSubtarget.h:796
llvm::AMDGPU::HSAMD::MetadataStreamerV2
Definition: AMDGPUHSAMetadataStreamer.h:160
llvm::AMDGPUTargetStreamer::EmitDirectiveHSACodeObjectISAV2
virtual void EmitDirectiveHSACodeObjectISAV2(uint32_t Major, uint32_t Minor, uint32_t Stepping, StringRef VendorName, StringRef ArchName)=0
llvm::MachineOperand::isImm
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Definition: MachineOperand.h:322
llvm::SIProgramInfo::getComputePGMRSrc1
uint64_t getComputePGMRSrc1() const
Compute the value of the ComputePGMRsrc1 register.
Definition: SIProgramInfo.cpp:23
llvm::AMDGPUAsmPrinter::getPassName
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
Definition: AMDGPUAsmPrinter.cpp:102
R_SPILLED_SGPRS
#define R_SPILLED_SGPRS
Definition: SIDefines.h:1046
llvm::amdhsa::kernel_descriptor_t::compute_pgm_rsrc1
uint32_t compute_pgm_rsrc1
Definition: AMDHSAKernelDescriptor.h:173
llvm::AMDGPUAsmPrinter::doFinalization
bool doFinalization(Module &M) override
Shut down the asmprinter.
Definition: AMDGPUAsmPrinter.cpp:347
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE
@ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE
Definition: AMDKernelCodeT.h:163
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:363
llvm::GlobalValue::getAddressSpace
unsigned getAddressSpace() const
Definition: Globals.cpp:117
llvm::GCNSubtarget::getGeneration
Generation getGeneration() const
Definition: GCNSubtarget.h:265
llvm::SIInstrInfo
Definition: SIInstrInfo.h:43
amd_kernel_code_t::workitem_vgpr_count
uint16_t workitem_vgpr_count
Number of vector registers used by each work-item.
Definition: AMDKernelCodeT.h:599
llvm::SIProgramInfo
Track resource usage for kernels / entry functions.
Definition: SIProgramInfo.h:25
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:105
G_00B84C_USER_SGPR
#define G_00B84C_USER_SGPR(x)
Definition: SIDefines.h:938
llvm::CallingConv::AMDGPU_VS
@ AMDGPU_VS
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
Definition: CallingConv.h:204
llvm::GCNSubtarget::isCuModeEnabled
bool isCuModeEnabled() const
Definition: GCNSubtarget.h:541
G_00B84C_TIDIG_COMP_CNT
#define G_00B84C_TIDIG_COMP_CNT(x)
Definition: SIDefines.h:956
llvm::AsmPrinter::TM
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:84
Version
uint64_t Version
Definition: RawMemProfReader.cpp:40
llvm::CallingConv::AMDGPU_ES
@ AMDGPU_ES
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use.
Definition: CallingConv.h:236
llvm::TargetMachine::getSubtarget
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Definition: TargetMachine.h:164
llvm::TargetRegistry::RegisterAsmPrinter
static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)
RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.
Definition: TargetRegistry.h:920
S_00B028_VGPRS
#define S_00B028_VGPRS(x)
Definition: SIDefines.h:901
llvm::CallingConv::AMDGPU_CS
@ AMDGPU_CS
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:213
llvm::AsmPrinter::getObjFileLowering
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
Definition: AsmPrinter.cpp:382
MCStreamer.h
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:348
AMDKernelCodeT.h
llvm::AsmPrinter::emitFunctionBody
void emitFunctionBody()
This method emits the body and trailer for a function.
Definition: AsmPrinter.cpp:1413
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID
Definition: AMDKernelCodeT.h:103
llvm::TargetMachine::getTargetTriple
const Triple & getTargetTriple() const
Definition: TargetMachine.h:126
llvm::AMDGPU::isHsaAbiVersion3AndAbove
bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:143
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
S_00B84C_LDS_SIZE
#define S_00B84C_LDS_SIZE(x)
Definition: SIDefines.h:963
llvm::AMDGPUResourceUsageAnalysis
Definition: AMDGPUResourceUsageAnalysis.h:27
G_00B84C_TGID_X_EN
#define G_00B84C_TGID_X_EN(x)
Definition: SIDefines.h:944
llvm::SIMachineFunctionInfo::hasQueuePtr
bool hasQueuePtr() const
Definition: SIMachineFunctionInfo.h:671
llvm::SIMachineFunctionInfo::hasWorkGroupIDY
bool hasWorkGroupIDY() const
Definition: SIMachineFunctionInfo.h:691
llvm::MachineBasicBlock::empty
bool empty() const
Definition: MachineBasicBlock.h:249
llvm::AMDGPUAsmPrinter::emitFunctionEntryLabel
void emitFunctionEntryLabel() override
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
Definition: AMDGPUAsmPrinter.cpp:275
llvm::SIMachineFunctionInfo::hasKernargSegmentPtr
bool hasKernargSegmentPtr() const
Definition: SIMachineFunctionInfo.h:675
llvm::SIProgramInfo::DX10Clamp
uint32_t DX10Clamp
Definition: SIProgramInfo.h:32
llvm::SIMachineFunctionInfo::getPSInputAddr
unsigned getPSInputAddr() const
Definition: SIMachineFunctionInfo.h:856
llvm::SIProgramInfo::ScratchBlocks
uint32_t ScratchBlocks
Definition: SIProgramInfo.h:41
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:272
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR
Definition: AMDKernelCodeT.h:99
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::AMDGPUTargetStreamer::getPALMetadata
AMDGPUPALMetadata * getPALMetadata()
Definition: AMDGPUTargetStreamer.h:46
llvm::SIProgramInfo::SGPRBlocks
uint32_t SGPRBlocks
Definition: SIProgramInfo.h:28
llvm::AsmPrinter::isVerbose
bool isVerbose() const
Return true if assembly output should contain comments.
Definition: AsmPrinter.h:254
LLVMInitializeAMDGPUAsmPrinter
void LLVM_EXTERNAL_VISIBILITY LLVMInitializeAMDGPUAsmPrinter()
Definition: AMDGPUAsmPrinter.cpp:79
llvm::AMDGPUAsmPrinter::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition: AMDGPUAsmPrinter.cpp:452
llvm::AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo
Definition: AMDGPUResourceUsageAnalysis.h:32
llvm::AMDGPUAsmPrinter::HexLines
std::vector< std::string > HexLines
Definition: AMDGPUAsmPrinter.h:135
llvm::AMDGPUMachineFunction
Definition: AMDGPUMachineFunction.h:22
TargetRegistry.h
llvm::AMDGPUAsmPrinter::emitGlobalVariable
void emitGlobalVariable(const GlobalVariable *GV) override
Emit the specified global variable to the .s file.
Definition: AMDGPUAsmPrinter.cpp:312
R_00B02C_SPI_SHADER_PGM_RSRC2_PS
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS
Definition: SIDefines.h:907
llvm::MCSubtargetInfo
Generic base class for all target subtargets.
Definition: MCSubtargetInfo.h:76
AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED
@ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED
Definition: AMDKernelCodeT.h:193
llvm::AsmPrinter::getFunctionNumber
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:378
llvm::GCNSubtarget::computeOccupancy
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
Definition: AMDGPUSubtarget.cpp:662
amd_kernel_code_t::wavefront_sgpr_count
uint16_t wavefront_sgpr_count
Number of scalar registers used by a wavefront.
Definition: AMDKernelCodeT.h:595
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR
Definition: AMDKernelCodeT.h:91
llvm::DS_Error
@ DS_Error
Definition: DiagnosticInfo.h:50
llvm::SIMachineFunctionInfo::hasFlatScratchInit
bool hasFlatScratchInit() const
Definition: SIMachineFunctionInfo.h:683
llvm::CallingConv::AMDGPU_PS
@ AMDGPU_PS
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:210
S_00B84C_TRAP_HANDLER
#define S_00B84C_TRAP_HANDLER(x)
Definition: SIDefines.h:940
llvm::AsmPrinter::PrintAsmOperand
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
Definition: AsmPrinterInlineAsm.cpp:467
llvm::AMDGPU::HSAMD::MetadataStreamerV4
Definition: AMDGPUHSAMetadataStreamer.h:134
llvm::AMDGPUInstPrinter::printRegOperand
static void printRegOperand(unsigned RegNo, raw_ostream &O, const MCRegisterInfo &MRI)
Definition: AMDGPUInstPrinter.cpp:342
llvm::AMDGPUSubtarget::isAmdPalOS
bool isAmdPalOS() const
Definition: AMDGPUSubtarget.h:124
llvm::GCNSubtarget::hasMAIInsts
bool hasMAIInsts() const
Definition: GCNSubtarget.h:712
AMDGPUBaseInfo.h
S_00B860_WAVESIZE
#define S_00B860_WAVESIZE(x)
Definition: SIDefines.h:1032