LLVM  13.0.0git
AMDGPUAsmPrinter.cpp
Go to the documentation of this file.
1 //===-- AMDGPUAsmPrinter.cpp - AMDGPU assembly printer --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 ///
11 /// The AMDGPUAsmPrinter is used to print both assembly string and also binary
12 /// code. When passed an MCAsmStreamer it prints assembly and when passed
13 /// an MCObjectStreamer it outputs binary code.
14 //
15 //===----------------------------------------------------------------------===//
16 //
17 
18 #include "AMDGPUAsmPrinter.h"
19 #include "AMDGPU.h"
21 #include "AMDKernelCodeT.h"
22 #include "GCNSubtarget.h"
25 #include "R600AsmPrinter.h"
26 #include "SIMachineFunctionInfo.h"
28 #include "Utils/AMDGPUBaseInfo.h"
29 #include "llvm/IR/DiagnosticInfo.h"
30 #include "llvm/MC/MCAssembler.h"
31 #include "llvm/MC/MCContext.h"
32 #include "llvm/MC/MCSectionELF.h"
33 #include "llvm/MC/MCStreamer.h"
38 
39 using namespace llvm;
40 using namespace llvm::AMDGPU;
41 
42 // We need to tell the runtime some amount ahead of time if we don't know the
43 // true stack size. Assume a smaller number if this is only due to dynamic /
44 // non-entry block allocas.
46  "amdgpu-assume-external-call-stack-size",
47  cl::desc("Assumed stack use of any external call (in bytes)"),
48  cl::Hidden,
49  cl::init(16384));
50 
52  "amdgpu-assume-dynamic-stack-object-size",
53  cl::desc("Assumed extra stack use if there are any "
54  "variable sized objects (in bytes)"),
55  cl::Hidden,
56  cl::init(4096));
57 
58 // This should get the default rounding mode from the kernel. We just set the
59 // default here, but this could change if the OpenCL rounding mode pragmas are
60 // used.
61 //
62 // The denormal mode here should match what is reported by the OpenCL runtime
63 // for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
64 // can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
65 //
66 // AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
67 // precision, and leaves single precision to flush all and does not report
68 // CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
69 // CL_FP_DENORM for both.
70 //
71 // FIXME: It seems some instructions do not support single precision denormals
72 // regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
73 // and sin_f32, cos_f32 on most parts).
74 
75 // We want to use these instructions, and using fp32 denormals also causes
76 // instructions to run at the double precision rate for the device so it's
77 // probably best to just report no single precision denormals.
81  FP_DENORM_MODE_SP(Mode.fpDenormModeSPValue()) |
82  FP_DENORM_MODE_DP(Mode.fpDenormModeDPValue());
83 }
84 
85 static AsmPrinter *
87  std::unique_ptr<MCStreamer> &&Streamer) {
88  return new AMDGPUAsmPrinter(tm, std::move(Streamer));
89 }
90 
96 }
97 
99  std::unique_ptr<MCStreamer> Streamer)
100  : AsmPrinter(TM, std::move(Streamer)) {
101  if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
103  HSAMetadataStream.reset(new HSAMD::MetadataStreamerV2());
104  } else if (isHsaAbiVersion3(getGlobalSTI())) {
105  HSAMetadataStream.reset(new HSAMD::MetadataStreamerV3());
106  } else {
107  HSAMetadataStream.reset(new HSAMD::MetadataStreamerV4());
108  }
109  }
110 }
111 
113  return "AMDGPU Assembly Printer";
114 }
115 
117  return TM.getMCSubtargetInfo();
118 }
119 
121  if (!OutStreamer)
122  return nullptr;
123  return static_cast<AMDGPUTargetStreamer*>(OutStreamer->getTargetStreamer());
124 }
125 
127  // TODO: Which one is called first, emitStartOfAsmFile or
128  // emitFunctionBodyStart?
129  if (getTargetStreamer() && !getTargetStreamer()->getTargetID())
130  initializeTargetID(M);
131 
132  if (TM.getTargetTriple().getOS() != Triple::AMDHSA &&
134  return;
135 
138 
140  HSAMetadataStream->begin(M, *getTargetStreamer()->getTargetID());
141 
144 
146  return;
147 
148  // HSA emits NT_AMD_HSA_CODE_OBJECT_VERSION for code objects v2.
151 
152  // HSA and PAL emit NT_AMD_HSA_ISA_VERSION for code objects v2.
155  Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
156 }
157 
159  // Following code requires TargetStreamer to be present.
160  if (!getTargetStreamer())
161  return;
162 
163  if (TM.getTargetTriple().getOS() != Triple::AMDHSA ||
166 
167  // Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA).
168  // Emit HSA Metadata (NT_AMD_HSA_METADATA).
169  if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
170  HSAMetadataStream->end();
171  bool Success = HSAMetadataStream->emitTo(*getTargetStreamer());
172  (void)Success;
173  assert(Success && "Malformed HSA Metadata");
174  }
175 }
176 
178  const MachineBasicBlock *MBB) const {
180  return false;
181 
182  if (MBB->empty())
183  return true;
184 
185  // If this is a block implementing a long branch, an expression relative to
186  // the start of the block is needed.
187  // XXX - Is there a smarter way to check this?
188  return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64);
189 }
190 
193  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
194  const Function &F = MF->getFunction();
195 
196  // TODO: Which one is called first, emitStartOfAsmFile or
197  // emitFunctionBodyStart?
199  initializeTargetID(*F.getParent());
200 
201  const auto &FunctionTargetID = STM.getTargetID();
202  // Make sure function's xnack settings are compatible with module's
203  // xnack settings.
204  if (FunctionTargetID.isXnackSupported() &&
205  FunctionTargetID.getXnackSetting() != IsaInfo::TargetIDSetting::Any &&
206  FunctionTargetID.getXnackSetting() != getTargetStreamer()->getTargetID()->getXnackSetting()) {
207  OutContext.reportError({}, "xnack setting of '" + Twine(MF->getName()) +
208  "' function does not match module xnack setting");
209  return;
210  }
211  // Make sure function's sramecc settings are compatible with module's
212  // sramecc settings.
213  if (FunctionTargetID.isSramEccSupported() &&
214  FunctionTargetID.getSramEccSetting() != IsaInfo::TargetIDSetting::Any &&
215  FunctionTargetID.getSramEccSetting() != getTargetStreamer()->getTargetID()->getSramEccSetting()) {
216  OutContext.reportError({}, "sramecc setting of '" + Twine(MF->getName()) +
217  "' function does not match module sramecc setting");
218  return;
219  }
220 
221  if (!MFI.isEntryFunction())
222  return;
223 
224  if ((STM.isMesaKernel(F) || isHsaAbiVersion2(getGlobalSTI())) &&
225  (F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
226  F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
227  amd_kernel_code_t KernelCode;
228  getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
229  getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
230  }
231 
232  if (STM.isAmdHsaOS())
233  HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
234 }
235 
238  if (!MFI.isEntryFunction())
239  return;
240 
241  if (TM.getTargetTriple().getOS() != Triple::AMDHSA ||
243  return;
244 
245  auto &Streamer = getTargetStreamer()->getStreamer();
246  auto &Context = Streamer.getContext();
247  auto &ObjectFileInfo = *Context.getObjectFileInfo();
248  auto &ReadOnlySection = *ObjectFileInfo.getReadOnlySection();
249 
250  Streamer.PushSection();
251  Streamer.SwitchSection(&ReadOnlySection);
252 
253  // CP microcode requires the kernel descriptor to be allocated on 64 byte
254  // alignment.
255  Streamer.emitValueToAlignment(64, 0, 1, 0);
256  if (ReadOnlySection.getAlignment() < 64)
257  ReadOnlySection.setAlignment(Align(64));
258 
259  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
260 
261  SmallString<128> KernelName;
262  getNameWithPrefix(KernelName, &MF->getFunction());
264  STM, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
265  CurrentProgramInfo.NumVGPRsForWavesPerEU,
266  CurrentProgramInfo.NumSGPRsForWavesPerEU -
268  CurrentProgramInfo.VCCUsed,
269  CurrentProgramInfo.FlatUsed),
270  CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed);
271 
272  Streamer.PopSection();
273 }
274 
276  if (TM.getTargetTriple().getOS() == Triple::AMDHSA &&
279  return;
280  }
281 
283  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
284  if (MFI->isEntryFunction() && STM.isAmdHsaOrMesa(MF->getFunction())) {
289  }
290  if (DumpCodeInstEmitter) {
291  // Disassemble function name label to text.
292  DisasmLines.push_back(MF->getName().str() + ":");
294  HexLines.push_back("");
295  }
296 
298 }
299 
301  if (DumpCodeInstEmitter && !isBlockOnlyReachableByFallthrough(&MBB)) {
302  // Write a line for the basic block label if it is not only fallthrough.
303  DisasmLines.push_back(
304  (Twine("BB") + Twine(getFunctionNumber())
305  + "_" + Twine(MBB.getNumber()) + ":").str());
307  HexLines.push_back("");
308  }
310 }
311 
314  if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
316  Twine(GV->getName()) +
317  ": unsupported initializer for address space");
318  return;
319  }
320 
321  // LDS variables aren't emitted in HSA or PAL yet.
322  const Triple::OSType OS = TM.getTargetTriple().getOS();
323  if (OS == Triple::AMDHSA || OS == Triple::AMDPAL)
324  return;
325 
326  MCSymbol *GVSym = getSymbol(GV);
327 
328  GVSym->redefineIfPossible();
329  if (GVSym->isDefined() || GVSym->isVariable())
330  report_fatal_error("symbol '" + Twine(GVSym->getName()) +
331  "' is already defined");
332 
333  const DataLayout &DL = GV->getParent()->getDataLayout();
334  uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
335  Align Alignment = GV->getAlign().getValueOr(Align(4));
336 
337  emitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
338  emitLinkage(GV, GVSym);
339  if (auto TS = getTargetStreamer())
340  TS->emitAMDGPULDS(GVSym, Size, Alignment);
341  return;
342  }
343 
345 }
346 
348  CallGraphResourceInfo.clear();
349 
350  // Pad with s_code_end to help tools and guard against instruction prefetch
351  // causing stale data in caches. Arguably this should be done by the linker,
352  // which is why this isn't done for Mesa.
353  const MCSubtargetInfo &STI = *getGlobalSTI();
354  if ((AMDGPU::isGFX10Plus(STI) || AMDGPU::isGFX90A(STI)) &&
355  (STI.getTargetTriple().getOS() == Triple::AMDHSA ||
356  STI.getTargetTriple().getOS() == Triple::AMDPAL)) {
357  OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
359  }
360 
362 }
363 
364 // Print comments that apply to both callable functions and entry points.
365 void AMDGPUAsmPrinter::emitCommonFunctionComments(
366  uint32_t NumVGPR,
367  Optional<uint32_t> NumAGPR,
368  uint32_t TotalNumVGPR,
369  uint32_t NumSGPR,
370  uint64_t ScratchSize,
371  uint64_t CodeSize,
372  const AMDGPUMachineFunction *MFI) {
373  OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);
374  OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false);
375  OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false);
376  if (NumAGPR) {
377  OutStreamer->emitRawComment(" NumAgprs: " + Twine(*NumAGPR), false);
378  OutStreamer->emitRawComment(" TotalNumVgprs: " + Twine(TotalNumVGPR),
379  false);
380  }
381  OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false);
382  OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()),
383  false);
384 }
385 
386 uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
387  const MachineFunction &MF) const {
389  uint16_t KernelCodeProperties = 0;
390 
391  if (MFI.hasPrivateSegmentBuffer()) {
392  KernelCodeProperties |=
393  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
394  }
395  if (MFI.hasDispatchPtr()) {
396  KernelCodeProperties |=
397  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
398  }
399  if (MFI.hasQueuePtr()) {
400  KernelCodeProperties |=
401  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
402  }
403  if (MFI.hasKernargSegmentPtr()) {
404  KernelCodeProperties |=
405  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
406  }
407  if (MFI.hasDispatchID()) {
408  KernelCodeProperties |=
409  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
410  }
411  if (MFI.hasFlatScratchInit()) {
412  KernelCodeProperties |=
413  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
414  }
416  KernelCodeProperties |=
417  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
418  }
419 
420  return KernelCodeProperties;
421 }
422 
423 amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
424  const MachineFunction &MF,
425  const SIProgramInfo &PI) const {
426  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
427  const Function &F = MF.getFunction();
428 
429  amdhsa::kernel_descriptor_t KernelDescriptor;
430  memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor));
431 
435 
436  KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
437  KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
438 
439  Align MaxKernArgAlign;
440  KernelDescriptor.kernarg_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
441 
442  KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1();
443  KernelDescriptor.compute_pgm_rsrc2 = PI.ComputePGMRSrc2;
444  KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
445 
446  assert(STM.hasGFX90AInsts() || CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
447  if (STM.hasGFX90AInsts())
448  KernelDescriptor.compute_pgm_rsrc3 =
449  CurrentProgramInfo.ComputePGMRSrc3GFX90A;
450 
451  return KernelDescriptor;
452 }
453 
455  CurrentProgramInfo = SIProgramInfo();
456 
458 
459  // The starting address of all shader programs must be 256 bytes aligned.
460  // Regular functions just need the basic required instruction alignment.
461  MF.setAlignment(MFI->isEntryFunction() ? Align(256) : Align(4));
462 
464 
465  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
467  // FIXME: This should be an explicit check for Mesa.
468  if (!STM.isAmdHsaOS() && !STM.isAmdPalOS()) {
469  MCSectionELF *ConfigSection =
470  Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
471  OutStreamer->SwitchSection(ConfigSection);
472  }
473 
474  if (MFI->isModuleEntryFunction()) {
475  getSIProgramInfo(CurrentProgramInfo, MF);
476  } else {
477  auto I = CallGraphResourceInfo.insert(
478  std::make_pair(&MF.getFunction(), SIFunctionResourceInfo()));
479  SIFunctionResourceInfo &Info = I.first->second;
480  assert(I.second && "should only be called once per function");
481  Info = analyzeResourceUsage(MF);
482  }
483 
484  if (STM.isAmdPalOS()) {
485  if (MFI->isEntryFunction())
486  EmitPALMetadata(MF, CurrentProgramInfo);
487  else if (MFI->isModuleEntryFunction())
488  emitPALFunctionMetadata(MF);
489  } else if (!STM.isAmdHsaOS()) {
490  EmitProgramInfoSI(MF, CurrentProgramInfo);
491  }
492 
493  DumpCodeInstEmitter = nullptr;
494  if (STM.dumpCode()) {
495  // For -dumpcode, get the assembler out of the streamer, even if it does
496  // not really want to let us have it. This only works with -filetype=obj.
497  bool SaveFlag = OutStreamer->getUseAssemblerInfoForParsing();
498  OutStreamer->setUseAssemblerInfoForParsing(true);
499  MCAssembler *Assembler = OutStreamer->getAssemblerPtr();
500  OutStreamer->setUseAssemblerInfoForParsing(SaveFlag);
501  if (Assembler)
502  DumpCodeInstEmitter = Assembler->getEmitterPtr();
503  }
504 
505  DisasmLines.clear();
506  HexLines.clear();
507  DisasmLineMaxLen = 0;
508 
510 
511  if (isVerbose()) {
512  MCSectionELF *CommentSection =
513  Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
514  OutStreamer->SwitchSection(CommentSection);
515 
516  if (!MFI->isEntryFunction()) {
517  OutStreamer->emitRawComment(" Function info:", false);
518  SIFunctionResourceInfo &Info = CallGraphResourceInfo[&MF.getFunction()];
519  emitCommonFunctionComments(
520  Info.NumVGPR,
521  STM.hasMAIInsts() ? Info.NumAGPR : Optional<uint32_t>(),
522  Info.getTotalNumVGPRs(STM),
523  Info.getTotalNumSGPRs(MF.getSubtarget<GCNSubtarget>()),
524  Info.PrivateSegmentSize,
525  getFunctionCodeSize(MF), MFI);
526  return false;
527  }
528 
529  OutStreamer->emitRawComment(" Kernel info:", false);
530  emitCommonFunctionComments(CurrentProgramInfo.NumArchVGPR,
531  STM.hasMAIInsts()
532  ? CurrentProgramInfo.NumAccVGPR
533  : Optional<uint32_t>(),
534  CurrentProgramInfo.NumVGPR,
535  CurrentProgramInfo.NumSGPR,
536  CurrentProgramInfo.ScratchSize,
537  getFunctionCodeSize(MF), MFI);
538 
539  OutStreamer->emitRawComment(
540  " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false);
541  OutStreamer->emitRawComment(
542  " IeeeMode: " + Twine(CurrentProgramInfo.IEEEMode), false);
543  OutStreamer->emitRawComment(
544  " LDSByteSize: " + Twine(CurrentProgramInfo.LDSSize) +
545  " bytes/workgroup (compile time only)", false);
546 
547  OutStreamer->emitRawComment(
548  " SGPRBlocks: " + Twine(CurrentProgramInfo.SGPRBlocks), false);
549  OutStreamer->emitRawComment(
550  " VGPRBlocks: " + Twine(CurrentProgramInfo.VGPRBlocks), false);
551 
552  OutStreamer->emitRawComment(
553  " NumSGPRsForWavesPerEU: " +
554  Twine(CurrentProgramInfo.NumSGPRsForWavesPerEU), false);
555  OutStreamer->emitRawComment(
556  " NumVGPRsForWavesPerEU: " +
557  Twine(CurrentProgramInfo.NumVGPRsForWavesPerEU), false);
558 
559  if (STM.hasGFX90AInsts())
560  OutStreamer->emitRawComment(
561  " AccumOffset: " +
562  Twine((CurrentProgramInfo.AccumOffset + 1) * 4), false);
563 
564  OutStreamer->emitRawComment(
565  " Occupancy: " +
566  Twine(CurrentProgramInfo.Occupancy), false);
567 
568  OutStreamer->emitRawComment(
569  " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
570 
571  OutStreamer->emitRawComment(
572  " COMPUTE_PGM_RSRC2:SCRATCH_EN: " +
573  Twine(G_00B84C_SCRATCH_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
574  OutStreamer->emitRawComment(
575  " COMPUTE_PGM_RSRC2:USER_SGPR: " +
576  Twine(G_00B84C_USER_SGPR(CurrentProgramInfo.ComputePGMRSrc2)), false);
577  OutStreamer->emitRawComment(
578  " COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
579  Twine(G_00B84C_TRAP_HANDLER(CurrentProgramInfo.ComputePGMRSrc2)), false);
580  OutStreamer->emitRawComment(
581  " COMPUTE_PGM_RSRC2:TGID_X_EN: " +
582  Twine(G_00B84C_TGID_X_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
583  OutStreamer->emitRawComment(
584  " COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
585  Twine(G_00B84C_TGID_Y_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
586  OutStreamer->emitRawComment(
587  " COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
588  Twine(G_00B84C_TGID_Z_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
589  OutStreamer->emitRawComment(
590  " COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
591  Twine(G_00B84C_TIDIG_COMP_CNT(CurrentProgramInfo.ComputePGMRSrc2)),
592  false);
593 
594  assert(STM.hasGFX90AInsts() ||
595  CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
596  if (STM.hasGFX90AInsts()) {
597  OutStreamer->emitRawComment(
598  " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
599  Twine((AMDHSA_BITS_GET(CurrentProgramInfo.ComputePGMRSrc3GFX90A,
600  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET))),
601  false);
602  OutStreamer->emitRawComment(
603  " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
604  Twine((AMDHSA_BITS_GET(CurrentProgramInfo.ComputePGMRSrc3GFX90A,
605  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT))),
606  false);
607  }
608  }
609 
610  if (DumpCodeInstEmitter) {
611 
612  OutStreamer->SwitchSection(
613  Context.getELFSection(".AMDGPU.disasm", ELF::SHT_PROGBITS, 0));
614 
615  for (size_t i = 0; i < DisasmLines.size(); ++i) {
616  std::string Comment = "\n";
617  if (!HexLines[i].empty()) {
618  Comment = std::string(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
619  Comment += " ; " + HexLines[i] + "\n";
620  }
621 
622  OutStreamer->emitBytes(StringRef(DisasmLines[i]));
623  OutStreamer->emitBytes(StringRef(Comment));
624  }
625  }
626 
627  return false;
628 }
629 
630 // TODO: Fold this into emitFunctionBodyStart.
631 void AMDGPUAsmPrinter::initializeTargetID(const Module &M) {
632  // In the beginning all features are either 'Any' or 'NotSupported',
633  // depending on global target features. This will cover empty modules.
635  *getGlobalSTI(), getGlobalSTI()->getFeatureString());
636 
637  // If module is empty, we are done.
638  if (M.empty())
639  return;
640 
641  // If module is not empty, need to find first 'Off' or 'On' feature
642  // setting per feature from functions in module.
643  for (auto &F : M) {
644  auto &TSTargetID = getTargetStreamer()->getTargetID();
645  if ((!TSTargetID->isXnackSupported() || TSTargetID->isXnackOnOrOff()) &&
646  (!TSTargetID->isSramEccSupported() || TSTargetID->isSramEccOnOrOff()))
647  break;
648 
649  const GCNSubtarget &STM = TM.getSubtarget<GCNSubtarget>(F);
650  const IsaInfo::AMDGPUTargetID &STMTargetID = STM.getTargetID();
651  if (TSTargetID->isXnackSupported())
652  if (TSTargetID->getXnackSetting() == IsaInfo::TargetIDSetting::Any)
653  TSTargetID->setXnackSetting(STMTargetID.getXnackSetting());
654  if (TSTargetID->isSramEccSupported())
655  if (TSTargetID->getSramEccSetting() == IsaInfo::TargetIDSetting::Any)
656  TSTargetID->setSramEccSetting(STMTargetID.getSramEccSetting());
657  }
658 }
659 
660 uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(const MachineFunction &MF) const {
661  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
662  const SIInstrInfo *TII = STM.getInstrInfo();
663 
664  uint64_t CodeSize = 0;
665 
666  for (const MachineBasicBlock &MBB : MF) {
667  for (const MachineInstr &MI : MBB) {
668  // TODO: CodeSize should account for multiple functions.
669 
670  // TODO: Should we count size of debug info?
671  if (MI.isDebugInstr())
672  continue;
673 
674  CodeSize += TII->getInstSizeInBytes(MI);
675  }
676  }
677 
678  return CodeSize;
679 }
680 
682  const SIInstrInfo &TII,
683  unsigned Reg) {
684  for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) {
685  if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
686  return true;
687  }
688 
689  return false;
690 }
691 
693  const GCNSubtarget &ST) const {
694  return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(
695  &ST, UsesVCC, UsesFlatScratch, ST.getTargetID().isXnackOnOrAny());
696 }
697 
699  const GCNSubtarget &ST) const {
700  if (ST.hasGFX90AInsts() && NumAGPR)
701  return alignTo(NumVGPR, 4) + NumAGPR;
702  return std::max(NumVGPR, NumAGPR);
703 }
704 
706  if (Op.isImm()) {
707  assert(Op.getImm() == 0);
708  return nullptr;
709  }
710 
711  return cast<Function>(Op.getGlobal());
712 }
713 
714 AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
715  const MachineFunction &MF) const {
716  SIFunctionResourceInfo Info;
717 
720  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
722  const SIInstrInfo *TII = ST.getInstrInfo();
723  const SIRegisterInfo &TRI = TII->getRegisterInfo();
724 
725  Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
726  MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI);
727 
728  // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
729  // instructions aren't used to access the scratch buffer. Inline assembly may
730  // need it though.
731  //
732  // If we only have implicit uses of flat_scr on flat instructions, it is not
733  // really needed.
734  if (Info.UsesFlatScratch && !MFI->hasFlatScratchInit() &&
735  (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
736  !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
737  !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
738  Info.UsesFlatScratch = false;
739  }
740 
741  Info.PrivateSegmentSize = FrameInfo.getStackSize();
742 
743  // Assume a big number if there are any unknown sized objects.
744  Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
745  if (Info.HasDynamicallySizedStack)
746  Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects;
747 
748  if (MFI->isStackRealigned())
749  Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();
750 
751  Info.UsesVCC = MRI.isPhysRegUsed(AMDGPU::VCC_LO) ||
752  MRI.isPhysRegUsed(AMDGPU::VCC_HI);
753 
754  // If there are no calls, MachineRegisterInfo can tell us the used register
755  // count easily.
756  // A tail call isn't considered a call for MachineFrameInfo's purposes.
757  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
758  MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
759  for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
760  if (MRI.isPhysRegUsed(Reg)) {
761  HighestVGPRReg = Reg;
762  break;
763  }
764  }
765 
766  if (ST.hasMAIInsts()) {
767  MCPhysReg HighestAGPRReg = AMDGPU::NoRegister;
768  for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) {
769  if (MRI.isPhysRegUsed(Reg)) {
770  HighestAGPRReg = Reg;
771  break;
772  }
773  }
774  Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister ? 0 :
775  TRI.getHWRegIndex(HighestAGPRReg) + 1;
776  }
777 
778  MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
779  for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
780  if (MRI.isPhysRegUsed(Reg)) {
781  HighestSGPRReg = Reg;
782  break;
783  }
784  }
785 
786  // We found the maximum register index. They start at 0, so add one to get the
787  // number of registers.
788  Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister ? 0 :
789  TRI.getHWRegIndex(HighestVGPRReg) + 1;
790  Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister ? 0 :
791  TRI.getHWRegIndex(HighestSGPRReg) + 1;
792 
793  return Info;
794  }
795 
796  int32_t MaxVGPR = -1;
797  int32_t MaxAGPR = -1;
798  int32_t MaxSGPR = -1;
799  uint64_t CalleeFrameSize = 0;
800 
801  for (const MachineBasicBlock &MBB : MF) {
802  for (const MachineInstr &MI : MBB) {
803  // TODO: Check regmasks? Do they occur anywhere except calls?
804  for (const MachineOperand &MO : MI.operands()) {
805  unsigned Width = 0;
806  bool IsSGPR = false;
807  bool IsAGPR = false;
808 
809  if (!MO.isReg())
810  continue;
811 
812  Register Reg = MO.getReg();
813  switch (Reg) {
814  case AMDGPU::EXEC:
815  case AMDGPU::EXEC_LO:
816  case AMDGPU::EXEC_HI:
817  case AMDGPU::SCC:
818  case AMDGPU::M0:
819  case AMDGPU::SRC_SHARED_BASE:
820  case AMDGPU::SRC_SHARED_LIMIT:
821  case AMDGPU::SRC_PRIVATE_BASE:
822  case AMDGPU::SRC_PRIVATE_LIMIT:
823  case AMDGPU::SGPR_NULL:
824  case AMDGPU::MODE:
825  continue;
826 
827  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
828  llvm_unreachable("src_pops_exiting_wave_id should not be used");
829 
830  case AMDGPU::NoRegister:
831  assert(MI.isDebugInstr() && "Instruction uses invalid noreg register");
832  continue;
833 
834  case AMDGPU::VCC:
835  case AMDGPU::VCC_LO:
836  case AMDGPU::VCC_HI:
837  case AMDGPU::VCC_LO_LO16:
838  case AMDGPU::VCC_LO_HI16:
839  case AMDGPU::VCC_HI_LO16:
840  case AMDGPU::VCC_HI_HI16:
841  Info.UsesVCC = true;
842  continue;
843 
844  case AMDGPU::FLAT_SCR:
845  case AMDGPU::FLAT_SCR_LO:
846  case AMDGPU::FLAT_SCR_HI:
847  continue;
848 
849  case AMDGPU::XNACK_MASK:
850  case AMDGPU::XNACK_MASK_LO:
851  case AMDGPU::XNACK_MASK_HI:
852  llvm_unreachable("xnack_mask registers should not be used");
853 
854  case AMDGPU::LDS_DIRECT:
855  llvm_unreachable("lds_direct register should not be used");
856 
857  case AMDGPU::TBA:
858  case AMDGPU::TBA_LO:
859  case AMDGPU::TBA_HI:
860  case AMDGPU::TMA:
861  case AMDGPU::TMA_LO:
862  case AMDGPU::TMA_HI:
863  llvm_unreachable("trap handler registers should not be used");
864 
865  case AMDGPU::SRC_VCCZ:
866  llvm_unreachable("src_vccz register should not be used");
867 
868  case AMDGPU::SRC_EXECZ:
869  llvm_unreachable("src_execz register should not be used");
870 
871  case AMDGPU::SRC_SCC:
872  llvm_unreachable("src_scc register should not be used");
873 
874  default:
875  break;
876  }
877 
878  if (AMDGPU::SReg_32RegClass.contains(Reg) ||
879  AMDGPU::SReg_LO16RegClass.contains(Reg) ||
880  AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
881  assert(!AMDGPU::TTMP_32RegClass.contains(Reg) &&
882  "trap handler registers should not be used");
883  IsSGPR = true;
884  Width = 1;
885  } else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
886  AMDGPU::VGPR_LO16RegClass.contains(Reg) ||
887  AMDGPU::VGPR_HI16RegClass.contains(Reg)) {
888  IsSGPR = false;
889  Width = 1;
890  } else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
891  AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
892  IsSGPR = false;
893  IsAGPR = true;
894  Width = 1;
895  } else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
896  assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
897  "trap handler registers should not be used");
898  IsSGPR = true;
899  Width = 2;
900  } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
901  IsSGPR = false;
902  Width = 2;
903  } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
904  IsSGPR = false;
905  IsAGPR = true;
906  Width = 2;
907  } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
908  IsSGPR = false;
909  Width = 3;
910  } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
911  IsSGPR = true;
912  Width = 3;
913  } else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
914  IsSGPR = false;
915  IsAGPR = true;
916  Width = 3;
917  } else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
918  assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
919  "trap handler registers should not be used");
920  IsSGPR = true;
921  Width = 4;
922  } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
923  IsSGPR = false;
924  Width = 4;
925  } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
926  IsSGPR = false;
927  IsAGPR = true;
928  Width = 4;
929  } else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
930  IsSGPR = false;
931  Width = 5;
932  } else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
933  IsSGPR = true;
934  Width = 5;
935  } else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
936  IsSGPR = false;
937  IsAGPR = true;
938  Width = 5;
939  } else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
940  IsSGPR = false;
941  Width = 6;
942  } else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
943  IsSGPR = true;
944  Width = 6;
945  } else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
946  IsSGPR = false;
947  IsAGPR = true;
948  Width = 6;
949  } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
950  assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
951  "trap handler registers should not be used");
952  IsSGPR = true;
953  Width = 8;
954  } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
955  IsSGPR = false;
956  Width = 8;
957  } else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
958  IsSGPR = false;
959  IsAGPR = true;
960  Width = 8;
961  } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
962  assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
963  "trap handler registers should not be used");
964  IsSGPR = true;
965  Width = 16;
966  } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
967  IsSGPR = false;
968  Width = 16;
969  } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
970  IsSGPR = false;
971  IsAGPR = true;
972  Width = 16;
973  } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
974  IsSGPR = true;
975  Width = 32;
976  } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
977  IsSGPR = false;
978  Width = 32;
979  } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
980  IsSGPR = false;
981  IsAGPR = true;
982  Width = 32;
983  } else {
984  llvm_unreachable("Unknown register class");
985  }
986  unsigned HWReg = TRI.getHWRegIndex(Reg);
987  int MaxUsed = HWReg + Width - 1;
988  if (IsSGPR) {
989  MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
990  } else if (IsAGPR) {
991  MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
992  } else {
993  MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
994  }
995  }
996 
997  if (MI.isCall()) {
998  // Pseudo used just to encode the underlying global. Is there a better
999  // way to track this?
1000 
1001  const MachineOperand *CalleeOp
1002  = TII->getNamedOperand(MI, AMDGPU::OpName::callee);
1003 
1004  const Function *Callee = getCalleeFunction(*CalleeOp);
1006  CallGraphResourceInfo.end();
1007  bool IsExternal = !Callee || Callee->isDeclaration();
1008  if (!IsExternal)
1009  I = CallGraphResourceInfo.find(Callee);
1010 
1011  if (IsExternal || I == CallGraphResourceInfo.end()) {
1012  // Avoid crashing on undefined behavior with an illegal call to a
1013  // kernel. If a callsite's calling convention doesn't match the
1014  // function's, it's undefined behavior. If the callsite calling
1015  // convention does match, that would have errored earlier.
1016  // FIXME: The verifier shouldn't allow this.
1017  if (!IsExternal &&
1018  AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
1019  report_fatal_error("invalid call to entry function");
1020 
1021  // If this is a call to an external function, we can't do much. Make
1022  // conservative guesses.
1023 
1024  // 48 SGPRs - vcc, - flat_scr, -xnack
1025  int MaxSGPRGuess =
1026  47 - IsaInfo::getNumExtraSGPRs(&ST, true, ST.hasFlatAddressSpace());
1027  MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
1028  MaxVGPR = std::max(MaxVGPR, 23);
1029  MaxAGPR = std::max(MaxAGPR, 23);
1030 
1031  CalleeFrameSize = std::max(CalleeFrameSize,
1032  static_cast<uint64_t>(AssumedStackSizeForExternalCall));
1033 
1034  Info.UsesVCC = true;
1035  Info.UsesFlatScratch = ST.hasFlatAddressSpace();
1036  Info.HasDynamicallySizedStack = true;
1037  } else {
1038  // We force CodeGen to run in SCC order, so the callee's register
1039  // usage etc. should be the cumulative usage of all callees.
1040 
1041  MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
1042  MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
1043  MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
1044  CalleeFrameSize
1045  = std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
1046  Info.UsesVCC |= I->second.UsesVCC;
1047  Info.UsesFlatScratch |= I->second.UsesFlatScratch;
1048  Info.HasDynamicallySizedStack |= I->second.HasDynamicallySizedStack;
1049  Info.HasRecursion |= I->second.HasRecursion;
1050  }
1051 
1052  // FIXME: Call site could have norecurse on it
1053  if (!Callee || !Callee->doesNotRecurse())
1054  Info.HasRecursion = true;
1055  }
1056  }
1057  }
1058 
1059  Info.NumExplicitSGPR = MaxSGPR + 1;
1060  Info.NumVGPR = MaxVGPR + 1;
1061  Info.NumAGPR = MaxAGPR + 1;
1062  Info.PrivateSegmentSize += CalleeFrameSize;
1063 
1064  return Info;
1065 }
1066 
// Populates ProgInfo for MF. Starts from the register, stack and flag usage
// returned by analyzeResourceUsage(MF), applies the limit checks and
// adjustments below, and then derives the LDS/scratch block counts, the
// ComputePGMRSrc2 value and the occupancy.
// NOTE(review): this is a numbered listing with gaps (listing lines such as
// 1084, 1091, 1100, 1147, 1167, 1169 and 1242-1248 are absent), so several
// statements are only partly visible here; the declaration of MFI is among
// the missing lines.
1067 void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
1068  const MachineFunction &MF) {
1069  SIFunctionResourceInfo Info = analyzeResourceUsage(MF);
1070  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
1071 
      // Copy the raw resource-usage results into ProgInfo.
1072  ProgInfo.NumArchVGPR = Info.NumVGPR;
1073  ProgInfo.NumAccVGPR = Info.NumAGPR;
1074  ProgInfo.NumVGPR = Info.getTotalNumVGPRs(STM);
      // NumVGPR (treated as at least 1) rounded up to a multiple of 4, counted
      // in groups of 4, minus one.
1075  ProgInfo.AccumOffset = alignTo(std::max(1, Info.NumVGPR), 4) / 4 - 1;
1076  ProgInfo.TgSplit = STM.isTgSplitEnabled();
1077  ProgInfo.NumSGPR = Info.NumExplicitSGPR;
1078  ProgInfo.ScratchSize = Info.PrivateSegmentSize;
1079  ProgInfo.VCCUsed = Info.UsesVCC;
1080  ProgInfo.FlatUsed = Info.UsesFlatScratch;
      // Either a dynamically sized stack or recursion marks the call stack as
      // dynamic.
1081  ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion;
1082 
      // Report an error diagnostic when the scratch size exceeds
      // MaxScratchPerWorkitem. NOTE(review): the initializer of
      // MaxScratchPerWorkitem (listing line 1084) is missing from this extract.
1083  const uint64_t MaxScratchPerWorkitem =
1085  if (ProgInfo.ScratchSize > MaxScratchPerWorkitem) {
1086  DiagnosticInfoStackSize DiagStackSize(MF.getFunction(),
1087  ProgInfo.ScratchSize, DS_Error);
1088  MF.getFunction().getContext().diagnose(DiagStackSize);
1089  }
1090 
1092 
1093  // TODO(scott.linder): The calculations related to SGPR/VGPR blocks are
1094  // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
1095  // unified.
1096  unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
1097  &STM, ProgInfo.VCCUsed, ProgInfo.FlatUsed);
1098 
1099  // Check the addressable register limit before we add ExtraSGPRs.
      // NOTE(review): the first line of this condition (listing line 1100) is
      // missing; only the !hasSGPRInitBug() part is visible.
1101  !STM.hasSGPRInitBug()) {
1102  unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
1103  if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
1104  // This can happen due to a compiler bug or when using inline asm.
1105  LLVMContext &Ctx = MF.getFunction().getContext();
1107  "addressable scalar registers",
1108  ProgInfo.NumSGPR, DS_Error,
1110  MaxAddressableNumSGPRs);
1111  Ctx.diagnose(Diag);
      // After diagnosing, continue with one less than the addressable maximum.
1112  ProgInfo.NumSGPR = MaxAddressableNumSGPRs - 1;
1113  }
1114  }
1115 
1116  // Account for extra SGPRs and VGPRs reserved for debugger use.
1117  ProgInfo.NumSGPR += ExtraSGPRs;
1118 
1119  const Function &F = MF.getFunction();
1120 
1121  // Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave
1122  // dispatch registers are function args.
1123  unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
1124 
1125  if (isShader(F.getCallingConv())) {
1126  // FIXME: We should be using the number of registers determined during
1127  // calling convention lowering to legalize the types.
1128  const DataLayout &DL = F.getParent()->getDataLayout();
1129  for (auto &Arg : F.args()) {
      // Each argument counts as ceil(size in bits / 32) registers; arguments
      // with the InReg attribute are added to the SGPR total, all others to
      // the VGPR total.
1130  unsigned NumRegs = (DL.getTypeSizeInBits(Arg.getType()) + 31) / 32;
1131  if (Arg.hasAttribute(Attribute::InReg))
1132  WaveDispatchNumSGPR += NumRegs;
1133  else
1134  WaveDispatchNumVGPR += NumRegs;
1135  }
1136  ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR);
1137  ProgInfo.NumVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
1138  }
1139 
1140  // Adjust number of registers used to meet default/requested minimum/maximum
1141  // number of waves per execution unit request.
1142  ProgInfo.NumSGPRsForWavesPerEU = std::max(
1143  std::max(ProgInfo.NumSGPR, 1u), STM.getMinNumSGPRs(MFI->getMaxWavesPerEU()));
1144  ProgInfo.NumVGPRsForWavesPerEU = std::max(
1145  std::max(ProgInfo.NumVGPR, 1u), STM.getMinNumVGPRs(MFI->getMaxWavesPerEU()));
1146 
      // Second addressable-SGPR check, this time with ExtraSGPRs included.
      // NOTE(review): the first line of this condition (listing line 1147) is
      // missing; only the hasSGPRInitBug() part is visible.
1148  STM.hasSGPRInitBug()) {
1149  unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
1150  if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
1151  // This can happen due to a compiler bug or when using inline asm to use
1152  // the registers which are usually reserved for vcc etc.
1153  LLVMContext &Ctx = MF.getFunction().getContext();
1155  "scalar registers",
1156  ProgInfo.NumSGPR, DS_Error,
1158  MaxAddressableNumSGPRs);
1159  Ctx.diagnose(Diag);
1160  ProgInfo.NumSGPR = MaxAddressableNumSGPRs;
1161  ProgInfo.NumSGPRsForWavesPerEU = MaxAddressableNumSGPRs;
1162  }
1163  }
1164 
      // NOTE(review): the right-hand sides of both assignments below (listing
      // lines 1167 and 1169) are missing from this extract.
1165  if (STM.hasSGPRInitBug()) {
1166  ProgInfo.NumSGPR =
1168  ProgInfo.NumSGPRsForWavesPerEU =
1170  }
1171 
      // Diagnose (as errors) user SGPR and LDS usage above the subtarget limits.
1172  if (MFI->getNumUserSGPRs() > STM.getMaxNumUserSGPRs()) {
1173  LLVMContext &Ctx = MF.getFunction().getContext();
1174  DiagnosticInfoResourceLimit Diag(MF.getFunction(), "user SGPRs",
1175  MFI->getNumUserSGPRs(), DS_Error);
1176  Ctx.diagnose(Diag);
1177  }
1178 
1179  if (MFI->getLDSSize() > static_cast<unsigned>(STM.getLocalMemorySize())) {
1180  LLVMContext &Ctx = MF.getFunction().getContext();
1181  DiagnosticInfoResourceLimit Diag(MF.getFunction(), "local memory",
1182  MFI->getLDSSize(), DS_Error);
1183  Ctx.diagnose(Diag);
1184  }
1185 
      // NOTE(review): the two statements that end on the lines below start on
      // missing listing lines (1186 and 1188); only their argument lists remain.
1187  &STM, ProgInfo.NumSGPRsForWavesPerEU);
1189  &STM, ProgInfo.NumVGPRsForWavesPerEU);
1190 
1191  const SIModeRegisterDefaults Mode = MFI->getMode();
1192 
1193  // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
1194  // register.
1195  ProgInfo.FloatMode = getFPMode(Mode);
1196 
1197  ProgInfo.IEEEMode = Mode.IEEE;
1198 
1199  // Make the clamp modifier return 0 on NaN input.
1200  ProgInfo.DX10Clamp = Mode.DX10Clamp;
1201 
      // NOTE(review): the condition choosing between the two shifts (listing
      // line 1203) is missing from this extract.
1202  unsigned LDSAlignShift;
1204  // LDS is allocated in 64 dword blocks.
1205  LDSAlignShift = 8;
1206  } else {
1207  // LDS is allocated in 128 dword blocks.
1208  LDSAlignShift = 9;
1209  }
1210 
      // NOTE(review): the initializer of LDSSpillSize (listing line 1212) is
      // missing from this extract.
1211  unsigned LDSSpillSize =
1213 
      // LDSBlocks is LDSSize rounded up to a multiple of 2^LDSAlignShift bytes,
      // expressed as a count of such blocks.
1214  ProgInfo.LDSSize = MFI->getLDSSize() + LDSSpillSize;
1215  ProgInfo.LDSBlocks =
1216  alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
1217 
1218  // Scratch is allocated in 256 dword blocks.
1219  unsigned ScratchAlignShift = 10;
1220  // We need to program the hardware with the amount of scratch memory that
1221  // is used by the entire wave. ProgInfo.ScratchSize is the amount of
1222  // scratch memory used per thread.
1223  ProgInfo.ScratchBlocks =
1224  alignTo(ProgInfo.ScratchSize * STM.getWavefrontSize(),
1225  1ULL << ScratchAlignShift) >>
1226  ScratchAlignShift;
1227 
      // For ISA major version 10 and above: WgpMode is 0 when CU mode is
      // enabled and 1 otherwise; MemOrdered is always set to 1.
1228  if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
1229  ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
1230  ProgInfo.MemOrdered = 1;
1231  }
1232 
1233  // 0 = X, 1 = XY, 2 = XYZ
1234  unsigned TIDIGCompCnt = 0;
1235  if (MFI->hasWorkItemIDZ())
1236  TIDIGCompCnt = 2;
1237  else if (MFI->hasWorkItemIDY())
1238  TIDIGCompCnt = 1;
1239 
      // NOTE(review): several operands of this OR-expression (listing lines
      // 1242, 1244-1248 and 1250) are missing from this extract.
1240  ProgInfo.ComputePGMRSrc2 =
1241  S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
1243  // For AMDHSA, TRAP_HANDLER must be zero, as it is populated by the CP.
1249  S_00B84C_TIDIG_COMP_CNT(TIDIGCompCnt) |
1251  // For AMDHSA, LDS_SIZE must be zero, as it is populated by the CP.
1252  S_00B84C_LDS_SIZE(STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks) |
1253  S_00B84C_EXCP_EN(0);
1254 
      // NOTE(review): the calls that take the arguments below start on missing
      // listing lines (1256 and 1259); they pass AccumOffset and TgSplit
      // together with the corresponding COMPUTE_PGM_RSRC3_GFX90A field names.
1255  if (STM.hasGFX90AInsts()) {
1257  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
1258  ProgInfo.AccumOffset);
1260  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
1261  ProgInfo.TgSplit);
1262  }
1263 
1264  ProgInfo.Occupancy = STM.computeOccupancy(MF.getFunction(), ProgInfo.LDSSize,
1265  ProgInfo.NumSGPRsForWavesPerEU,
1266  ProgInfo.NumVGPRsForWavesPerEU);
1267 }
1268 
// Returns an unsigned value selected by the calling convention; the only
// visible caller stores it as RsrcReg and emits it with emitInt32.
// NOTE(review): presumably this picks the per-shader-stage RSRC1 register
// id - confirm against the full source. The case labels and return statements
// (listing lines 1272-1278) are missing from this extract; only the default
// label is visible.
1269 static unsigned getRsrcReg(CallingConv::ID CallConv) {
1270  switch (CallConv) {
1271  default: LLVM_FALLTHROUGH;
1279  }
1280 }
1281 
// Writes values taken from CurrentProgramInfo and MFI for MF to OutStreamer.
// The visible statements emit a register id (for example RsrcReg,
// R_SPILLED_SGPRS, R_SPILLED_VGPRS) followed by the value for that register.
// NOTE(review): this is a numbered listing with gaps; the `if` headers that
// open the branches closed below, several register-id emits, and the
// declaration of MFI (listing lines 1284, 1287-1288, 1292, 1295, 1304,
// 1309-1310, 1313, 1315) are missing from this extract.
1282 void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
1283  const SIProgramInfo &CurrentProgramInfo) {
1285  unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
1286 
1289 
      // First branch: RSRC1, RSRC2 and the scratch wave size are emitted from
      // the Compute* values.
1290  OutStreamer->emitInt32(CurrentProgramInfo.getComputePGMRSrc1());
1291 
1293  OutStreamer->emitInt32(CurrentProgramInfo.ComputePGMRSrc2);
1294 
1296  OutStreamer->emitInt32(S_00B860_WAVESIZE(CurrentProgramInfo.ScratchBlocks));
1297 
1298  // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
1299  // 0" comment but I don't see a corresponding field in the register spec.
1300  } else {
      // Other branch: emit RsrcReg, then the VGPR/SGPR block counts packed
      // into one value, then the scratch wave size.
1301  OutStreamer->emitInt32(RsrcReg);
1302  OutStreamer->emitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
1303  S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4);
1305  OutStreamer->emitIntValue(
1306  S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
1307  }
1308 
      // NOTE(review): the condition guarding the three emits below (listing
      // lines 1309-1310) is missing from this extract.
1311  OutStreamer->emitInt32(
1312  S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks));
1314  OutStreamer->emitInt32(MFI->getPSInputEnable());
1316  OutStreamer->emitInt32(MFI->getPSInputAddr());
1317  }
1318 
      // Always emitted: the spilled SGPR and VGPR counts, each preceded by its
      // register id.
1319  OutStreamer->emitInt32(R_SPILLED_SGPRS);
1320  OutStreamer->emitInt32(MFI->getNumSpilledSGPRs());
1321  OutStreamer->emitInt32(R_SPILLED_VGPRS);
1322  OutStreamer->emitInt32(MFI->getNumSpilledVGPRs());
1323 }
1324 
1325 // This is the equivalent of EmitProgramInfoSI above, but for when the OS type
1326 // is AMDPAL. It stores each compute/SPI register setting and other PAL
1327 // metadata items into the PALMD::Metadata, combining with any provided by the
1328 // frontend as LLVM metadata. Once all functions are written, the PAL metadata
1329 // is then written as a single block in the .note section.
1330 void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
1331  const SIProgramInfo &CurrentProgramInfo) {
      // NOTE(review): listing lines 1332 and 1348 are missing from this
      // extract; they presumably hold the declaration of MFI and the `if`
      // whose body is closed at listing line 1352.
1333  auto CC = MF.getFunction().getCallingConv();
1334  auto MD = getTargetStreamer()->getPALMetadata();
1335 
      // Record the entry point name, the register counts and RSRC1 under the
      // function's calling convention.
1336  MD->setEntryPoint(CC, MF.getFunction().getName());
1337  MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU);
1338  MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU);
1339  MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC));
      // RSRC2: compute calling conventions get the full ComputePGMRSrc2 value;
      // all others only get SCRATCH_EN, and only when scratch blocks are used.
1340  if (AMDGPU::isCompute(CC)) {
1341  MD->setRsrc2(CC, CurrentProgramInfo.ComputePGMRSrc2);
1342  } else {
1343  if (CurrentProgramInfo.ScratchBlocks > 0)
1344  MD->setRsrc2(CC, S_00B84C_SCRATCH_EN(1));
1345  }
1346  // ScratchSize is in bytes, 16 aligned.
1347  MD->setScratchSize(CC, alignTo(CurrentProgramInfo.ScratchSize, 16));
1349  MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks));
1350  MD->setSpiPsInputEna(MFI->getPSInputEnable());
1351  MD->setSpiPsInputAddr(MFI->getPSInputAddr());
1352  }
1353 
      // Mark the calling convention as wave32 when the subtarget reports it.
1354  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
1355  if (STM.isWave32())
1356  MD->setWave32(MF.getFunction().getCallingConv());
1357 }
1358 
// Records PAL metadata for the function MF: its frame's stack size is stored
// as the function scratch size, and RSRC1/RSRC2 for the AMDGPU_CS calling
// convention are set from CurrentProgramInfo (not a parameter; it is declared
// outside this function).
1359 void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
1360  auto *MD = getTargetStreamer()->getPALMetadata();
1361  const MachineFrameInfo &MFI = MF.getFrameInfo();
1362  MD->setFunctionScratchSize(MF, MFI.getStackSize());
1363  // Set compute registers
1364  MD->setRsrc1(CallingConv::AMDGPU_CS,
1365  CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS));
1366  MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.ComputePGMRSrc2);
1367 }
1368 
1369 // This is supposed to be log2(Size)
// Maps a Size of 4, 8 or 16 to AMD_ELEMENT_4_BYTES, AMD_ELEMENT_8_BYTES or
// AMD_ELEMENT_16_BYTES respectively; any other Size reaches llvm_unreachable.
// NOTE(review): the signature line (listing line 1370) is missing from this
// extract.
1371  switch (Size) {
1372  case 4:
1373  return AMD_ELEMENT_4_BYTES;
1374  case 8:
1375  return AMD_ELEMENT_8_BYTES;
1376  case 16:
1377  return AMD_ELEMENT_16_BYTES;
1378  default:
1379  llvm_unreachable("invalid private_element_size");
1380  }
1381 }
1382 
// Fills the amd_kernel_code_t header Out for the kernel MF from
// CurrentProgramInfo, the subtarget and MFI. Asserts that the function's
// calling convention is AMDGPU_KERNEL or SPIR_KERNEL.
// NOTE(review): this is a numbered listing with gaps; the declaration of MFI
// and most of the statements guarded by the `if`s below are on missing
// listing lines, so only the conditions are visible for them.
1383 void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
1384  const SIProgramInfo &CurrentProgramInfo,
1385  const MachineFunction &MF) const {
1386  const Function &F = MF.getFunction();
1387  assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
1388  F.getCallingConv() == CallingConv::SPIR_KERNEL);
1389 
1391  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
1392 
1394 
      // RSRC1 OR-ed with RSRC2 shifted left by 32 bits. NOTE(review): the
      // left-hand side of this assignment (listing line 1395) is missing;
      // presumably Out.compute_pgm_resource_registers - confirm.
1396  CurrentProgramInfo.getComputePGMRSrc1() |
1397  (CurrentProgramInfo.ComputePGMRSrc2 << 32);
1399 
1400  if (CurrentProgramInfo.DynamicCallStack)
1402 
1405  getElementByteSizeValue(STM.getMaxPrivateElementSize(true)));
1406 
1407  if (MFI->hasPrivateSegmentBuffer()) {
1408  Out.code_properties |=
1410  }
1411 
1412  if (MFI->hasDispatchPtr())
1414 
1415  if (MFI->hasQueuePtr())
1417 
1418  if (MFI->hasKernargSegmentPtr())
1420 
1421  if (MFI->hasDispatchID())
1423 
1424  if (MFI->hasFlatScratchInit())
1426 
      // NOTE(review): hasDispatchPtr() is tested a second time here (first at
      // listing line 1412); the guarded statement is not visible, so confirm
      // that this repeated condition is intentional.
1427  if (MFI->hasDispatchPtr())
1429 
1430  if (STM.isXNACKEnabled())
1432 
      // MaxKernArgAlign is passed by name to getKernArgSegmentSize and read
      // again below for kernarg_segment_alignment.
1433  Align MaxKernArgAlign;
1434  Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
1435  Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
1436  Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
1437  Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
1438  Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
1439 
1440  // kernarg_segment_alignment is specified as log of the alignment.
1441  // The minimum alignment is 16.
1442  Out.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
1443 }
1444 
1446  const char *ExtraCode, raw_ostream &O) {
      // Prints operand OpNo of MI to O. Returns false when the operand was
      // printed and true when it could not be (unknown modifier or an operand
      // kind that is neither a register nor an immediate).
      // NOTE(review): the first line of the signature (listing line 1445) and
      // the body of the register branch (listing lines 1466-1467) are missing
      // from this extract.
1447  // First try the generic code, which knows about modifiers like 'c' and 'n'.
1448  if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O))
1449  return false;
1450 
      // Only the single-character modifier 'r' is accepted past this point.
1451  if (ExtraCode && ExtraCode[0]) {
1452  if (ExtraCode[1] != 0)
1453  return true; // Unknown modifier.
1454 
1455  switch (ExtraCode[0]) {
1456  case 'r':
1457  break;
1458  default:
1459  return true;
1460  }
1461  }
1462 
1463  // TODO: Should be able to support other operand types like globals.
1464  const MachineOperand &MO = MI->getOperand(OpNo);
1465  if (MO.isReg()) {
1468  return false;
1469  } else if (MO.isImm()) {
      // Inlinable integer literals are streamed as-is; other values are
      // printed in hex using the narrowest of 16, 32 or 64 bits that holds
      // the value as unsigned.
1470  int64_t Val = MO.getImm();
1471  if (AMDGPU::isInlinableIntLiteral(Val)) {
1472  O << Val;
1473  } else if (isUInt<16>(Val)) {
1474  O << format("0x%" PRIx16, static_cast<uint16_t>(Val));
1475  } else if (isUInt<32>(Val)) {
1476  O << format("0x%" PRIx32, static_cast<uint32_t>(Val));
1477  } else {
1478  O << format("0x%" PRIx64, static_cast<uint64_t>(Val));
1479  }
1480  return false;
1481  }
1482  return true;
1483 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm::AMDGPUAsmPrinter::emitBasicBlockStart
void emitBasicBlockStart(const MachineBasicBlock &MBB) override
Targets can override this to emit stuff at the start of a basic block.
Definition: AMDGPUAsmPrinter.cpp:300
llvm::MCTargetStreamer::getStreamer
MCStreamer & getStreamer()
Definition: MCStreamer.h:99
S_00B84C_TGID_Y_EN
#define S_00B84C_TGID_Y_EN(x)
Definition: SIDefines.h:815
i
i
Definition: README.txt:29
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:158
llvm::AMDGPU::isHsaAbiVersion3
bool isHsaAbiVersion3(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:114
getFPMode
static uint32_t getFPMode(AMDGPU::SIModeRegisterDefaults Mode)
Definition: AMDGPUAsmPrinter.cpp:78
FP_ROUND_ROUND_TO_NEAREST
#define FP_ROUND_ROUND_TO_NEAREST
Definition: SIDefines.h:879
llvm::AMDGPUTargetStreamer::getTargetID
const Optional< AMDGPU::IsaInfo::AMDGPUTargetID > & getTargetID() const
Definition: AMDGPUTargetStreamer.h:102
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:100
getRsrcReg
static unsigned getRsrcReg(CallingConv::ID CallConv)
Definition: AMDGPUAsmPrinter.cpp:1269
AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR
Definition: AMDKernelCodeT.h:95
llvm
Definition: AllocatorList.h:23
llvm::AMDGPUPALMetadata::readFromIR
void readFromIR(Module &M)
Definition: AMDGPUPALMetadata.cpp:31
llvm::CallingConv::AMDGPU_HS
@ AMDGPU_HS
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:218
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
R_00B128_SPI_SHADER_PGM_RSRC1_VS
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS
Definition: SIDefines.h:778
llvm::SystemZISD::TM
@ TM
Definition: SystemZISelLowering.h:65
llvm::DataLayout
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:112
AMD_ELEMENT_4_BYTES
@ AMD_ELEMENT_4_BYTES
Definition: AMDKernelCodeT.h:55
llvm::AMDGPU::getIsaVersion
IsaVersion getIsaVersion(StringRef GPU)
Definition: TargetParser.cpp:186
SIMachineFunctionInfo.h
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::SIMachineFunctionInfo::getLDSWaveSpillSize
unsigned getLDSWaveSpillSize() const
Definition: SIMachineFunctionInfo.h:888
llvm::MCContext
Context object for machine code objects.
Definition: MCContext.h:71
llvm::Function
Definition: Function.h:61
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::setXnackSetting
void setXnackSetting(TargetIDSetting NewXnackSetting)
Sets xnack setting to NewXnackSetting.
Definition: AMDGPUBaseInfo.h:120
llvm::SIMachineFunctionInfo::getNumSpilledSGPRs
unsigned getNumSpilledSGPRs() const
Definition: SIMachineFunctionInfo.h:796
AMDGPUHSAMetadataStreamer.h
AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK
@ AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK
Definition: AMDKernelCodeT.h:184
contains
return AArch64::GPR64RegClass contains(Reg)
llvm::AMDGPUTargetStreamer::EmitDirectiveAMDGCNTarget
virtual void EmitDirectiveAMDGCNTarget()=0
MCSectionELF.h
FP_DENORM_MODE_DP
#define FP_DENORM_MODE_DP(x)
Definition: SIDefines.h:898
llvm::GlobalObject::getAlign
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:82
amd_kernel_code_t::compute_pgm_resource_registers
uint64_t compute_pgm_resource_registers
Shader program settings for CS.
Definition: AMDKernelCodeT.h:558
llvm::AMDGPUAsmPrinter::AMDGPUAsmPrinter
AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
Definition: AMDGPUAsmPrinter.cpp:98
llvm::SIProgramInfo::WgpMode
uint32_t WgpMode
Definition: SIProgramInfo.h:35
llvm::SIProgramInfo::NumSGPR
uint32_t NumSGPR
Definition: SIProgramInfo.h:51
llvm::AMDGPUAsmPrinter::emitFunctionBodyStart
void emitFunctionBodyStart() override
Targets can override this to emit stuff before the first basic block in the function.
Definition: AMDGPUAsmPrinter.cpp:191
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::SystemZICMP::Any
@ Any
Definition: SystemZISelLowering.h:383
llvm::AMDGPUSubtarget::SEA_ISLANDS
@ SEA_ISLANDS
Definition: AMDGPUSubtarget.h:38
llvm::AsmPrinter::getNameWithPrefix
void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const
Definition: AsmPrinter.cpp:467
MCAssembler.h
llvm::AsmPrinter::doFinalization
bool doFinalization(Module &M) override
Shut down the asmprinter.
Definition: AsmPrinter.cpp:1637
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:124
R_00B528_SPI_SHADER_PGM_RSRC1_LS
#define R_00B528_SPI_SHADER_PGM_RSRC1_LS
Definition: SIDefines.h:800
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::ELF::SHT_PROGBITS
@ SHT_PROGBITS
Definition: ELF.h:903
llvm::AMDGPUPALMetadata::setEntryPoint
void setEntryPoint(unsigned CC, StringRef Name)
Definition: AMDGPUPALMetadata.cpp:188
R_0286CC_SPI_PS_INPUT_ENA
#define R_0286CC_SPI_PS_INPUT_ENA
Definition: SIDefines.h:839
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:316
amd_element_byte_size_t
amd_element_byte_size_t
The values used to define the number of bytes to use for the swizzle element size.
Definition: AMDKernelCodeT.h:53
AMDGPUAsmPrinter.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:338
llvm::MCSymbol::isDefined
bool isDefined() const
isDefined - Check if this symbol is defined (i.e., it has an address).
Definition: MCSymbol.h:242
amd_kernel_code_t::workgroup_group_segment_byte_size
uint32_t workgroup_group_segment_byte_size
The amount of group segment memory required by a work-group in bytes.
Definition: AMDKernelCodeT.h:574
llvm::GCNSubtarget::isTrapHandlerEnabled
bool isTrapHandlerEnabled() const
Definition: GCNSubtarget.h:524
llvm::AMDGPUMachineFunction::getLDSSize
unsigned getLDSSize() const
Definition: AMDGPUMachineFunction.h:70
llvm::AMDGPU::IsaInfo::getNumExtraSGPRs
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
Definition: AMDGPUBaseInfo.cpp:623
llvm::AMDGPU::IsaInfo::getTotalNumVGPRs
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:686
llvm::AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough
bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const override
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
Definition: AMDGPUAsmPrinter.cpp:177
AMD_HSA_BITS_SET
#define AMD_HSA_BITS_SET(dst, mask, val)
Definition: AMDKernelCodeT.h:43
llvm::AMDGPU::HSAMD::MetadataStreamerV3
Definition: AMDGPUHSAMetadataStreamer.h:54
llvm::AMDGPUTargetStreamer::EmitDirectiveHSACodeObjectVersion
virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor)=0
llvm::SIMachineFunctionInfo::getPSInputEnable
unsigned getPSInputEnable() const
Definition: SIMachineFunctionInfo.h:816
llvm::Optional< uint32_t >
llvm::AMDGPU::SIModeRegisterDefaults
Definition: AMDGPUBaseInfo.h:885
llvm::GCNSubtarget
Definition: GCNSubtarget.h:38
S_00B84C_USER_SGPR
#define S_00B84C_USER_SGPR(x)
Definition: SIDefines.h:806
llvm::AMDGPUAsmPrinter
Definition: AMDGPUAsmPrinter.h:40
llvm::SIProgramInfo::NumVGPR
uint32_t NumVGPR
Definition: SIProgramInfo.h:46
AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT
@ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT
Definition: AMDKernelCodeT.h:107
llvm::MachineBasicBlock::back
MachineInstr & back()
Definition: MachineBasicBlock.h:248
llvm::AMDGPUTargetStreamer::EmitAMDKernelCodeT
virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header)=0
llvm::MCSectionELF
This represents a section on linux, lots of unix variants and some bare metal systems.
Definition: MCSectionELF.h:28
llvm::SIProgramInfo::NumSGPRsForWavesPerEU
uint32_t NumSGPRsForWavesPerEU
Definition: SIProgramInfo.h:56
llvm::MCObjectFileInfo::getContext
MCContext & getContext() const
Definition: MCObjectFileInfo.h:232
llvm::amdhsa::kernel_descriptor_t::compute_pgm_rsrc2
uint32_t compute_pgm_rsrc2
Definition: AMDHSAKernelDescriptor.h:174
llvm::SIProgramInfo::LDSSize
uint32_t LDSSize
Definition: SIProgramInfo.h:52
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::SIProgramInfo::AccumOffset
uint32_t AccumOffset
Definition: SIProgramInfo.h:49
R_0286D0_SPI_PS_INPUT_ADDR
#define R_0286D0_SPI_PS_INPUT_ADDR
Definition: SIDefines.h:840
llvm::AMDGPUSubtarget::getKernArgSegmentSize
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
Definition: AMDGPUSubtarget.cpp:592
llvm::AMDGPUTargetStreamer::initializeTargetID
void initializeTargetID(const MCSubtargetInfo &STI)
Definition: AMDGPUTargetStreamer.h:108
llvm::AsmPrinter::emitGlobalVariable
virtual void emitGlobalVariable(const GlobalVariable *GV)
Emit the specified global variable to the .s file.
Definition: AsmPrinter.cpp:494
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::GlobalVariable::hasInitializer
bool hasInitializer() const
Definitions have initializers, declarations don't.
Definition: GlobalVariable.h:92
AMDHSAKernelDescriptor.h
AMD_ELEMENT_8_BYTES
@ AMD_ELEMENT_8_BYTES
Definition: AMDKernelCodeT.h:56
llvm::GCNSubtarget::getInstrInfo
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:214
S_00B84C_TIDIG_COMP_CNT
#define S_00B84C_TIDIG_COMP_CNT(x)
Definition: SIDefines.h:824
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::AMDGPU::IsaVersion
Instruction set architecture version.
Definition: TargetParser.h:102
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:205
R600AsmPrinter.h
llvm::AMDGPUSubtarget::isAmdHsaOS
bool isAmdHsaOS() const
Definition: AMDGPUSubtarget.h:106
S_00B84C_SCRATCH_EN
#define S_00B84C_SCRATCH_EN(x)
Definition: SIDefines.h:803
llvm::AMDGPU::isGFX90A
bool isGFX90A(const MCSubtargetInfo &STI)
Definition: AMDGPUBaseInfo.cpp:1424
getCalleeFunction
static const Function * getCalleeFunction(const MachineOperand &Op)
Definition: AMDGPUAsmPrinter.cpp:705
llvm::SIProgramInfo::MemOrdered
uint32_t MemOrdered
Definition: SIProgramInfo.h:36
S_00B84C_EXCP_EN_MSB
#define S_00B84C_EXCP_EN_MSB(x)
Definition: SIDefines.h:828
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:565
llvm::AMDGPUMachineFunction::needsWaveLimiter
bool needsWaveLimiter() const
Definition: AMDGPUMachineFunction.h:92
llvm::AMDGPU::IsaInfo::AMDGPUTargetID
Definition: AMDGPUBaseInfo.h:85
TargetMachine.h
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:228
llvm::AMDGPU::isShader
bool isShader(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1313
llvm::AsmPrinter::emitLinkage
virtual void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const
This emits linkage information about GVSym based on GV, if this is supported by the target.
Definition: AsmPrinter.cpp:427
llvm::SIProgramInfo::NumArchVGPR
uint32_t NumArchVGPR
Definition: SIProgramInfo.h:47
llvm::SIProgramInfo::ComputePGMRSrc2
uint64_t ComputePGMRSrc2
Definition: SIProgramInfo.h:43
GCNSubtarget.h
S_00B84C_TGID_Z_EN
#define S_00B84C_TGID_Z_EN(x)
Definition: SIDefines.h:818
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:534
llvm::MachineFunction::setAlignment
void setAlignment(Align A)
setAlignment - Set the alignment of the function.
Definition: MachineFunction.h:606
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:653
S_00B02C_EXTRA_LDS_SIZE
#define S_00B02C_EXTRA_LDS_SIZE(x)
Definition: SIDefines.h:777
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
Definition: AMDKernelCodeT.h:87
G_00B84C_TRAP_HANDLER
#define G_00B84C_TRAP_HANDLER(x)
Definition: SIDefines.h:810
llvm::getTheAMDGPUTarget
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
Definition: AMDGPUTargetInfo.cpp:20
llvm::AsmPrinter::OutStreamer
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:97
llvm::SIProgramInfo::ScratchSize
uint64_t ScratchSize
Definition: SIProgramInfo.h:37
AMDGPUTargetInfo.h
llvm::Log2
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:217
llvm::MCSubtargetInfo::getTargetTriple
const Triple & getTargetTriple() const
Definition: MCSubtargetInfo.h:107
MCContext.h
llvm::SIProgramInfo::NumVGPRsForWavesPerEU
uint32_t NumVGPRsForWavesPerEU
Definition: SIProgramInfo.h:59
llvm::SIMachineFunctionInfo::hasWorkGroupIDZ
bool hasWorkGroupIDZ() const
Definition: SIMachineFunctionInfo.h:647
llvm::AsmPrinter::SetupMachineFunction
virtual void SetupMachineFunction(MachineFunction &MF)
This should be called when a new MachineFunction is being processed from runOnMachineFunction.
Definition: AsmPrinter.cpp:1886
llvm::AsmPrinter::emitFunctionEntryLabel
virtual void emitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
Definition: AsmPrinter.cpp:829
AssumedStackSizeForExternalCall
static cl::opt< uint32_t > AssumedStackSizeForExternalCall("amdgpu-assume-external-call-stack-size", cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden, cl::init(16384))
llvm::SIProgramInfo::LDSBlocks
uint32_t LDSBlocks
Definition: SIProgramInfo.h:40
llvm::amdhsa::kernel_descriptor_t::group_segment_fixed_size
uint32_t group_segment_fixed_size
Definition: AMDHSAKernelDescriptor.h:166
llvm::DiagnosticInfoResourceLimit
Diagnostic information for stack size etc.
Definition: DiagnosticInfo.h:179
llvm::AsmPrinter::isBlockOnlyReachableByFallthrough
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
Definition: AsmPrinter.cpp:3286
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
R_00B228_SPI_SHADER_PGM_RSRC1_GS
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS
Definition: SIDefines.h:783
llvm::AMDGPU
Definition: AMDGPUMetadataVerifier.h:22
llvm::M0
unsigned M0(unsigned Val)
Definition: VE.h:371
llvm::GCNSubtarget::isWave32
bool isWave32() const
Definition: GCNSubtarget.h:1074
llvm::GCNSubtarget::hasSGPRInitBug
bool hasSGPRInitBug() const
Definition: GCNSubtarget.h:889
G_00B84C_TGID_Y_EN
#define G_00B84C_TGID_Y_EN(x)
Definition: SIDefines.h:816
llvm::report_fatal_error
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
amd_kernel_code_t::workitem_private_segment_byte_size
uint32_t workitem_private_segment_byte_size
The amount of memory required for the combined private, spill and arg segments for a work-item in bytes.
Definition: AMDKernelCodeT.h:568
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:50
getElementByteSizeValue
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)
Definition: AMDGPUAsmPrinter.cpp:1370
llvm::SIProgramInfo::ComputePGMRSrc3GFX90A
uint64_t ComputePGMRSrc3GFX90A
Definition: SIProgramInfo.h:44
llvm::AMDGPUAsmPrinter::PrintAsmOperand
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &O) override
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
Definition: AMDGPUAsmPrinter.cpp:1445
llvm::ELF::STT_AMDGPU_HSA_KERNEL
@ STT_AMDGPU_HSA_KERNEL
Definition: ELF.h:1157
llvm::SIProgramInfo::DynamicCallStack
bool DynamicCallStack
Definition: SIProgramInfo.h:66
AMDHSA_BITS_GET
#define AMDHSA_BITS_GET(SRC, MSK)
Definition: AMDHSAKernelDescriptor.h:37
llvm::AMDGPUAsmPrinter::emitStartOfAsmFile
void emitStartOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the start of their file.
Definition: AMDGPUAsmPrinter.cpp:126
llvm::MachineFrameInfo::getStackSize
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Definition: MachineFrameInfo.h:563
llvm::amdhsa::kernel_descriptor_t::kernel_code_properties
uint16_t kernel_code_properties
Definition: AMDHSAKernelDescriptor.h:175
llvm::IndexedInstrProf::Version
const uint64_t Version
Definition: InstrProf.h:991
llvm::MCAssembler::getEmitterPtr
MCCodeEmitter * getEmitterPtr() const
Definition: MCAssembler.h:296
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:26
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:29
llvm::MCSymbol::getName
StringRef getName() const
getName - Get the symbol name.
Definition: MCSymbol.h:197
llvm::GCNSubtarget::isTgSplitEnabled
bool isTgSplitEnabled() const
Definition: GCNSubtarget.h:532
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::StringRef::str
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:248
llvm::GCNSubtarget::dumpCode
bool dumpCode() const
Definition: GCNSubtarget.h:442
llvm::AMDGPUSubtarget::getLocalMemorySize
unsigned getLocalMemorySize() const
Definition: AMDGPUSubtarget.h:188
llvm::CallingConv::ID
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
llvm::AsmPrinter::emitBasicBlockStart
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
Definition: AsmPrinter.cpp:3158
llvm::AMDGPU::isGFX10Plus
bool isGFX10Plus(const MCSubtargetInfo &STI)
Definition: AMDGPUBaseInfo.cpp:1410
llvm::amdhsa::kernel_descriptor_t::compute_pgm_rsrc3
uint32_t compute_pgm_rsrc3
Definition: AMDHSAKernelDescriptor.h:172
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::AMDGPUMachineFunction::getMode
AMDGPU::SIModeRegisterDefaults getMode() const
Definition: AMDGPUMachineFunction.h:74
llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1336
llvm::SmallString< 128 >
llvm::AMDGPU::isHsaAbiVersion2
bool isHsaAbiVersion2(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:108
llvm::DK_ResourceLimit
@ DK_ResourceLimit
Definition: DiagnosticInfo.h:57
llvm::AMDGPUTargetStreamer::EmitAmdhsaKernelDescriptor
virtual void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr)=0
llvm::AMDGPUSubtarget::getWavefrontSize
unsigned getWavefrontSize() const
Definition: AMDGPUSubtarget.h:180
S_00B84C_TGID_X_EN
#define S_00B84C_TGID_X_EN(x)
Definition: SIDefines.h:812
llvm::Triple::AMDHSA
@ AMDHSA
Definition: Triple.h:190
R_00B84C_COMPUTE_PGM_RSRC2
#define R_00B84C_COMPUTE_PGM_RSRC2
Definition: SIDefines.h:802
llvm::GCNSubtarget::MaxWaveScratchSize
static const unsigned MaxWaveScratchSize
Definition: GCNSubtarget.h:205
AMD_CODE_PROPERTY_IS_PTR64
@ AMD_CODE_PROPERTY_IS_PTR64
Definition: AMDKernelCodeT.h:172
R_0286E8_SPI_TMPRING_SIZE
#define R_0286E8_SPI_TMPRING_SIZE
Definition: SIDefines.h:903
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:555
llvm::Optional::getValueOr
constexpr T getValueOr(U &&value) const LLVM_LVALUE_FUNCTION
Definition: Optional.h:295
llvm::cl::opt
Definition: CommandLine.h:1419
llvm::amdhsa::kernel_descriptor_t::private_segment_fixed_size
uint32_t private_segment_fixed_size
Definition: AMDHSAKernelDescriptor.h:167
llvm::CallingConv::AMDGPU_GS
@ AMDGPU_GS
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:202
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:128
llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:211
llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition: GlobalVariable.h:136
llvm::AMDGPU::isInlinableIntLiteral
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
Definition: AMDGPUBaseInfo.h:810
llvm::MCAssembler
Definition: MCAssembler.h:60
llvm::SIProgramInfo::FloatMode
uint32_t FloatMode
Definition: SIProgramInfo.h:30
AMD_ELEMENT_16_BYTES
@ AMD_ELEMENT_16_BYTES
Definition: AMDKernelCodeT.h:57
AMDGPUTargetStreamer.h
llvm::isUInt< 16 >
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:408
llvm::SIMachineFunctionInfo::isStackRealigned
bool isStackRealigned() const
Definition: SIMachineFunctionInfo.h:788
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:318
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::GlobalValue::getVisibility
VisibilityTypes getVisibility() const
Definition: GlobalValue.h:229
llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function.
Definition: Function.h:228
R_SPILLED_VGPRS
#define R_SPILLED_VGPRS
Definition: SIDefines.h:916
llvm::SIMachineFunctionInfo::hasWorkGroupInfo
bool hasWorkGroupInfo() const
Definition: SIMachineFunctionInfo.h:651
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:572
llvm::Triple::getOS
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:316
LLVM_EXTERNAL_VISIBILITY
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:132
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::TargetMachine::getMCSubtargetInfo
const MCSubtargetInfo * getMCSubtargetInfo() const
Definition: TargetMachine.h:206
llvm::SIMachineFunctionInfo::getMaxWavesPerEU
unsigned getMaxWavesPerEU() const
Definition: SIMachineFunctionInfo.h:868
llvm::AMDGPUAsmPrinter::emitEndOfAsmFile
void emitEndOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the end of their file.
Definition: AMDGPUAsmPrinter.cpp:158
R_00B028_SPI_SHADER_PGM_RSRC1_PS
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS
Definition: SIDefines.h:769
llvm::SIMachineFunctionInfo::hasDispatchID
bool hasDispatchID() const
Definition: SIMachineFunctionInfo.h:631
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::AMDGPU::initDefaultAMDKernelCodeT
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:730
llvm::DenseMap
Definition: DenseMap.h:714
R_00B848_COMPUTE_PGM_RSRC1
#define R_00B848_COMPUTE_PGM_RSRC1
Definition: SIDefines.h:842
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
Definition: MCRegister.h:19
llvm::SIMachineFunctionInfo::getNumSpilledVGPRs
unsigned getNumSpilledVGPRs() const
Definition: SIMachineFunctionInfo.h:800
llvm::AMDGPUMachineFunction::isEntryFunction
bool isEntryFunction() const
Definition: AMDGPUMachineFunction.h:78
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
AMDGPUInstPrinter.h
llvm::AMDGPUTargetStreamer::EmitISAVersion
virtual bool EmitISAVersion()=0
llvm::AMDGPUAsmPrinter::getTargetStreamer
AMDGPUTargetStreamer * getTargetStreamer() const
Definition: AMDGPUAsmPrinter.cpp:120
llvm::MCSymbol::isVariable
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition: MCSymbol.h:290
llvm::MachineRegisterInfo::reg_operands
iterator_range< reg_iterator > reg_operands(Register Reg) const
Definition: MachineRegisterInfo.h:286
R_00B428_SPI_SHADER_PGM_RSRC1_HS
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS
Definition: SIDefines.h:792
llvm::isUInt< 32 >
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:411
llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition: MachineFunction.cpp:522
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::AsmPrinter::getSymbol
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:472
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1563
llvm::AMDGPUAsmPrinter::DisasmLines
std::vector< std::string > DisasmLines
Definition: AMDGPUAsmPrinter.h:151
llvm::AMDGPUAsmPrinter::getGlobalSTI
const MCSubtargetInfo * getGlobalSTI() const
Definition: AMDGPUAsmPrinter.cpp:116
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:571
createAMDGPUAsmPrinterPass
static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)
Definition: AMDGPUAsmPrinter.cpp:86
llvm::getTheGCNTarget
Target & getTheGCNTarget()
The target for GCN GPUs.
Definition: AMDGPUTargetInfo.cpp:25
llvm::MCContext::reportError
void reportError(SMLoc L, const Twine &Msg)
Definition: MCContext.cpp:917
llvm::GCNSubtarget::getMinNumVGPRs
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1050
llvm::SIProgramInfo::VGPRBlocks
uint32_t VGPRBlocks
Definition: SIProgramInfo.h:27
llvm::GCNSubtarget::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs() const
Definition: GCNSubtarget.h:999
llvm::AsmPrinter::MF
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:100
llvm::AsmPrinter::OutContext
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:92
llvm::SIProgramInfo::getPGMRSrc1
uint64_t getPGMRSrc1(CallingConv::ID CC) const
Definition: SIProgramInfo.cpp:31
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:262
llvm::GCNSubtarget::hasGFX90AInsts
bool hasGFX90AInsts() const
Definition: GCNSubtarget.h:949
llvm::AMDGPU::IsaInfo::getTotalNumSGPRs
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:570
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:357
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::AMDGPUMachineFunction::isMemoryBound
bool isMemoryBound() const
Definition: AMDGPUMachineFunction.h:88
llvm::AMDGPU::IsaInfo::getNumSGPRBlocks
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
Definition: AMDGPUBaseInfo.cpp:653
amd_kernel_code_t::kernarg_segment_alignment
uint8_t kernarg_segment_alignment
The maximum byte alignment of variables used by the kernel in the specified memory segment.
Definition: AMDKernelCodeT.h:634
amd_kernel_code_t::kernarg_segment_byte_size
uint64_t kernarg_segment_byte_size
The size in bytes of the kernarg segment that holds the values of the arguments to the kernel.
Definition: AMDKernelCodeT.h:583
llvm::getCPU
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
Definition: AVRTargetMachine.cpp:31
llvm::MachineFunction
Definition: MachineFunction.h:227
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1486
llvm::AMDGPU::CPol::SCC
@ SCC
Definition: SIDefines.h:285
FP_DENORM_MODE_SP
#define FP_DENORM_MODE_SP(x)
Definition: SIDefines.h:897
llvm::MCSymbol::redefineIfPossible
void redefineIfPossible()
Prepare this symbol to be redefined.
Definition: MCSymbol.h:224
S_0286E8_WAVESIZE
#define S_0286E8_WAVESIZE(x)
Definition: SIDefines.h:904
llvm::SIMachineFunctionInfo::hasWorkGroupIDX
bool hasWorkGroupIDX() const
Definition: SIMachineFunctionInfo.h:639
llvm::MachineBasicBlock::getNumber
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFunction yet, in which case this will return -1.
Definition: MachineBasicBlock.h:965
AssumedStackSizeForDynamicSizeObjects
static cl::opt< uint32_t > AssumedStackSizeForDynamicSizeObjects("amdgpu-assume-dynamic-stack-object-size", cl::desc("Assumed extra stack use if there are any " "variable sized objects (in bytes)"), cl::Hidden, cl::init(4096))
llvm::AMDGPUTargetStreamer::EmitCodeEnd
virtual bool EmitCodeEnd(const MCSubtargetInfo &STI)=0
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::AMDGPUAsmPrinter::emitFunctionBodyEnd
void emitFunctionBodyEnd() override
Targets can override this to emit stuff after the last basic block in the function.
Definition: AMDGPUAsmPrinter.cpp:236
AMDGPU.h
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:478
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::AMDGPU::isCompute
bool isCompute(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1332
TargetLoweringObjectFile.h
uint32_t
llvm::AMDGPU::IsaInfo::getNumVGPRBlocks
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, Optional< bool > EnableWavefrontSize32)
Definition: AMDGPUBaseInfo.cpp:720
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
G_00B84C_SCRATCH_EN
#define G_00B84C_SCRATCH_EN(x)
Definition: SIDefines.h:804
llvm::DiagnosticInfoStackSize
Definition: DiagnosticInfo.h:217
llvm::AMDGPUTargetStreamer
Definition: AMDGPUTargetStreamer.h:39
amd_kernel_code_t
AMD Kernel Code Object (amd_kernel_code_t).
Definition: AMDKernelCodeT.h:526
R_00B328_SPI_SHADER_PGM_RSRC1_ES
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES
Definition: SIDefines.h:791
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::SIMachineFunctionInfo::hasPrivateSegmentBuffer
bool hasPrivateSegmentBuffer() const
Definition: SIMachineFunctionInfo.h:615
AMDHSA_BITS_SET
#define AMDHSA_BITS_SET(DST, MSK, VAL)
Definition: AMDHSAKernelDescriptor.h:42
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:281
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:298
llvm::AMDGPUAsmPrinter::DisasmLineMaxLen
size_t DisasmLineMaxLen
Definition: AMDGPUAsmPrinter.h:152
llvm::amdhsa::kernel_descriptor_t::kernarg_size
uint32_t kernarg_size
Definition: AMDHSAKernelDescriptor.h:168
G_00B84C_TGID_Z_EN
#define G_00B84C_TGID_Z_EN(x)
Definition: SIDefines.h:819
FP_ROUND_MODE_SP
#define FP_ROUND_MODE_SP(x)
Definition: SIDefines.h:886
llvm::GCNSubtarget::getMinNumSGPRs
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1005
FP_ROUND_MODE_DP
#define FP_ROUND_MODE_DP(x)
Definition: SIDefines.h:887
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::SIProgramInfo::FlatUsed
bool FlatUsed
Definition: SIProgramInfo.h:53
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::CallingConv::AMDGPU_LS
@ AMDGPU_LS
Calling convention used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:226
llvm::SIMachineFunctionInfo::getNumUserSGPRs
unsigned getNumUserSGPRs() const
Definition: SIMachineFunctionInfo.h:711
llvm::SIProgramInfo::IEEEMode
uint32_t IEEEMode
Definition: SIProgramInfo.h:34
llvm::SIProgramInfo::NumAccVGPR
uint32_t NumAccVGPR
Definition: SIProgramInfo.h:48
llvm::SIMachineFunctionInfo::hasWorkItemIDZ
bool hasWorkItemIDZ() const
Definition: SIMachineFunctionInfo.h:667
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:205
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::SIProgramInfo::Occupancy
uint32_t Occupancy
Definition: SIProgramInfo.h:62
llvm::AMDGPU::HSAMD::Kernel::Key::SymbolName
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
Definition: AMDGPUMetadata.h:381
llvm::CallingConv::SPIR_KERNEL
@ SPIR_KERNEL
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:147
llvm::LLVMContext::diagnose
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Definition: LLVMContext.cpp:228
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition: Twine.h:80
llvm::AMDGPUTargetStreamer::EmitAMDGPUSymbolType
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type)=0
llvm::Triple::OSType
OSType
Definition: Triple.h:164
S_00B84C_TG_SIZE_EN
#define S_00B84C_TG_SIZE_EN(x)
Definition: SIDefines.h:821
llvm::empty
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:263
std
Definition: BitVector.h:838
llvm::SIMachineFunctionInfo::getMaxFlatWorkGroupSize
unsigned getMaxFlatWorkGroupSize() const
Definition: SIMachineFunctionInfo.h:852
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:521
llvm::SIMachineFunctionInfo::hasDispatchPtr
bool hasDispatchPtr() const
Definition: SIMachineFunctionInfo.h:619
S_00B84C_EXCP_EN
#define S_00B84C_EXCP_EN(x)
Definition: SIDefines.h:835
uint16_t
llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS
@ VOLCANIC_ISLANDS
Definition: AMDGPUSubtarget.h:39
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::AMDGPUMachineFunction::isModuleEntryFunction
bool isModuleEntryFunction() const
Definition: AMDGPUMachineFunction.h:82
amd_kernel_code_t::code_properties
uint32_t code_properties
Code properties.
Definition: AMDKernelCodeT.h:562
llvm::GCNSubtarget::getTargetID
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition: GCNSubtarget.h:250
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:377
llvm::createR600AsmPrinterPass
AsmPrinter * createR600AsmPrinterPass(TargetMachine &TM, std::unique_ptr< MCStreamer > &&Streamer)
Definition: R600AsmPrinter.cpp:31
R_00B860_COMPUTE_TMPRING_SIZE
#define R_00B860_COMPUTE_TMPRING_SIZE
Definition: SIDefines.h:900
Success
#define Success
Definition: AArch64Disassembler.cpp:248
llvm::Triple::AMDPAL
@ AMDPAL
Definition: Triple.h:197
llvm::SIProgramInfo::TgSplit
uint32_t TgSplit
Definition: SIProgramInfo.h:50
DiagnosticInfo.h
llvm::SIMachineFunctionInfo::hasWorkItemIDY
bool hasWorkItemIDY() const
Definition: SIMachineFunctionInfo.h:663
llvm::amdhsa::kernel_descriptor_t
Definition: AMDHSAKernelDescriptor.h:165
llvm::AsmPrinter
This class is intended to be used as a driving class for all asm writers.
Definition: AsmPrinter.h:82
S_00B028_SGPRS
#define S_00B028_SGPRS(x)
Definition: SIDefines.h:771
llvm::SIProgramInfo::VCCUsed
bool VCCUsed
Definition: SIProgramInfo.h:69
llvm::MachineRegisterInfo::isPhysRegUsed
bool isPhysRegUsed(MCRegister PhysReg) const
Return true if the specified register is modified or read in this function.
Definition: MachineRegisterInfo.cpp:585
llvm::AsmPrinter::emitVisibility
void emitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition=true) const
This emits visibility information about symbol, if this is supported by the target.
Definition: AsmPrinter.cpp:3246
llvm::GCNSubtarget::getMaxNumUserSGPRs
unsigned getMaxNumUserSGPRs() const
Definition: GCNSubtarget.h:780
llvm::AMDGPU::HSAMD::MetadataStreamerV2
Definition: AMDGPUHSAMetadataStreamer.h:138
llvm::AMDGPUTargetStreamer::EmitDirectiveHSACodeObjectISAV2
virtual void EmitDirectiveHSACodeObjectISAV2(uint32_t Major, uint32_t Minor, uint32_t Stepping, StringRef VendorName, StringRef ArchName)=0
llvm::MachineOperand::isImm
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Definition: MachineOperand.h:320
llvm::SIProgramInfo::getComputePGMRSrc1
uint64_t getComputePGMRSrc1() const
Compute the value of the ComputePGMRsrc1 register.
Definition: SIProgramInfo.cpp:23
llvm::AMDGPUAsmPrinter::getPassName
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
Definition: AMDGPUAsmPrinter.cpp:112
R_SPILLED_SGPRS
#define R_SPILLED_SGPRS
Definition: SIDefines.h:915
llvm::amdhsa::kernel_descriptor_t::compute_pgm_rsrc1
uint32_t compute_pgm_rsrc1
Definition: AMDHSAKernelDescriptor.h:173
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:403
llvm::AMDGPUAsmPrinter::doFinalization
bool doFinalization(Module &M) override
Shut down the asmprinter.
Definition: AMDGPUAsmPrinter.cpp:347
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE
@ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE
Definition: AMDKernelCodeT.h:163
llvm::GlobalValue::getAddressSpace
unsigned getAddressSpace() const
Definition: Globals.cpp:112
llvm::GCNSubtarget::getGeneration
Generation getGeneration() const
Definition: GCNSubtarget.h:265
llvm::SIInstrInfo
Definition: SIInstrInfo.h:38
amd_kernel_code_t::workitem_vgpr_count
uint16_t workitem_vgpr_count
Number of vector registers used by each work-item.
Definition: AMDKernelCodeT.h:599
llvm::SIProgramInfo
Track resource usage for kernels / entry functions.
Definition: SIProgramInfo.h:25
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:107
G_00B84C_USER_SGPR
#define G_00B84C_USER_SGPR(x)
Definition: SIDefines.h:807
llvm::GCNSubtarget::isCuModeEnabled
bool isCuModeEnabled() const
Definition: GCNSubtarget.h:536
G_00B84C_TIDIG_COMP_CNT
#define G_00B84C_TIDIG_COMP_CNT(x)
Definition: SIDefines.h:825
llvm::AsmPrinter::TM
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:85
llvm::TargetMachine::getSubtarget
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Definition: TargetMachine.h:161
llvm::TargetRegistry::RegisterAsmPrinter
static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)
RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.
Definition: TargetRegistry.h:811
S_00B028_VGPRS
#define S_00B028_VGPRS(x)
Definition: SIDefines.h:770
llvm::AsmPrinter::getObjFileLowering
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
Definition: AsmPrinter.cpp:222
MCStreamer.h
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
Definition: SIMachineFunctionInfo.h:331
AMDKernelCodeT.h
llvm::AsmPrinter::emitFunctionBody
void emitFunctionBody()
This method emits the body and trailer for a function.
Definition: AsmPrinter.cpp:1181
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID
Definition: AMDKernelCodeT.h:103
llvm::TargetMachine::getTargetTriple
const Triple & getTargetTriple() const
Definition: TargetMachine.h:123
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:350
S_00B84C_LDS_SIZE
#define S_00B84C_LDS_SIZE(x)
Definition: SIDefines.h:832
G_00B84C_TGID_X_EN
#define G_00B84C_TGID_X_EN(x)
Definition: SIDefines.h:813
llvm::SIMachineFunctionInfo::hasQueuePtr
bool hasQueuePtr() const
Definition: SIMachineFunctionInfo.h:623
llvm::SIMachineFunctionInfo::hasWorkGroupIDY
bool hasWorkGroupIDY() const
Definition: SIMachineFunctionInfo.h:643
llvm::MachineBasicBlock::empty
bool empty() const
Definition: MachineBasicBlock.h:240
llvm::AMDGPUAsmPrinter::emitFunctionEntryLabel
void emitFunctionEntryLabel() override
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
Definition: AMDGPUAsmPrinter.cpp:275
llvm::SIMachineFunctionInfo::hasKernargSegmentPtr
bool hasKernargSegmentPtr() const
Definition: SIMachineFunctionInfo.h:627
llvm::SIProgramInfo::DX10Clamp
uint32_t DX10Clamp
Definition: SIProgramInfo.h:32
llvm::SIMachineFunctionInfo::getPSInputAddr
unsigned getPSInputAddr() const
Definition: SIMachineFunctionInfo.h:812
llvm::SIProgramInfo::ScratchBlocks
uint32_t ScratchBlocks
Definition: SIProgramInfo.h:41
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:397
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:273
llvm::CallingConv::AMDGPU_ES
@ AMDGPU_ES
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use.
Definition: CallingConv.h:231
AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR
Definition: AMDKernelCodeT.h:99
llvm::CallingConv::AMDGPU_PS
@ AMDGPU_PS
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:205
callee
Here we don't need to write any variables to the top of the stack, since they don't overwrite each other: int callee(int32 arg1, int32 arg2)
llvm::cl::desc
Definition: CommandLine.h:411
llvm::AMDGPUTargetStreamer::getPALMetadata
AMDGPUPALMetadata * getPALMetadata()
Definition: AMDGPUTargetStreamer.h:51
llvm::SIProgramInfo::SGPRBlocks
uint32_t SGPRBlocks
Definition: SIProgramInfo.h:28
llvm::CallingConv::AMDGPU_VS
@ AMDGPU_VS
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tessellation and geometry are not in use, or otherwise copy shader if one is needed).
Definition: CallingConv.h:199
llvm::AsmPrinter::isVerbose
bool isVerbose() const
Return true if assembly output should contain comments.
Definition: AsmPrinter.h:228
LLVMInitializeAMDGPUAsmPrinter
void LLVM_EXTERNAL_VISIBILITY LLVMInitializeAMDGPUAsmPrinter()
Definition: AMDGPUAsmPrinter.cpp:91
llvm::AMDGPU::isHsaAbiVersion3Or4
bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:126
llvm::AMDGPUAsmPrinter::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition: AMDGPUAsmPrinter.cpp:454
llvm::AMDGPUAsmPrinter::HexLines
std::vector< std::string > HexLines
Definition: AMDGPUAsmPrinter.h:151
llvm::AMDGPUMachineFunction
Definition: AMDGPUMachineFunction.h:20
TargetRegistry.h
llvm::AMDGPUAsmPrinter::emitGlobalVariable
void emitGlobalVariable(const GlobalVariable *GV) override
Emit the specified global variable to the .s file.
Definition: AMDGPUAsmPrinter.cpp:312
R_00B02C_SPI_SHADER_PGM_RSRC2_PS
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS
Definition: SIDefines.h:776
llvm::AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
Definition: AMDGPUBaseInfo.h:74
llvm::MCSubtargetInfo
Generic base class for all target subtargets.
Definition: MCSubtargetInfo.h:75
AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED
@ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED
Definition: AMDKernelCodeT.h:193
llvm::AsmPrinter::getFunctionNumber
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:218
llvm::GCNSubtarget::computeOccupancy
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
Definition: AMDGPUSubtarget.cpp:710
amd_kernel_code_t::wavefront_sgpr_count
uint16_t wavefront_sgpr_count
Number of scalar registers used by a wavefront.
Definition: AMDKernelCodeT.h:595
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR
Definition: AMDKernelCodeT.h:91
llvm::CallingConv::AMDGPU_CS
@ AMDGPU_CS
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:208
llvm::DS_Error
@ DS_Error
Definition: DiagnosticInfo.h:45
llvm::SIMachineFunctionInfo::hasFlatScratchInit
bool hasFlatScratchInit() const
Definition: SIMachineFunctionInfo.h:635
S_00B84C_TRAP_HANDLER
#define S_00B84C_TRAP_HANDLER(x)
Definition: SIDefines.h:809
llvm::AsmPrinter::PrintAsmOperand
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
Definition: AsmPrinterInlineAsm.cpp:600
llvm::AMDGPU::HSAMD::MetadataStreamerV4
Definition: AMDGPUHSAMetadataStreamer.h:124
llvm::AMDGPUInstPrinter::printRegOperand
static void printRegOperand(unsigned RegNo, raw_ostream &O, const MCRegisterInfo &MRI)
Definition: AMDGPUInstPrinter.cpp:341
llvm::AMDGPUSubtarget::isAmdPalOS
bool isAmdPalOS() const
Definition: AMDGPUSubtarget.h:110
llvm::GCNSubtarget::hasMAIInsts
bool hasMAIInsts() const
Definition: GCNSubtarget.h:696
AMDGPUBaseInfo.h
hasAnyNonFlatUseOfReg
static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI, const SIInstrInfo &TII, unsigned Reg)
Definition: AMDGPUAsmPrinter.cpp:681
S_00B860_WAVESIZE
#define S_00B860_WAVESIZE(x)
Definition: SIDefines.h:901