//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Implements the AMDGPU specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUSubtarget.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUInstructionSelector.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPUTargetMachine.h"
#include "R600Subtarget.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include <algorithm>

using namespace llvm;

#define DEBUG_TYPE "amdgpu-subtarget"

#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenSubtargetInfo.inc"
#undef AMDGPUSubtarget

static cl::opt<bool> EnablePowerSched(
    "amdgpu-enable-power-sched",
    cl::desc("Enable scheduling to minimize mAI power bursts"),
    cl::init(false));

static cl::opt<bool> EnableVGPRIndexMode(
    "amdgpu-vgpr-index-mode",
    cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
    cl::init(false));

static cl::opt<bool> UseAA("amdgpu-use-aa-in-codegen",
                           cl::desc("Enable the use of AA during codegen."),
                           cl::init(true));

static cl::opt<unsigned> NSAThreshold("amdgpu-nsa-threshold",
                                      cl::desc("Number of addresses from which to enable MIMG NSA."),
                                      cl::init(3), cl::Hidden);
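
// The declarations above are ordinary cl::opt flags, so they can be passed on
// a tool command line, e.g. (illustrative invocation):
//   llc -mtriple=amdgcn -mcpu=gfx1030 -amdgpu-nsa-threshold=4 ...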

GCNSubtarget::~GCNSubtarget() = default;

GCNSubtarget &
GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
                                              StringRef GPU, StringRef FS) {
  // Determine default and user-specified characteristics
  //
  // We want to be able to turn these off, but making this a subtarget feature
  // for SI has the unhelpful behavior that it unsets everything else if you
  // disable it.
  //
  // Similarly we want enable-prt-strict-null to be on by default and not to
  // unset everything else if it is disabled

  SmallString<256> FullFS("+promote-alloca,+load-store-opt,+enable-ds128,");

  // Turn on features that HSA ABI requires. Also turn on FlatForGlobal by
  // default.
  if (isAmdHsaOS())
    FullFS += "+flat-for-global,+unaligned-access-mode,+trap-handler,";

  FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS

  // Disable mutually exclusive bits.
  if (FS.contains_insensitive("+wavefrontsize")) {
    if (!FS.contains_insensitive("wavefrontsize16"))
      FullFS += "-wavefrontsize16,";
    if (!FS.contains_insensitive("wavefrontsize32"))
      FullFS += "-wavefrontsize32,";
    if (!FS.contains_insensitive("wavefrontsize64"))
      FullFS += "-wavefrontsize64,";
  }

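  // At this point FullFS holds only the defaults selected above, e.g. on an
  // HSA target:
  //   "+promote-alloca,+load-store-opt,+enable-ds128,+flat-for-global,
  //    +unaligned-access-mode,+trap-handler,+enable-prt-strict-null,"
  // The user feature string is appended last so explicit user settings
  // override these defaults.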
  FullFS += FS;

  ParseSubtargetFeatures(GPU, /*TuneCPU*/ GPU, FullFS);

  // Implement the "generic" processors, which act as the default when no
  // generation features are enabled (e.g. for -mcpu=''). HSA OS defaults to
  // the first amdgcn target that supports flat addressing. Other OSes default
  // to the first amdgcn target.
  if (Gen == AMDGPUSubtarget::INVALID) {
    Gen = TT.getOS() == Triple::AMDHSA ? AMDGPUSubtarget::SEA_ISLANDS
                                       : AMDGPUSubtarget::SOUTHERN_ISLANDS;
  }

  // We don't support FP64 for EG/NI atm.
  assert(!hasFP64() || (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS));

  // Targets must support 64-bit offsets for MUBUF instructions, flat
  // operations, or both; otherwise they cannot access a 64-bit global
  // address space.
  assert(hasAddr64() || hasFlat());
  // Unless +-flat-for-global is specified, turn on FlatForGlobal for targets
  // that do not support ADDR64 variants of MUBUF instructions. Such targets
  // cannot use a 64-bit offset with a MUBUF instruction to access the global
  // address space.
  if (!hasAddr64() && !FS.contains("flat-for-global") && !FlatForGlobal) {
    ToggleFeature(AMDGPU::FeatureFlatForGlobal);
    FlatForGlobal = true;
  }
  // Unless +-flat-for-global is specified, use MUBUF instructions for global
  // address space access if flat operations are not available.
  if (!hasFlat() && !FS.contains("flat-for-global") && FlatForGlobal) {
    ToggleFeature(AMDGPU::FeatureFlatForGlobal);
    FlatForGlobal = false;
  }

  // Set defaults if needed.
  if (MaxPrivateElementSize == 0)
    MaxPrivateElementSize = 4;

  if (LDSBankCount == 0)
    LDSBankCount = 32;

  if (TT.getArch() == Triple::amdgcn) {
    if (LocalMemorySize == 0)
      LocalMemorySize = 32768;

    // Do something sensible for unspecified target.
    if (!HasMovrel && !HasVGPRIndexMode)
      HasMovrel = true;
  }

  // Don't crash on invalid devices.
  if (WavefrontSizeLog2 == 0)
    WavefrontSizeLog2 = 5;

  HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
  HasSMulHi = getGeneration() >= AMDGPUSubtarget::GFX9;

  TargetID.setTargetIDFromFeaturesString(FS);

  LLVM_DEBUG(dbgs() << "xnack setting for subtarget: "
                    << TargetID.getXnackSetting() << '\n');
  LLVM_DEBUG(dbgs() << "sramecc setting for subtarget: "
                    << TargetID.getSramEccSetting() << '\n');

  return *this;
}

AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) : TargetTriple(TT) {}

GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                           const GCNTargetMachine &TM)
    : // clang-format off
    AMDGPUGenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS),
    AMDGPUSubtarget(TT),
    TargetTriple(TT),
    TargetID(*this),
    InstrItins(getInstrItineraryForCPU(GPU)),
    InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
    TLInfo(TM, *this),
    FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) {
  // clang-format on
  MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(this);
  CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
  InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering()));
  Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
  RegBankInfo.reset(new AMDGPURegisterBankInfo(*this));
  InstSelector.reset(new AMDGPUInstructionSelector(
      *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
}

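// Returns how many operands of an instruction may simultaneously read the
// constant bus (SGPRs or literals): one slot before GFX10, two from GFX10
// onwards, except for the 64-bit shift opcodes listed below, which remain
// limited to a single constant bus operand.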
unsigned GCNSubtarget::getConstantBusLimit(unsigned Opcode) const {
  if (getGeneration() < GFX10)
    return 1;

  switch (Opcode) {
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_ASHR_I64_e64:
    return 1;
  }

  return 2;
}

/// This list was mostly derived from experimentation.
bool GCNSubtarget::zeroesHigh16BitsOfDest(unsigned Opcode) const {
  switch (Opcode) {
  case AMDGPU::V_CVT_F16_F32_e32:
  case AMDGPU::V_CVT_F16_F32_e64:
  case AMDGPU::V_CVT_F16_U16_e32:
  case AMDGPU::V_CVT_F16_U16_e64:
  case AMDGPU::V_CVT_F16_I16_e32:
  case AMDGPU::V_CVT_F16_I16_e64:
  case AMDGPU::V_RCP_F16_e64:
  case AMDGPU::V_RCP_F16_e32:
  case AMDGPU::V_RSQ_F16_e64:
  case AMDGPU::V_RSQ_F16_e32:
  case AMDGPU::V_SQRT_F16_e64:
  case AMDGPU::V_SQRT_F16_e32:
  case AMDGPU::V_LOG_F16_e64:
  case AMDGPU::V_LOG_F16_e32:
  case AMDGPU::V_EXP_F16_e64:
  case AMDGPU::V_EXP_F16_e32:
  case AMDGPU::V_SIN_F16_e64:
  case AMDGPU::V_SIN_F16_e32:
  case AMDGPU::V_COS_F16_e64:
  case AMDGPU::V_COS_F16_e32:
  case AMDGPU::V_FLOOR_F16_e64:
  case AMDGPU::V_FLOOR_F16_e32:
  case AMDGPU::V_CEIL_F16_e64:
  case AMDGPU::V_CEIL_F16_e32:
  case AMDGPU::V_TRUNC_F16_e64:
  case AMDGPU::V_TRUNC_F16_e32:
  case AMDGPU::V_RNDNE_F16_e64:
  case AMDGPU::V_RNDNE_F16_e32:
  case AMDGPU::V_FRACT_F16_e64:
  case AMDGPU::V_FRACT_F16_e32:
  case AMDGPU::V_FREXP_MANT_F16_e64:
  case AMDGPU::V_FREXP_MANT_F16_e32:
  case AMDGPU::V_FREXP_EXP_I16_F16_e64:
  case AMDGPU::V_FREXP_EXP_I16_F16_e32:
  case AMDGPU::V_LDEXP_F16_e64:
  case AMDGPU::V_LDEXP_F16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ADD_U16_e64:
  case AMDGPU::V_ADD_U16_e32:
  case AMDGPU::V_SUB_U16_e64:
  case AMDGPU::V_SUB_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_MUL_LO_U16_e64:
  case AMDGPU::V_MUL_LO_U16_e32:
  case AMDGPU::V_ADD_F16_e64:
  case AMDGPU::V_ADD_F16_e32:
  case AMDGPU::V_SUB_F16_e64:
  case AMDGPU::V_SUB_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_MUL_F16_e64:
  case AMDGPU::V_MUL_F16_e32:
  case AMDGPU::V_MAX_F16_e64:
  case AMDGPU::V_MAX_F16_e32:
  case AMDGPU::V_MIN_F16_e64:
  case AMDGPU::V_MIN_F16_e32:
  case AMDGPU::V_MAX_U16_e64:
  case AMDGPU::V_MAX_U16_e32:
  case AMDGPU::V_MIN_U16_e64:
  case AMDGPU::V_MIN_U16_e32:
  case AMDGPU::V_MAX_I16_e64:
  case AMDGPU::V_MAX_I16_e32:
  case AMDGPU::V_MIN_I16_e64:
  case AMDGPU::V_MIN_I16_e32:
  case AMDGPU::V_MAD_F16_e64:
  case AMDGPU::V_MAD_U16_e64:
  case AMDGPU::V_MAD_I16_e64:
  case AMDGPU::V_FMA_F16_e64:
  case AMDGPU::V_DIV_FIXUP_F16_e64:
    // On gfx10, all 16-bit instructions preserve the high bits.
    return getGeneration() <= AMDGPUSubtarget::GFX9;
  case AMDGPU::V_MADAK_F16:
  case AMDGPU::V_MADMK_F16:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAMK_F16:
  case AMDGPU::V_FMAAK_F16:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e32:
    // In gfx9, the preferred handling of the unused high 16-bits changed. Most
    // instructions maintain the legacy behavior of zeroing. Some instructions
    // changed to preserving the high bits.
    return getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
  case AMDGPU::V_MAD_MIXLO_F16:
  case AMDGPU::V_MAD_MIXHI_F16:
  default:
    return false;
  }
}

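// Returns the maximum LDS size (in bytes) a workgroup may use while still
// sustaining at least NWaves waves per execution unit; the inverse of
// getOccupancyWithLocalMemSize below.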
unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
  const Function &F) const {
  if (NWaves == 1)
    return getLocalMemorySize();
  unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
  unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
  if (!WorkGroupsPerCu)
    return 0;
  unsigned MaxWaves = getMaxWavesPerEU();
  return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
}

// FIXME: Should return min,max range.
unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
  const Function &F) const {
  const unsigned MaxWorkGroupSize = getFlatWorkGroupSizes(F).second;
  const unsigned MaxWorkGroupsPerCu = getMaxWorkGroupsPerCU(MaxWorkGroupSize);
  if (!MaxWorkGroupsPerCu)
    return 0;

  const unsigned WaveSize = getWavefrontSize();

  // FIXME: Do we need to account for alignment requirement of LDS rounding the
  // size up?
  // Compute restriction based on LDS usage.
  unsigned NumGroups = getLocalMemorySize() / (Bytes ? Bytes : 1u);

  // This can be queried with more LDS than is possible, so just assume the
  // worst.
  if (NumGroups == 0)
    return 1;

  NumGroups = std::min(MaxWorkGroupsPerCu, NumGroups);

  // Round to the number of waves.
  const unsigned MaxGroupNumWaves = (MaxWorkGroupSize + WaveSize - 1) / WaveSize;
  unsigned MaxWaves = NumGroups * MaxGroupNumWaves;

  // Clamp to the maximum possible number of waves.
  MaxWaves = std::min(MaxWaves, getMaxWavesPerEU());

  // FIXME: Needs to be a multiple of the group size?
  //MaxWaves = MaxGroupNumWaves * (MaxWaves / MaxGroupNumWaves);

  assert(MaxWaves > 0 && MaxWaves <= getMaxWavesPerEU() &&
         "computed invalid occupancy");
  return MaxWaves;
}

unsigned
AMDGPUSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
  const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();
  return getOccupancyWithLocalMemSize(MFI->getLDSSize(), MF.getFunction());
}

std::pair<unsigned, unsigned>
AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
  switch (CC) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
    return std::make_pair(1, getWavefrontSize());
  default:
    return std::make_pair(1u, getMaxFlatWorkGroupSize());
  }
}

std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
    const Function &F) const {
  // Default minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> Default =
      getDefaultFlatWorkGroupSize(F.getCallingConv());

  // Requested minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
      F, "amdgpu-flat-work-group-size", Default);

  // Make sure requested minimum does not exceed requested maximum.
  if (Requested.first > Requested.second)
    return Default;

  // Make sure requested values do not violate subtarget's specifications.
  if (Requested.first < getMinFlatWorkGroupSize())
    return Default;
  if (Requested.second > getMaxFlatWorkGroupSize())
    return Default;

  return Requested;
}

std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
    const Function &F, std::pair<unsigned, unsigned> FlatWorkGroupSizes) const {
  // Default minimum/maximum number of waves per execution unit.
  std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());

  // If minimum/maximum flat work group sizes were explicitly requested using
  // "amdgpu-flat-work-group-size" attribute, then set default minimum/maximum
  // number of waves per execution unit to values implied by requested
  // minimum/maximum flat work group sizes.
  unsigned MinImpliedByFlatWorkGroupSize =
      getWavesPerEUForWorkGroup(FlatWorkGroupSizes.second);
  Default.first = MinImpliedByFlatWorkGroupSize;

  // Requested minimum/maximum number of waves per execution unit.
  std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
      F, "amdgpu-waves-per-eu", Default, true);

  // Make sure requested minimum does not exceed requested maximum.
  if (Requested.second && Requested.first > Requested.second)
    return Default;

  // Make sure requested values do not violate subtarget's specifications.
  if (Requested.first < getMinWavesPerEU() ||
      Requested.second > getMaxWavesPerEU())
    return Default;

  // Make sure requested values are compatible with values implied by requested
  // minimum/maximum flat work group sizes.
  if (Requested.first < MinImpliedByFlatWorkGroupSize)
    return Default;

  return Requested;
}

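// Returns the size of dimension Dim from the kernel's !reqd_work_group_size
// metadata, or UINT_MAX if the metadata is absent or malformed.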
static unsigned getReqdWorkGroupSize(const Function &Kernel, unsigned Dim) {
  auto Node = Kernel.getMetadata("reqd_work_group_size");
  if (Node && Node->getNumOperands() == 3)
    return mdconst::extract<ConstantInt>(Node->getOperand(Dim))->getZExtValue();
  return std::numeric_limits<unsigned>::max();
}

bool AMDGPUSubtarget::isMesaKernel(const Function &F) const {
  return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
}

unsigned AMDGPUSubtarget::getMaxWorkitemID(const Function &Kernel,
                                           unsigned Dimension) const {
  unsigned ReqdSize = getReqdWorkGroupSize(Kernel, Dimension);
  if (ReqdSize != std::numeric_limits<unsigned>::max())
    return ReqdSize - 1;
  return getFlatWorkGroupSizes(Kernel).second - 1;
}

bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
  Function *Kernel = I->getParent()->getParent();
  unsigned MinSize = 0;
  unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
  bool IdQuery = false;

  // If reqd_work_group_size is present it narrows value down.
  if (auto *CI = dyn_cast<CallInst>(I)) {
    const Function *F = CI->getCalledFunction();
    if (F) {
      unsigned Dim = UINT_MAX;
      switch (F->getIntrinsicID()) {
      case Intrinsic::amdgcn_workitem_id_x:
      case Intrinsic::r600_read_tidig_x:
        IdQuery = true;
        [[fallthrough]];
      case Intrinsic::r600_read_local_size_x:
        Dim = 0;
        break;
      case Intrinsic::amdgcn_workitem_id_y:
      case Intrinsic::r600_read_tidig_y:
        IdQuery = true;
        [[fallthrough]];
      case Intrinsic::r600_read_local_size_y:
        Dim = 1;
        break;
      case Intrinsic::amdgcn_workitem_id_z:
      case Intrinsic::r600_read_tidig_z:
        IdQuery = true;
        [[fallthrough]];
      case Intrinsic::r600_read_local_size_z:
        Dim = 2;
        break;
      default:
        break;
      }

      if (Dim <= 3) {
        unsigned ReqdSize = getReqdWorkGroupSize(*Kernel, Dim);
        if (ReqdSize != std::numeric_limits<unsigned>::max())
          MinSize = MaxSize = ReqdSize;
      }
    }
  }

  if (!MaxSize)
    return false;

  // Range metadata is [Lo, Hi). For ID query we need to pass max size
  // as Hi. For size query we need to pass Hi + 1.
  if (IdQuery)
    MinSize = 0;
  else
    ++MaxSize;

  MDBuilder MDB(I->getContext());
  MDNode *MaxWorkGroupSizeRange = MDB.createRange(APInt(32, MinSize),
                                                  APInt(32, MaxSize));
  I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
  return true;
}

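// Returns the number of bytes to reserve for the implicit kernel argument
// segment: 0 if the kernel is marked as not using it, 16 for Mesa kernels,
// and otherwise 56 bytes (code object v4 and earlier) or 256 bytes (v5),
// unless overridden by the "amdgpu-implicitarg-num-bytes" attribute.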
unsigned AMDGPUSubtarget::getImplicitArgNumBytes(const Function &F) const {
  assert(AMDGPU::isKernel(F.getCallingConv()));

  // We don't allocate the segment if we know the implicit arguments weren't
  // used, even if the ABI implies we need them.
  if (F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
    return 0;

  if (isMesaKernel(F))
    return 16;

  // Assume all implicit inputs are used by default.
  unsigned NBytes = (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) ? 256 : 56;
  return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", NBytes);
}

uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,
                                                 Align &MaxAlign) const {
  assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
         F.getCallingConv() == CallingConv::SPIR_KERNEL);

  const DataLayout &DL = F.getParent()->getDataLayout();
  uint64_t ExplicitArgBytes = 0;
  MaxAlign = Align(1);

  for (const Argument &Arg : F.args()) {
    const bool IsByRef = Arg.hasByRefAttr();
    Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
    Align Alignment = DL.getValueOrABITypeAlignment(
        IsByRef ? Arg.getParamAlign() : None, ArgTy);
    uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
    ExplicitArgBytes = alignTo(ExplicitArgBytes, Alignment) + AllocSize;
    MaxAlign = std::max(MaxAlign, Alignment);
  }

  return ExplicitArgBytes;
}

unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F,
                                                Align &MaxAlign) const {
  uint64_t ExplicitArgBytes = getExplicitKernArgSize(F, MaxAlign);

  unsigned ExplicitOffset = getExplicitKernelArgOffset(F);

  uint64_t TotalSize = ExplicitOffset + ExplicitArgBytes;
  unsigned ImplicitBytes = getImplicitArgNumBytes(F);
  if (ImplicitBytes != 0) {
    const Align Alignment = getAlignmentForImplicitArgPtr();
    TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
    MaxAlign = std::max(MaxAlign, Alignment);
  }

  // Being able to dereference past the end is useful for emitting scalar
  // loads.
  return alignTo(TotalSize, 4);
}

AMDGPUDwarfFlavour AMDGPUSubtarget::getAMDGPUDwarfFlavour() const {
  return getWavefrontSize() == 64 ? AMDGPUDwarfFlavour::Wave64
                                  : AMDGPUDwarfFlavour::Wave32;
}

void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                       unsigned NumRegionInstrs) const {
  // Track register pressure so the scheduler can try to decrease
  // pressure once register usage is above the threshold defined by
  // SIRegisterInfo::getRegPressureSetLimit().
  Policy.ShouldTrackPressure = true;

  // Enabling both top down and bottom up scheduling seems to give us fewer
  // register spills than just using one of these approaches on its own.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;

  // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
  if (!enableSIScheduler())
    Policy.ShouldTrackLaneMasks = true;
}

bool GCNSubtarget::hasMadF16() const {
  return InstrInfo.pseudoToMCOpcode(AMDGPU::V_MAD_F16_e64) != -1;
}

bool GCNSubtarget::useVGPRIndexMode() const {
  return !hasMovrel() || (EnableVGPRIndexMode && hasVGPRIndexMode());
}

bool GCNSubtarget::useAA() const { return UseAA; }

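// On GFX10+, SGPR usage never limits occupancy, so the maximum wave count is
// returned unconditionally; on earlier generations occupancy steps down as
// SGPR usage crosses the per-generation thresholds below.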
unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
  if (getGeneration() >= AMDGPUSubtarget::GFX10)
    return getMaxWavesPerEU();

  if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
    if (SGPRs <= 80)
      return 10;
    if (SGPRs <= 88)
      return 9;
    if (SGPRs <= 100)
      return 8;
    return 7;
  }
  if (SGPRs <= 48)
    return 10;
  if (SGPRs <= 56)
    return 9;
  if (SGPRs <= 64)
    return 8;
  if (SGPRs <= 72)
    return 7;
  if (SGPRs <= 80)
    return 6;
  return 5;
}

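// VGPRs are allocated in multiples of the allocation granule; usage is
// rounded up to the granule before being divided into the total register
// file to bound the wave count.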
unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
  unsigned MaxWaves = getMaxWavesPerEU();
  unsigned Granule = getVGPRAllocGranule();
  if (VGPRs < Granule)
    return MaxWaves;
  unsigned RoundedRegs = ((VGPRs + Granule - 1) / Granule) * Granule;
  return std::min(std::max(getTotalNumVGPRs() / RoundedRegs, 1u), MaxWaves);
}

unsigned
GCNSubtarget::getBaseReservedNumSGPRs(const bool HasFlatScratch) const {
  if (getGeneration() >= AMDGPUSubtarget::GFX10)
    return 2; // VCC. FLAT_SCRATCH and XNACK are no longer in SGPRs.

  if (HasFlatScratch || HasArchitectedFlatScratch) {
    if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
      return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
    if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
      return 4; // FLAT_SCRATCH, VCC (in that order).
  }

  if (isXNACKEnabled())
    return 4; // XNACK, VCC (in that order).
  return 2; // VCC.
}

unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  return getBaseReservedNumSGPRs(MFI.hasFlatScratchInit());
}

unsigned GCNSubtarget::getReservedNumSGPRs(const Function &F) const {
  // In principle we do not need to reserve the SGPR pair used for flat_scratch
  // if we know flat instructions do not access the stack anywhere in the
  // program. For now assume it's needed if we have flat instructions.
  const bool KernelUsesFlatScratch = hasFlatAddressSpace();
  return getBaseReservedNumSGPRs(KernelUsesFlatScratch);
}

unsigned GCNSubtarget::computeOccupancy(const Function &F, unsigned LDSSize,
                                        unsigned NumSGPRs,
                                        unsigned NumVGPRs) const {
  unsigned Occupancy =
    std::min(getMaxWavesPerEU(),
             getOccupancyWithLocalMemSize(LDSSize, F));
  if (NumSGPRs)
    Occupancy = std::min(Occupancy, getOccupancyWithNumSGPRs(NumSGPRs));
  if (NumVGPRs)
    Occupancy = std::min(Occupancy, getOccupancyWithNumVGPRs(NumVGPRs));
  return Occupancy;
}

unsigned GCNSubtarget::getBaseMaxNumSGPRs(
    const Function &F, std::pair<unsigned, unsigned> WavesPerEU,
    unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const {
  // Compute maximum number of SGPRs function can use using default/requested
  // minimum number of waves per execution unit.
  unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
  unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);

  // Check if maximum number of SGPRs was explicitly requested using
  // "amdgpu-num-sgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-sgpr")) {
    unsigned Requested = AMDGPU::getIntegerAttribute(
        F, "amdgpu-num-sgpr", MaxNumSGPRs);

    // Make sure requested value does not violate subtarget's specifications.
    if (Requested && (Requested <= ReservedNumSGPRs))
      Requested = 0;

    // If more SGPRs are required to support the input user/system SGPRs,
    // increase to accommodate them.
    //
    // FIXME: This really ends up using the requested number of SGPRs + number
    // of reserved special registers in total. Theoretically you could re-use
    // the last input registers for these special registers, but this would
    // require a lot of complexity to deal with the weird aliasing.
    unsigned InputNumSGPRs = PreloadedSGPRs;
    if (Requested && Requested < InputNumSGPRs)
      Requested = InputNumSGPRs;

    // Make sure requested value is compatible with values implied by
    // default/requested minimum/maximum number of waves per execution unit.
    if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumSGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumSGPRs = Requested;
  }

  if (hasSGPRInitBug())
    MaxNumSGPRs = AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;

  return std::min(MaxNumSGPRs - ReservedNumSGPRs, MaxAddressableNumSGPRs);
}

unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  return getBaseMaxNumSGPRs(F, MFI.getWavesPerEU(), MFI.getNumPreloadedSGPRs(),
                            getReservedNumSGPRs(MF));
}

static unsigned getMaxNumPreloadedSGPRs() {
  // Max number of user SGPRs
  unsigned MaxUserSGPRs = 4 + // private segment buffer
                          2 + // Dispatch ptr
                          2 + // queue ptr
                          2 + // kernel segment ptr
                          2 + // dispatch ID
                          2 + // flat scratch init
                          2;  // Implicit buffer ptr

  // Max number of system SGPRs
  unsigned MaxSystemSGPRs = 1 + // WorkGroupIDX
                            1 + // WorkGroupIDY
                            1 + // WorkGroupIDZ
                            1 + // WorkGroupInfo
                            1;  // private segment wave byte offset

  // Max number of synthetic SGPRs
  unsigned SyntheticSGPRs = 1; // LDSKernelId

  return MaxUserSGPRs + MaxSystemSGPRs + SyntheticSGPRs;
}

unsigned GCNSubtarget::getMaxNumSGPRs(const Function &F) const {
  return getBaseMaxNumSGPRs(F, getWavesPerEU(F), getMaxNumPreloadedSGPRs(),
                            getReservedNumSGPRs(F));
}

unsigned GCNSubtarget::getBaseMaxNumVGPRs(
    const Function &F, std::pair<unsigned, unsigned> WavesPerEU) const {
  // Compute maximum number of VGPRs function can use using default/requested
  // minimum number of waves per execution unit.
  unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);

  // Check if maximum number of VGPRs was explicitly requested using
  // "amdgpu-num-vgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-vgpr")) {
    unsigned Requested = AMDGPU::getIntegerAttribute(
        F, "amdgpu-num-vgpr", MaxNumVGPRs);

    if (hasGFX90AInsts())
      Requested *= 2;

    // Make sure requested value is compatible with values implied by
    // default/requested minimum/maximum number of waves per execution unit.
    if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumVGPRs = Requested;
  }

  return MaxNumVGPRs;
}

unsigned GCNSubtarget::getMaxNumVGPRs(const Function &F) const {
  return getBaseMaxNumVGPRs(F, getWavesPerEU(F));
}

unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  return getBaseMaxNumVGPRs(F, MFI.getWavesPerEU());
}

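// Fix up data-dependence latencies that involve instruction bundles: for a
// def inside a bundle, use the latency of the bundled instruction that
// actually writes the register; for a use inside a bundle, shrink the def's
// latency by the distance to the first bundled reader of the register.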
void GCNSubtarget::adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use,
                                         int UseOpIdx, SDep &Dep) const {
  if (Dep.getKind() != SDep::Kind::Data || !Dep.getReg() ||
      !Def->isInstr() || !Use->isInstr())
    return;

  MachineInstr *DefI = Def->getInstr();
  MachineInstr *UseI = Use->getInstr();

  if (DefI->isBundle()) {
    const SIRegisterInfo *TRI = getRegisterInfo();
    auto Reg = Dep.getReg();
    MachineBasicBlock::const_instr_iterator I(DefI->getIterator());
    MachineBasicBlock::const_instr_iterator E(DefI->getParent()->instr_end());
    unsigned Lat = 0;
    for (++I; I != E && I->isBundledWithPred(); ++I) {
      if (I->modifiesRegister(Reg, TRI))
        Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *I);
      else if (Lat)
        --Lat;
    }
    Dep.setLatency(Lat);
  } else if (UseI->isBundle()) {
    const SIRegisterInfo *TRI = getRegisterInfo();
    auto Reg = Dep.getReg();
    MachineBasicBlock::const_instr_iterator I(UseI->getIterator());
    MachineBasicBlock::const_instr_iterator E(UseI->getParent()->instr_end());
    unsigned Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *DefI);
    for (++I; I != E && I->isBundledWithPred() && Lat; ++I) {
      if (I->readsRegister(Reg, TRI))
        break;
      --Lat;
    }
    Dep.setLatency(Lat);
  } else if (Dep.getLatency() == 0 && Dep.getReg() == AMDGPU::VCC_LO) {
    // Work around the fact that SIInstrInfo::fixImplicitOperands modifies
    // implicit operands which come from the MCInstrDesc, which can fool
    // ScheduleDAGInstrs::addPhysRegDataDeps into treating them as implicit
    // pseudo operands.
    Dep.setLatency(InstrInfo.getSchedModel().computeOperandLatency(
        DefI, DefOpIdx, UseI, UseOpIdx));
  }
}

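// DAG mutation that links independent SALU instructions into the shadow of
// long-latency MFMA instructions; see the comment in apply() below for the
// motivation.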
namespace {
struct FillMFMAShadowMutation : ScheduleDAGMutation {
  const SIInstrInfo *TII;

  ScheduleDAGMI *DAG;

  FillMFMAShadowMutation(const SIInstrInfo *tii) : TII(tii) {}

  bool isSALU(const SUnit *SU) const {
    const MachineInstr *MI = SU->getInstr();
    return MI && TII->isSALU(*MI) && !MI->isTerminator();
  }

  bool isVALU(const SUnit *SU) const {
    const MachineInstr *MI = SU->getInstr();
    return MI && TII->isVALU(*MI);
  }

  // Link as many SALU instructions in a chain as possible. Return the size
  // of the chain. Links up to MaxChain instructions.
  unsigned linkSALUChain(SUnit *From, SUnit *To, unsigned MaxChain,
                         SmallPtrSetImpl<SUnit *> &Visited) const {
    SmallVector<SUnit *, 8> Worklist({To});
    unsigned Linked = 0;

    while (!Worklist.empty() && MaxChain-- > 0) {
      SUnit *SU = Worklist.pop_back_val();
      if (!Visited.insert(SU).second)
        continue;

      LLVM_DEBUG(dbgs() << "Inserting edge from\n"; DAG->dumpNode(*From);
                 dbgs() << "to\n"; DAG->dumpNode(*SU); dbgs() << '\n');

      if (SU != From && From != &DAG->ExitSU && DAG->canAddEdge(SU, From))
        if (DAG->addEdge(SU, SDep(From, SDep::Artificial)))
          ++Linked;

      for (SDep &SI : From->Succs) {
        SUnit *SUv = SI.getSUnit();
        if (SUv != From && SU != &DAG->ExitSU && isVALU(SUv) &&
            DAG->canAddEdge(SUv, SU))
          DAG->addEdge(SUv, SDep(SU, SDep::Artificial));
      }

      for (SDep &SI : SU->Succs) {
        SUnit *Succ = SI.getSUnit();
        if (Succ != SU && isSALU(Succ))
          Worklist.push_back(Succ);
      }
    }

    return Linked;
  }

  void apply(ScheduleDAGInstrs *DAGInstrs) override {
    const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget<GCNSubtarget>();
    if (!ST.hasMAIInsts())
      return;
    DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
    const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel();
    if (!TSchedModel || DAG->SUnits.empty())
      return;

    // Scan for MFMA long latency instructions and try to add a dependency
    // of available SALU instructions to give them a chance to fill the MFMA
    // shadow. That is desirable to fill the MFMA shadow with SALU instructions
    // rather than VALU to prevent power consumption bursts and throttling.
    auto LastSALU = DAG->SUnits.begin();
    auto E = DAG->SUnits.end();
    SmallPtrSet<SUnit*, 32> Visited;
    for (SUnit &SU : DAG->SUnits) {
      MachineInstr &MAI = *SU.getInstr();
      if (!TII->isMAI(MAI) ||
          MAI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
          MAI.getOpcode() == AMDGPU::V_ACCVGPR_READ_B32_e64)
        continue;

      unsigned Lat = TSchedModel->computeInstrLatency(&MAI) - 1;

      LLVM_DEBUG(dbgs() << "Found MFMA: "; DAG->dumpNode(SU);
                 dbgs() << "Need " << Lat
                        << " instructions to cover latency.\n");

      // Find up to Lat independent scalar instructions as early as
      // possible such that they can be scheduled after this MFMA.
      for ( ; Lat && LastSALU != E; ++LastSALU) {
        if (Visited.count(&*LastSALU))
          continue;

        if (&SU == &DAG->ExitSU || &SU == &*LastSALU || !isSALU(&*LastSALU) ||
            !DAG->canAddEdge(&*LastSALU, &SU))
          continue;

        Lat -= linkSALUChain(&SU, &*LastSALU, Lat, Visited);
      }
    }
  }
};
} // namespace

void GCNSubtarget::getPostRAMutations(
    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(std::make_unique<FillMFMAShadowMutation>(&InstrInfo));
}

std::unique_ptr<ScheduleDAGMutation>
GCNSubtarget::createFillMFMAShadowMutation(const TargetInstrInfo *TII) const {
  return EnablePowerSched ? std::make_unique<FillMFMAShadowMutation>(&InstrInfo)
                          : nullptr;
}

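// Threshold precedence: an explicit -amdgpu-nsa-threshold command-line value
// wins, then the function's "amdgpu-nsa-threshold" attribute, then the
// default of 3; explicit values are clamped so NSA is never enabled below
// two addresses.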
unsigned GCNSubtarget::getNSAThreshold(const MachineFunction &MF) const {
  if (NSAThreshold.getNumOccurrences() > 0)
    return std::max(NSAThreshold.getValue(), 2u);

  int Value = AMDGPU::getIntegerAttribute(MF.getFunction(),
                                          "amdgpu-nsa-threshold", -1);
  if (Value > 0)
    return std::max(Value, 2);

  return 3;
}

const AMDGPUSubtarget &AMDGPUSubtarget::get(const MachineFunction &MF) {
  if (MF.getTarget().getTargetTriple().getArch() == Triple::amdgcn)
    return static_cast<const AMDGPUSubtarget&>(MF.getSubtarget<GCNSubtarget>());
  else
    return static_cast<const AMDGPUSubtarget&>(MF.getSubtarget<R600Subtarget>());
}

const AMDGPUSubtarget &AMDGPUSubtarget::get(const TargetMachine &TM, const Function &F) {
  if (TM.getTargetTriple().getArch() == Triple::amdgcn)
    return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<GCNSubtarget>(F));
  else
    return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<R600Subtarget>(F));
}