//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Implements the AMDGPU specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUSubtarget.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUInstructionSelector.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPUTargetMachine.h"
#include "R600Subtarget.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include <algorithm>

using namespace llvm;

#define DEBUG_TYPE "amdgpu-subtarget"

#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenSubtargetInfo.inc"
#undef AMDGPUSubtarget

static cl::opt<bool> DisablePowerSched(
  "amdgpu-disable-power-sched",
  cl::desc("Disable scheduling to minimize mAI power bursts"),
  cl::init(false));

static cl::opt<bool> EnableVGPRIndexMode(
  "amdgpu-vgpr-index-mode",
  cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
  cl::init(false));

static cl::opt<bool> UseAA("amdgpu-use-aa-in-codegen",
                           cl::desc("Enable the use of AA during codegen."),
                           cl::init(true));

GCNSubtarget::~GCNSubtarget() = default;

GCNSubtarget &
GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
                                              StringRef GPU, StringRef FS) {
  // Determine default and user-specified characteristics
  //
  // We want to be able to turn these off, but making this a subtarget feature
  // for SI has the unhelpful behavior that it unsets everything else if you
  // disable it.
  //
  // Similarly we want enable-prt-strict-null to be on by default and not to
  // unset everything else if it is disabled

  SmallString<256> FullFS("+promote-alloca,+load-store-opt,+enable-ds128,");

  // Turn on features that HSA ABI requires. Also turn on FlatForGlobal by default
  if (isAmdHsaOS())
    FullFS += "+flat-for-global,+unaligned-access-mode,+trap-handler,";

  FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS

  // Disable mutually exclusive bits.
  if (FS.contains_insensitive("+wavefrontsize")) {
    if (!FS.contains_insensitive("wavefrontsize16"))
      FullFS += "-wavefrontsize16,";
    if (!FS.contains_insensitive("wavefrontsize32"))
      FullFS += "-wavefrontsize32,";
    if (!FS.contains_insensitive("wavefrontsize64"))
      FullFS += "-wavefrontsize64,";
  }

  FullFS += FS;

  ParseSubtargetFeatures(GPU, /*TuneCPU*/ GPU, FullFS);

  // Implement the "generic" processors, which act as the default when no
  // generation features are enabled (e.g. for -mcpu=''). HSA OS defaults to
  // the first amdgcn target that supports flat addressing. Other OSes default
  // to the first amdgcn target.
  if (Gen == AMDGPUSubtarget::INVALID) {
    Gen = TT.getOS() == Triple::AMDHSA ? AMDGPUSubtarget::SEA_ISLANDS
                                       : AMDGPUSubtarget::SOUTHERN_ISLANDS;
  }

  // We don't support FP64 for EG/NI atm.
  assert(!hasFP64() || (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS));

  // Targets must either support 64-bit offsets for MUBUF instructions, and/or
  // support flat operations, otherwise they cannot access a 64-bit global
  // address space
  assert(hasAddr64() || hasFlat());
  // Unless +-flat-for-global is specified, turn on FlatForGlobal for targets
  // that do not support ADDR64 variants of MUBUF instructions. Such targets
  // cannot use a 64 bit offset with a MUBUF instruction to access the global
  // address space
  if (!hasAddr64() && !FS.contains("flat-for-global") && !FlatForGlobal) {
    ToggleFeature(AMDGPU::FeatureFlatForGlobal);
    FlatForGlobal = true;
  }
  // Unless +-flat-for-global is specified, use MUBUF instructions for global
  // address space access if flat operations are not available.
  if (!hasFlat() && !FS.contains("flat-for-global") && FlatForGlobal) {
    ToggleFeature(AMDGPU::FeatureFlatForGlobal);
    FlatForGlobal = false;
  }

  // Set defaults if needed.
  if (MaxPrivateElementSize == 0)
    MaxPrivateElementSize = 4;

  if (LDSBankCount == 0)
    LDSBankCount = 32;

  if (TT.getArch() == Triple::amdgcn) {
    if (LocalMemorySize == 0)
      LocalMemorySize = 32768;

    // Do something sensible for unspecified target.
    if (!HasMovrel && !HasVGPRIndexMode)
      HasMovrel = true;
  }

  // Don't crash on invalid devices.
  if (WavefrontSizeLog2 == 0)
    WavefrontSizeLog2 = 5;

  HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
  HasSMulHi = getGeneration() >= AMDGPUSubtarget::GFX9;

  TargetID.setTargetIDFromFeaturesString(FS);

  LLVM_DEBUG(dbgs() << "xnack setting for subtarget: "
                    << TargetID.getXnackSetting() << '\n');
  LLVM_DEBUG(dbgs() << "sramecc setting for subtarget: "
                    << TargetID.getSramEccSetting() << '\n');

  return *this;
}

AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) : TargetTriple(TT) {}

GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                           const GCNTargetMachine &TM)
    : // clang-format off
    AMDGPUGenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS),
    AMDGPUSubtarget(TT),
    TargetTriple(TT),
    TargetID(*this),
    InstrItins(getInstrItineraryForCPU(GPU)),
    InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
    TLInfo(TM, *this),
    FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) {
  // clang-format on
  MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(this);
  CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
  InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering()));
  Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
  RegBankInfo.reset(new AMDGPURegisterBankInfo(*this));
  InstSelector.reset(new AMDGPUInstructionSelector(
      *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
}

unsigned GCNSubtarget::getConstantBusLimit(unsigned Opcode) const {
  if (getGeneration() < GFX10)
    return 1;

  switch (Opcode) {
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_ASHR_I64_e64:
    return 1;
  }

  return 2;
}
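
// Example (illustrative, assuming GFX10+ hardware behavior): a VALU
// instruction such as V_ADD_F32_e64 may read two constant-bus operands
// (SGPRs or literals), while the 64-bit shifts listed above remain limited
// to a single constant-bus source.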

/// This list was mostly derived from experimentation.
bool GCNSubtarget::zeroesHigh16BitsOfDest(unsigned Opcode) const {
  switch (Opcode) {
  case AMDGPU::V_CVT_F16_F32_e32:
  case AMDGPU::V_CVT_F16_F32_e64:
  case AMDGPU::V_CVT_F16_U16_e32:
  case AMDGPU::V_CVT_F16_U16_e64:
  case AMDGPU::V_CVT_F16_I16_e32:
  case AMDGPU::V_CVT_F16_I16_e64:
  case AMDGPU::V_RCP_F16_e64:
  case AMDGPU::V_RCP_F16_e32:
  case AMDGPU::V_RSQ_F16_e64:
  case AMDGPU::V_RSQ_F16_e32:
  case AMDGPU::V_SQRT_F16_e64:
  case AMDGPU::V_SQRT_F16_e32:
  case AMDGPU::V_LOG_F16_e64:
  case AMDGPU::V_LOG_F16_e32:
  case AMDGPU::V_EXP_F16_e64:
  case AMDGPU::V_EXP_F16_e32:
  case AMDGPU::V_SIN_F16_e64:
  case AMDGPU::V_SIN_F16_e32:
  case AMDGPU::V_COS_F16_e64:
  case AMDGPU::V_COS_F16_e32:
  case AMDGPU::V_FLOOR_F16_e64:
  case AMDGPU::V_FLOOR_F16_e32:
  case AMDGPU::V_CEIL_F16_e64:
  case AMDGPU::V_CEIL_F16_e32:
  case AMDGPU::V_TRUNC_F16_e64:
  case AMDGPU::V_TRUNC_F16_e32:
  case AMDGPU::V_RNDNE_F16_e64:
  case AMDGPU::V_RNDNE_F16_e32:
  case AMDGPU::V_FRACT_F16_e64:
  case AMDGPU::V_FRACT_F16_e32:
  case AMDGPU::V_FREXP_MANT_F16_e64:
  case AMDGPU::V_FREXP_MANT_F16_e32:
  case AMDGPU::V_FREXP_EXP_I16_F16_e64:
  case AMDGPU::V_FREXP_EXP_I16_F16_e32:
  case AMDGPU::V_LDEXP_F16_e64:
  case AMDGPU::V_LDEXP_F16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ADD_U16_e64:
  case AMDGPU::V_ADD_U16_e32:
  case AMDGPU::V_SUB_U16_e64:
  case AMDGPU::V_SUB_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_MUL_LO_U16_e64:
  case AMDGPU::V_MUL_LO_U16_e32:
  case AMDGPU::V_ADD_F16_e64:
  case AMDGPU::V_ADD_F16_e32:
  case AMDGPU::V_SUB_F16_e64:
  case AMDGPU::V_SUB_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_MUL_F16_e64:
  case AMDGPU::V_MUL_F16_e32:
  case AMDGPU::V_MAX_F16_e64:
  case AMDGPU::V_MAX_F16_e32:
  case AMDGPU::V_MIN_F16_e64:
  case AMDGPU::V_MIN_F16_e32:
  case AMDGPU::V_MAX_U16_e64:
  case AMDGPU::V_MAX_U16_e32:
  case AMDGPU::V_MIN_U16_e64:
  case AMDGPU::V_MIN_U16_e32:
  case AMDGPU::V_MAX_I16_e64:
  case AMDGPU::V_MAX_I16_e32:
  case AMDGPU::V_MIN_I16_e64:
  case AMDGPU::V_MIN_I16_e32:
  case AMDGPU::V_MAD_F16_e64:
  case AMDGPU::V_MAD_U16_e64:
  case AMDGPU::V_MAD_I16_e64:
  case AMDGPU::V_FMA_F16_e64:
  case AMDGPU::V_DIV_FIXUP_F16_e64:
    // On gfx10, all 16-bit instructions preserve the high bits.
    return getGeneration() <= AMDGPUSubtarget::GFX9;
  case AMDGPU::V_MADAK_F16:
  case AMDGPU::V_MADMK_F16:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAMK_F16:
  case AMDGPU::V_FMAAK_F16:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e32:
    // In gfx9, the preferred handling of the unused high 16-bits changed. Most
    // instructions maintain the legacy behavior of 0ing. Some instructions
    // changed to preserving the high bits.
    return getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
  case AMDGPU::V_MAD_MIXLO_F16:
  case AMDGPU::V_MAD_MIXHI_F16:
  default:
    return false;
  }
}

unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
  const Function &F) const {
  if (NWaves == 1)
    return getLocalMemorySize();
  unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
  unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
  if (!WorkGroupsPerCu)
    return 0;
  unsigned MaxWaves = getMaxWavesPerEU();
  return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
}
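
// Worked example (illustrative numbers, not tied to a specific target): with
// 65536 bytes of LDS, getMaxWavesPerEU() == 10 and WorkGroupsPerCu == 5, a
// kernel that must keep NWaves == 4 waves resident may use at most
// 65536 * 10 / 5 / 4 == 32768 bytes of LDS.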

// FIXME: Should return min,max range.
unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
  const Function &F) const {
  const unsigned MaxWorkGroupSize = getFlatWorkGroupSizes(F).second;
  const unsigned MaxWorkGroupsPerCu = getMaxWorkGroupsPerCU(MaxWorkGroupSize);
  if (!MaxWorkGroupsPerCu)
    return 0;

  const unsigned WaveSize = getWavefrontSize();

  // FIXME: Do we need to account for alignment requirement of LDS rounding the
  // size up?
  // Compute restriction based on LDS usage
  unsigned NumGroups = getLocalMemorySize() / (Bytes ? Bytes : 1u);

  // This can be queried with more LDS than is possible, so just assume the
  // worst.
  if (NumGroups == 0)
    return 1;

  NumGroups = std::min(MaxWorkGroupsPerCu, NumGroups);

  // Round to the number of waves.
  const unsigned MaxGroupNumWaves = (MaxWorkGroupSize + WaveSize - 1) / WaveSize;
  unsigned MaxWaves = NumGroups * MaxGroupNumWaves;

  // Clamp to the maximum possible number of waves.
  MaxWaves = std::min(MaxWaves, getMaxWavesPerEU());

  // FIXME: Needs to be a multiple of the group size?
  //MaxWaves = MaxGroupNumWaves * (MaxWaves / MaxGroupNumWaves);

  assert(MaxWaves > 0 && MaxWaves <= getMaxWavesPerEU() &&
         "computed invalid occupancy");
  return MaxWaves;
}
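
// Worked example (illustrative numbers): with 65536 bytes of LDS, a kernel
// using 16384 bytes allows NumGroups == 4 (assuming MaxWorkGroupsPerCu
// permits it). A 256-item work group with 64-wide waves is 4 waves per
// group, so MaxWaves == 16 before being clamped to getMaxWavesPerEU().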

unsigned
AMDGPUSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
  const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();
  return getOccupancyWithLocalMemSize(MFI->getLDSSize(), MF.getFunction());
}

std::pair<unsigned, unsigned>
AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
  switch (CC) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
    return std::make_pair(1, getWavefrontSize());
  default:
    return std::make_pair(1u, getMaxFlatWorkGroupSize());
  }
}

std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
  const Function &F) const {
  // Default minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> Default =
    getDefaultFlatWorkGroupSize(F.getCallingConv());

  // Requested minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
    F, "amdgpu-flat-work-group-size", Default);

  // Make sure requested minimum is less than requested maximum.
  if (Requested.first > Requested.second)
    return Default;

  // Make sure requested values do not violate subtarget's specifications.
  if (Requested.first < getMinFlatWorkGroupSize())
    return Default;
  if (Requested.second > getMaxFlatWorkGroupSize())
    return Default;

  return Requested;
}
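
// Example: a kernel annotated with
//   attributes #0 = { "amdgpu-flat-work-group-size"="128,256" }
// yields {128, 256}, provided 128 >= getMinFlatWorkGroupSize() and
// 256 <= getMaxFlatWorkGroupSize(); otherwise the default range is returned.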

std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
    const Function &F, std::pair<unsigned, unsigned> FlatWorkGroupSizes) const {
  // Default minimum/maximum number of waves per execution unit.
  std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());

  // If minimum/maximum flat work group sizes were explicitly requested using
  // "amdgpu-flat-work-group-size" attribute, then set default minimum/maximum
  // number of waves per execution unit to values implied by requested
  // minimum/maximum flat work group sizes.
  unsigned MinImpliedByFlatWorkGroupSize =
    getWavesPerEUForWorkGroup(FlatWorkGroupSizes.second);
  Default.first = MinImpliedByFlatWorkGroupSize;

  // Requested minimum/maximum number of waves per execution unit.
  std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
    F, "amdgpu-waves-per-eu", Default, true);

  // Make sure requested minimum is less than requested maximum.
  if (Requested.second && Requested.first > Requested.second)
    return Default;

  // Make sure requested values do not violate subtarget's specifications.
  if (Requested.first < getMinWavesPerEU() ||
      Requested.second > getMaxWavesPerEU())
    return Default;

  // Make sure requested values are compatible with values implied by requested
  // minimum/maximum flat work group sizes.
  if (Requested.first < MinImpliedByFlatWorkGroupSize)
    return Default;

  return Requested;
}
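
// Example: "amdgpu-waves-per-eu"="2,4" requests between 2 and 4 waves per
// EU. The request falls back to the default if min > max, if it violates
// the subtarget's bounds, or if the minimum is below what the flat work
// group size already implies.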

static unsigned getReqdWorkGroupSize(const Function &Kernel, unsigned Dim) {
  auto Node = Kernel.getMetadata("reqd_work_group_size");
  if (Node && Node->getNumOperands() == 3)
    return mdconst::extract<ConstantInt>(Node->getOperand(Dim))->getZExtValue();
  return std::numeric_limits<unsigned>::max();
}

bool AMDGPUSubtarget::isMesaKernel(const Function &F) const {
  return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
}

unsigned AMDGPUSubtarget::getMaxWorkitemID(const Function &Kernel,
                                           unsigned Dimension) const {
  unsigned ReqdSize = getReqdWorkGroupSize(Kernel, Dimension);
  if (ReqdSize != std::numeric_limits<unsigned>::max())
    return ReqdSize - 1;
  return getFlatWorkGroupSizes(Kernel).second - 1;
}

bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
  Function *Kernel = I->getParent()->getParent();
  unsigned MinSize = 0;
  unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
  bool IdQuery = false;

  // If reqd_work_group_size is present it narrows value down.
  if (auto *CI = dyn_cast<CallInst>(I)) {
    const Function *F = CI->getCalledFunction();
    if (F) {
      unsigned Dim = UINT_MAX;
      switch (F->getIntrinsicID()) {
      case Intrinsic::amdgcn_workitem_id_x:
      case Intrinsic::r600_read_tidig_x:
        IdQuery = true;
        LLVM_FALLTHROUGH;
      case Intrinsic::r600_read_local_size_x:
        Dim = 0;
        break;
      case Intrinsic::amdgcn_workitem_id_y:
      case Intrinsic::r600_read_tidig_y:
        IdQuery = true;
        LLVM_FALLTHROUGH;
      case Intrinsic::r600_read_local_size_y:
        Dim = 1;
        break;
      case Intrinsic::amdgcn_workitem_id_z:
      case Intrinsic::r600_read_tidig_z:
        IdQuery = true;
        LLVM_FALLTHROUGH;
      case Intrinsic::r600_read_local_size_z:
        Dim = 2;
        break;
      default:
        break;
      }

      if (Dim <= 3) {
        unsigned ReqdSize = getReqdWorkGroupSize(*Kernel, Dim);
        if (ReqdSize != std::numeric_limits<unsigned>::max())
          MinSize = MaxSize = ReqdSize;
      }
    }
  }

  if (!MaxSize)
    return false;

  // Range metadata is [Lo, Hi). For ID query we need to pass max size
  // as Hi. For size query we need to pass Hi + 1.
  if (IdQuery)
    MinSize = 0;
  else
    ++MaxSize;

  MDBuilder MDB(I->getContext());
  MDNode *MaxWorkGroupSizeRange = MDB.createRange(APInt(32, MinSize),
                                                  APInt(32, MaxSize));
  I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
  return true;
}
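
// Example of the metadata this creates: given a kernel carrying
//   !reqd_work_group_size !{i32 64, i32 1, i32 1}
// a call to llvm.amdgcn.workitem.id.x() receives !range !{i32 0, i32 64},
// i.e. the ID is known to lie in [0, 64).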

unsigned AMDGPUSubtarget::getImplicitArgNumBytes(const Function &F) const {
  assert(AMDGPU::isKernel(F.getCallingConv()));

  // We don't allocate the segment if we know the implicit arguments weren't
  // used, even if the ABI implies we need them.
  if (F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
    return 0;

  if (isMesaKernel(F))
    return 16;

  // Assume all implicit inputs are used by default
  unsigned NBytes = (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) ? 256 : 56;
  return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", NBytes);
}

uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,
                                                 Align &MaxAlign) const {
  assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
         F.getCallingConv() == CallingConv::SPIR_KERNEL);

  const DataLayout &DL = F.getParent()->getDataLayout();
  uint64_t ExplicitArgBytes = 0;
  MaxAlign = Align(1);

  for (const Argument &Arg : F.args()) {
    const bool IsByRef = Arg.hasByRefAttr();
    Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
    Align Alignment = DL.getValueOrABITypeAlignment(
        IsByRef ? Arg.getParamAlign() : None, ArgTy);
    uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
    ExplicitArgBytes = alignTo(ExplicitArgBytes, Alignment) + AllocSize;
    MaxAlign = std::max(MaxAlign, Alignment);
  }

  return ExplicitArgBytes;
}
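
// Worked example (assuming the usual 4/8/2-byte ABI alignments): explicit
// arguments (i32, i64, i16) accumulate as alignTo(0, 4) + 4 == 4, then
// alignTo(4, 8) + 8 == 16, then alignTo(16, 2) + 2 == 18 bytes, with
// MaxAlign == 8.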

unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F,
                                                Align &MaxAlign) const {
  uint64_t ExplicitArgBytes = getExplicitKernArgSize(F, MaxAlign);

  unsigned ExplicitOffset = getExplicitKernelArgOffset(F);

  uint64_t TotalSize = ExplicitOffset + ExplicitArgBytes;
  unsigned ImplicitBytes = getImplicitArgNumBytes(F);
  if (ImplicitBytes != 0) {
    const Align Alignment = getAlignmentForImplicitArgPtr();
    TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
    MaxAlign = std::max(MaxAlign, Alignment);
  }

  // Being able to dereference past the end is useful for emitting scalar loads.
  return alignTo(TotalSize, 4);
}
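
// Worked example (illustrative numbers): 12 explicit bytes plus 56 implicit
// bytes with an 8-byte implicit-arg alignment give
// alignTo(12, 8) + 56 == 72, which is already a multiple of 4.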

AMDGPUDwarfFlavour AMDGPUSubtarget::getAMDGPUDwarfFlavour() const {
  return getWavefrontSize() == 32 ? AMDGPUDwarfFlavour::Wave32
                                  : AMDGPUDwarfFlavour::Wave64;
}

void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                       unsigned NumRegionInstrs) const {
  // Track register pressure so the scheduler can try to decrease
  // pressure once register usage is above the threshold defined by
  // SIRegisterInfo::getRegPressureSetLimit()
  Policy.ShouldTrackPressure = true;

  // Enabling both top down and bottom up scheduling seems to give us less
  // register spills than just using one of these approaches on its own.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;

  // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
  if (!enableSIScheduler())
    Policy.ShouldTrackLaneMasks = true;
}

bool GCNSubtarget::hasMadF16() const {
  return InstrInfo.pseudoToMCOpcode(AMDGPU::V_MAD_F16_e64) != -1;
}

bool GCNSubtarget::useVGPRIndexMode() const {
  return !hasMovrel() || (EnableVGPRIndexMode && hasVGPRIndexMode());
}

bool GCNSubtarget::useAA() const { return UseAA; }

unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
  if (getGeneration() >= AMDGPUSubtarget::GFX10)
    return getMaxWavesPerEU();

  if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
    if (SGPRs <= 80)
      return 10;
    if (SGPRs <= 88)
      return 9;
    if (SGPRs <= 100)
      return 8;
    return 7;
  }
  if (SGPRs <= 48)
    return 10;
  if (SGPRs <= 56)
    return 9;
  if (SGPRs <= 64)
    return 8;
  if (SGPRs <= 72)
    return 7;
  if (SGPRs <= 80)
    return 6;
  return 5;
}

unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
  unsigned MaxWaves = getMaxWavesPerEU();
  unsigned Granule = getVGPRAllocGranule();
  if (VGPRs < Granule)
    return MaxWaves;
  unsigned RoundedRegs = ((VGPRs + Granule - 1) / Granule) * Granule;
  return std::min(std::max(getTotalNumVGPRs() / RoundedRegs, 1u), MaxWaves);
}
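
// Worked example (illustrative numbers): with getTotalNumVGPRs() == 256 and
// a granule of 4, a kernel using 70 VGPRs rounds up to 72, giving
// min(max(256 / 72, 1), MaxWaves) == 3 waves.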

unsigned
GCNSubtarget::getBaseReservedNumSGPRs(const bool HasFlatScratch) const {
  if (getGeneration() >= AMDGPUSubtarget::GFX10)
    return 2; // VCC. FLAT_SCRATCH and XNACK are no longer in SGPRs.

  if (HasFlatScratch || HasArchitectedFlatScratch) {
    if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
      return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
    if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
      return 4; // FLAT_SCRATCH, VCC (in that order).
  }

  if (isXNACKEnabled())
    return 4; // XNACK, VCC (in that order).
  return 2; // VCC.
}

unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  return getBaseReservedNumSGPRs(MFI.hasFlatScratchInit());
}

unsigned GCNSubtarget::getReservedNumSGPRs(const Function &F) const {
  // In principle we do not need to reserve SGPR pair used for flat_scratch if
  // we know flat instructions do not access the stack anywhere in the
  // program. For now assume it's needed if we have flat instructions.
  const bool KernelUsesFlatScratch = hasFlatAddressSpace();
  return getBaseReservedNumSGPRs(KernelUsesFlatScratch);
}

unsigned GCNSubtarget::computeOccupancy(const Function &F, unsigned LDSSize,
                                        unsigned NumSGPRs,
                                        unsigned NumVGPRs) const {
  unsigned Occupancy =
    std::min(getMaxWavesPerEU(),
             getOccupancyWithLocalMemSize(LDSSize, F));
  if (NumSGPRs)
    Occupancy = std::min(Occupancy, getOccupancyWithNumSGPRs(NumSGPRs));
  if (NumVGPRs)
    Occupancy = std::min(Occupancy, getOccupancyWithNumVGPRs(NumVGPRs));
  return Occupancy;
}

unsigned GCNSubtarget::getBaseMaxNumSGPRs(
    const Function &F, std::pair<unsigned, unsigned> WavesPerEU,
    unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const {
  // Compute maximum number of SGPRs function can use using default/requested
  // minimum number of waves per execution unit.
  unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
  unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);

  // Check if maximum number of SGPRs was explicitly requested using
  // "amdgpu-num-sgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-sgpr")) {
    unsigned Requested = AMDGPU::getIntegerAttribute(
      F, "amdgpu-num-sgpr", MaxNumSGPRs);

    // Make sure requested value does not violate subtarget's specifications.
    if (Requested && (Requested <= ReservedNumSGPRs))
      Requested = 0;

    // If more SGPRs are required to support the input user/system SGPRs,
    // increase to accommodate them.
    //
    // FIXME: This really ends up using the requested number of SGPRs + number
    // of reserved special registers in total. Theoretically you could re-use
    // the last input registers for these special registers, but this would
    // require a lot of complexity to deal with the weird aliasing.
    unsigned InputNumSGPRs = PreloadedSGPRs;
    if (Requested && Requested < InputNumSGPRs)
      Requested = InputNumSGPRs;

    // Make sure requested value is compatible with values implied by
    // default/requested minimum/maximum number of waves per execution unit.
    if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumSGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumSGPRs = Requested;
  }

  if (hasSGPRInitBug())
    MaxNumSGPRs = AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;

  return std::min(MaxNumSGPRs - ReservedNumSGPRs, MaxAddressableNumSGPRs);
}
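
// Example: "amdgpu-num-sgpr"="32" requests 32 SGPRs. The request is dropped
// if it does not exceed ReservedNumSGPRs, is raised to cover PreloadedSGPRs,
// and is dropped again if it falls outside the range implied by WavesPerEU.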

unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  return getBaseMaxNumSGPRs(F, MFI.getWavesPerEU(), MFI.getNumPreloadedSGPRs(),
                            getReservedNumSGPRs(MF));
}

static unsigned getMaxNumPreloadedSGPRs() {
  // Max number of user SGPRs
  unsigned MaxUserSGPRs = 4 + // private segment buffer
                          2 + // Dispatch ptr
                          2 + // queue ptr
                          2 + // kernel segment ptr
                          2 + // dispatch ID
                          2 + // flat scratch init
                          2;  // Implicit buffer ptr
  // Max number of system SGPRs
  unsigned MaxSystemSGPRs = 1 + // WorkGroupIDX
                            1 + // WorkGroupIDY
                            1 + // WorkGroupIDZ
                            1 + // WorkGroupInfo
                            1;  // private segment wave byte offset
  return MaxUserSGPRs + MaxSystemSGPRs;
}
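
// That is, at most 16 user SGPRs plus 5 system SGPRs == 21 preloaded SGPRs.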

unsigned GCNSubtarget::getMaxNumSGPRs(const Function &F) const {
  return getBaseMaxNumSGPRs(F, getWavesPerEU(F), getMaxNumPreloadedSGPRs(),
                            getReservedNumSGPRs(F));
}

unsigned GCNSubtarget::getBaseMaxNumVGPRs(
    const Function &F, std::pair<unsigned, unsigned> WavesPerEU) const {
  // Compute maximum number of VGPRs function can use using default/requested
  // minimum number of waves per execution unit.
  unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);

  // Check if maximum number of VGPRs was explicitly requested using
  // "amdgpu-num-vgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-vgpr")) {
    unsigned Requested = AMDGPU::getIntegerAttribute(
      F, "amdgpu-num-vgpr", MaxNumVGPRs);

    if (hasGFX90AInsts())
      Requested *= 2;

    // Make sure requested value is compatible with values implied by
    // default/requested minimum/maximum number of waves per execution unit.
    if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumVGPRs = Requested;
  }

  return MaxNumVGPRs;
}

unsigned GCNSubtarget::getMaxNumVGPRs(const Function &F) const {
  return getBaseMaxNumVGPRs(F, getWavesPerEU(F));
}

unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  return getBaseMaxNumVGPRs(F, MFI.getWavesPerEU());
}

void GCNSubtarget::adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use,
                                         int UseOpIdx, SDep &Dep) const {
  if (Dep.getKind() != SDep::Kind::Data || !Dep.getReg() ||
      !Def->isInstr() || !Use->isInstr())
    return;

  MachineInstr *DefI = Def->getInstr();
  MachineInstr *UseI = Use->getInstr();

  if (DefI->isBundle()) {
    const SIRegisterInfo *TRI = getRegisterInfo();
    auto Reg = Dep.getReg();
    MachineBasicBlock::const_instr_iterator I(DefI->getIterator());
    MachineBasicBlock::const_instr_iterator E(DefI->getParent()->instr_end());
    unsigned Lat = 0;
    for (++I; I != E && I->isBundledWithPred(); ++I) {
      if (I->modifiesRegister(Reg, TRI))
        Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *I);
      else if (Lat)
        --Lat;
    }
    Dep.setLatency(Lat);
  } else if (UseI->isBundle()) {
    const SIRegisterInfo *TRI = getRegisterInfo();
    auto Reg = Dep.getReg();
    MachineBasicBlock::const_instr_iterator I(UseI->getIterator());
    MachineBasicBlock::const_instr_iterator E(UseI->getParent()->instr_end());
    unsigned Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *DefI);
    for (++I; I != E && I->isBundledWithPred() && Lat; ++I) {
      if (I->readsRegister(Reg, TRI))
        break;
      --Lat;
    }
    Dep.setLatency(Lat);
  } else if (Dep.getLatency() == 0 && Dep.getReg() == AMDGPU::VCC_LO) {
    // Work around the fact that SIInstrInfo::fixImplicitOperands modifies
    // implicit operands which come from the MCInstrDesc, which can fool
    // ScheduleDAGInstrs::addPhysRegDataDeps into treating them as implicit
    // pseudo operands.
    Dep.setLatency(InstrInfo.getSchedModel().computeOperandLatency(
        DefI, DefOpIdx, UseI, UseOpIdx));
  }
}

namespace {
struct FillMFMAShadowMutation : ScheduleDAGMutation {
  const SIInstrInfo *TII;

  ScheduleDAGMI *DAG;

  FillMFMAShadowMutation(const SIInstrInfo *tii) : TII(tii) {}

  bool isSALU(const SUnit *SU) const {
    const MachineInstr *MI = SU->getInstr();
    return MI && TII->isSALU(*MI) && !MI->isTerminator();
  }

  bool isVALU(const SUnit *SU) const {
    const MachineInstr *MI = SU->getInstr();
    return MI && TII->isVALU(*MI);
  }

  bool canAddEdge(const SUnit *Succ, const SUnit *Pred) const {
    if (Pred->NodeNum < Succ->NodeNum)
      return true;

    SmallVector<const SUnit*, 64> Succs({Succ}), Preds({Pred});

    for (unsigned I = 0; I < Succs.size(); ++I) {
      for (const SDep &SI : Succs[I]->Succs) {
        const SUnit *SU = SI.getSUnit();
        if (SU != Succs[I] && !llvm::is_contained(Succs, SU))
          Succs.push_back(SU);
      }
    }

    SmallPtrSet<const SUnit*, 32> Visited;
    while (!Preds.empty()) {
      const SUnit *SU = Preds.pop_back_val();
      if (llvm::is_contained(Succs, SU))
        return false;
      Visited.insert(SU);
      for (const SDep &SI : SU->Preds)
        if (SI.getSUnit() != SU && !Visited.count(SI.getSUnit()))
          Preds.push_back(SI.getSUnit());
    }

    return true;
  }

  // Link as many SALU instructions in chain as possible. Return the size
  // of the chain. Links up to MaxChain instructions.
  unsigned linkSALUChain(SUnit *From, SUnit *To, unsigned MaxChain,
                         SmallPtrSetImpl<SUnit *> &Visited) const {
    SmallVector<SUnit *, 8> Worklist({To});
    unsigned Linked = 0;

    while (!Worklist.empty() && MaxChain-- > 0) {
      SUnit *SU = Worklist.pop_back_val();
      if (!Visited.insert(SU).second)
        continue;

      LLVM_DEBUG(dbgs() << "Inserting edge from\n" ; DAG->dumpNode(*From);
                 dbgs() << "to\n"; DAG->dumpNode(*SU); dbgs() << '\n');

      if (SU->addPred(SDep(From, SDep::Artificial), false))
        ++Linked;

      for (SDep &SI : From->Succs) {
        SUnit *SUv = SI.getSUnit();
        if (SUv != From && isVALU(SUv) && canAddEdge(SUv, SU))
          SUv->addPred(SDep(SU, SDep::Artificial), false);
      }

      for (SDep &SI : SU->Succs) {
        SUnit *Succ = SI.getSUnit();
        if (Succ != SU && isSALU(Succ) && canAddEdge(From, Succ))
          Worklist.push_back(Succ);
      }
    }

    return Linked;
  }

  void apply(ScheduleDAGInstrs *DAGInstrs) override {
    const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget<GCNSubtarget>();
    if (!ST.hasMAIInsts() || DisablePowerSched)
      return;
    DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
    const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel();
    if (!TSchedModel || DAG->SUnits.empty())
      return;

    // Scan for MFMA long latency instructions and try to add a dependency
    // of available SALU instructions to give them a chance to fill MFMA
    // shadow. That is desirable to fill MFMA shadow with SALU instructions
    // rather than VALU to prevent power consumption bursts and throttle.
    auto LastSALU = DAG->SUnits.begin();
    auto E = DAG->SUnits.end();
    SmallPtrSet<SUnit*, 32> Visited;
    for (SUnit &SU : DAG->SUnits) {
      MachineInstr &MAI = *SU.getInstr();
      if (!TII->isMAI(MAI) ||
          MAI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
          MAI.getOpcode() == AMDGPU::V_ACCVGPR_READ_B32_e64)
        continue;

      unsigned Lat = TSchedModel->computeInstrLatency(&MAI) - 1;

      LLVM_DEBUG(dbgs() << "Found MFMA: "; DAG->dumpNode(SU);
                 dbgs() << "Need " << Lat
                        << " instructions to cover latency.\n");

      // Find up to Lat independent scalar instructions as early as
      // possible such that they can be scheduled after this MFMA.
      for ( ; Lat && LastSALU != E; ++LastSALU) {
        if (Visited.count(&*LastSALU))
          continue;

        if (!isSALU(&*LastSALU) || !canAddEdge(&*LastSALU, &SU))
          continue;

        Lat -= linkSALUChain(&SU, &*LastSALU, Lat, Visited);
      }
    }
  }
};
} // namespace

void GCNSubtarget::getPostRAMutations(
    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(std::make_unique<FillMFMAShadowMutation>(&InstrInfo));
}

std::unique_ptr<ScheduleDAGMutation>
GCNSubtarget::createFillMFMAShadowMutation(const TargetInstrInfo *TII) const {
  return std::make_unique<FillMFMAShadowMutation>(&InstrInfo);
}

const AMDGPUSubtarget &AMDGPUSubtarget::get(const MachineFunction &MF) {
  if (MF.getTarget().getTargetTriple().getArch() == Triple::amdgcn)
    return static_cast<const AMDGPUSubtarget&>(MF.getSubtarget<GCNSubtarget>());
  else
    return static_cast<const AMDGPUSubtarget&>(MF.getSubtarget<R600Subtarget>());
}

const AMDGPUSubtarget &AMDGPUSubtarget::get(const TargetMachine &TM,
                                            const Function &F) {
  if (TM.getTargetTriple().getArch() == Triple::amdgcn)
    return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<GCNSubtarget>(F));
  else
    return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<R600Subtarget>(F));
}