LLVM  15.0.0git
AMDGPUSubtarget.h
Go to the documentation of this file.
1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Base class for AMDGPU specific classes of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 
17 #include "llvm/ADT/Triple.h"
18 #include "llvm/IR/CallingConv.h"
19 #include "llvm/Support/Alignment.h"
20 
21 namespace llvm {
22 
23 enum AMDGPUDwarfFlavour : unsigned;
24 class Function;
25 class Instruction;
26 class MachineFunction;
27 class TargetMachine;
28 
/// Base class for AMDGPU specific classes of TargetSubtarget.
29 class AMDGPUSubtarget {
30 public:
/// Subtarget generation identifiers. Enumerators carry explicit, consecutive
/// values starting at INVALID = 0.
31  enum Generation {
32  INVALID = 0,
33  R600 = 1,
34  R700 = 2,
35  EVERGREEN = 3,
36  NORTHERN_ISLANDS = 4,
37  SOUTHERN_ISLANDS = 5,
38  SEA_ISLANDS = 6,
39  VOLCANIC_ISLANDS = 7,
40  GFX9 = 8,
41  GFX10 = 9,
42  GFX11 = 10
43  };
44 
45 private:
46  Triple TargetTriple;
47 
48 protected:
49  bool GCN3Encoding = false;
50  bool Has16BitInsts = false;
51  bool HasTrue16BitInsts = false;
52  bool HasMadMixInsts = false;
53  bool HasMadMacF32Insts = false;
54  bool HasDsSrc2Insts = false;
55  bool HasSDWA = false;
56  bool HasVOP3PInsts = false;
57  bool HasMulI24 = true;
58  bool HasMulU24 = true;
59  bool HasSMulHi = false;
60  bool HasInv2PiInlineImm = false;
61  bool HasFminFmaxLegacy = true;
62  bool EnablePromoteAlloca = false;
63  bool HasTrigReducedRange = false;
64  unsigned MaxWavesPerEU = 10;
65  unsigned LocalMemorySize = 0;
67 
68 public:
69  AMDGPUSubtarget(const Triple &TT);
70 
71  static const AMDGPUSubtarget &get(const MachineFunction &MF);
72  static const AMDGPUSubtarget &get(const TargetMachine &TM,
73  const Function &F);
74 
75  /// \returns Default minimum/maximum flat work group sizes for a calling convention.
76  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
77 
78  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
79  /// for function \p F, or minimum/maximum flat work group sizes explicitly
80  /// requested using "amdgpu-flat-work-group-size" attribute attached to
81  /// function \p F.
82  ///
83  /// \returns Subtarget's default values if explicitly requested values cannot
84  /// be converted to integer, or violate subtarget's specifications.
85  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
86 
87  /// \returns Subtarget's default pair of minimum/maximum number of waves per
88  /// execution unit for function \p F, or minimum/maximum number of waves per
89  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
90  /// attached to function \p F.
91  ///
92  /// \returns Subtarget's default values if explicitly requested values cannot
93  /// be converted to integer, violate subtarget's specifications, or are not
94  /// compatible with minimum/maximum number of waves limited by flat work group
95  /// size, register usage, and/or lds usage.
96  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const {
97  // Default/requested minimum/maximum flat work group sizes for \p F.
98  std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);
// Forward to the two-argument overload so both entry points share one
// implementation.
99  return getWavesPerEU(F, FlatWorkGroupSizes);
100  }
101 
102  /// Overload which uses the specified values for the flat work group sizes,
103  /// rather than querying the function itself. \p FlatWorkGroupSizes should
104  /// correspond to the function's value for getFlatWorkGroupSizes.
105  std::pair<unsigned, unsigned>
106  getWavesPerEU(const Function &F,
107  std::pair<unsigned, unsigned> FlatWorkGroupSizes) const;
108 
109  /// Return the amount of LDS that can be used that will not restrict the
110  /// occupancy lower than WaveCount.
111  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
112  const Function &) const;
113 
114  /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wavecount if
115  /// the given LDS memory size is the only constraint.
116  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
117 
118  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
119 
/// \returns true if the OS component of the target triple is AMDHSA.
120  bool isAmdHsaOS() const {
121  return TargetTriple.getOS() == Triple::AMDHSA;
122  }
123 
/// \returns true if the OS component of the target triple is AMDPAL.
124  bool isAmdPalOS() const {
125  return TargetTriple.getOS() == Triple::AMDPAL;
126  }
127 
/// \returns true if the OS component of the target triple is Mesa3D.
128  bool isMesa3DOS() const {
129  return TargetTriple.getOS() == Triple::Mesa3D;
130  }
131 
132  bool isMesaKernel(const Function &F) const;
133 
/// \returns true if the target OS is AMDHSA or isMesaKernel(\p F) holds.
/// isMesaKernel is only evaluated when the OS is not AMDHSA.
134  bool isAmdHsaOrMesa(const Function &F) const {
135  return isAmdHsaOS() || isMesaKernel(F);
136  }
137 
/// \returns true if the architecture of the target triple is amdgcn.
138  bool isGCN() const {
139  return TargetTriple.getArch() == Triple::amdgcn;
140  }
141 
142  bool isGCN3Encoding() const {
143  return GCN3Encoding;
144  }
145 
146  bool has16BitInsts() const {
147  return Has16BitInsts;
148  }
149 
150  bool hasTrue16BitInsts() const { return HasTrue16BitInsts; }
151 
152  bool hasMadMixInsts() const {
153  return HasMadMixInsts;
154  }
155 
/// \returns true if the HasMadMacF32Insts feature flag is set, and always true
/// when the target architecture is not amdgcn (see isGCN()).
156  bool hasMadMacF32Insts() const {
157  return HasMadMacF32Insts || !isGCN();
158  }
159 
160  bool hasDsSrc2Insts() const {
161  return HasDsSrc2Insts;
162  }
163 
164  bool hasSDWA() const {
165  return HasSDWA;
166  }
167 
168  bool hasVOP3PInsts() const {
169  return HasVOP3PInsts;
170  }
171 
172  bool hasMulI24() const {
173  return HasMulI24;
174  }
175 
176  bool hasMulU24() const {
177  return HasMulU24;
178  }
179 
180  bool hasSMulHi() const {
181  return HasSMulHi;
182  }
183 
184  bool hasInv2PiInlineImm() const {
185  return HasInv2PiInlineImm;
186  }
187 
188  bool hasFminFmaxLegacy() const {
189  return HasFminFmaxLegacy;
190  }
191 
192  bool hasTrigReducedRange() const {
193  return HasTrigReducedRange;
194  }
195 
196  bool isPromoteAllocaEnabled() const {
197  return EnablePromoteAlloca;
198  }
199 
/// \returns The wavefront size, computed as 2 raised to WavefrontSizeLog2.
200  unsigned getWavefrontSize() const {
// Shift an unsigned one so the arithmetic is done in the return type instead
// of in signed int.
201  return 1u << WavefrontSizeLog2;
202  }
203 
204  unsigned getWavefrontSizeLog2() const {
205  return WavefrontSizeLog2;
206  }
207 
208  unsigned getLocalMemorySize() const {
209  return LocalMemorySize;
210  }
211 
/// \returns Alignment used for the implicit argument pointer: 8 bytes when the
/// target OS is AMDHSA, 4 bytes otherwise.
212  Align getAlignmentForImplicitArgPtr() const {
213  return isAmdHsaOS() ? Align(8) : Align(4);
214  }
215 
216  /// Returns the offset in bytes from the start of the input buffer
217  /// of the first explicit kernel argument.
/// The result depends only on the OS of the target triple; \p F is not
/// inspected.
218  unsigned getExplicitKernelArgOffset(const Function &F) const {
219  switch (TargetTriple.getOS()) {
220  case Triple::AMDHSA:
221  case Triple::AMDPAL:
222  case Triple::Mesa3D:
223  return 0;
224  case Triple::UnknownOS:
225  default:
226  // For legacy reasons unknown/other is treated as a different version of
227  // mesa.
228  return 36;
229  }
// Every path through the switch returns; nothing can fall out of it.
232  }
233 
234  /// \returns Maximum number of work groups per compute unit supported by the
235  /// subtarget and limited by given \p FlatWorkGroupSize.
236  virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
237 
238  /// \returns Minimum flat work group size supported by the subtarget.
239  virtual unsigned getMinFlatWorkGroupSize() const = 0;
240 
241  /// \returns Maximum flat work group size supported by the subtarget.
242  virtual unsigned getMaxFlatWorkGroupSize() const = 0;
243 
244  /// \returns Number of waves per execution unit required to support the given
245  /// \p FlatWorkGroupSize.
246  virtual unsigned
247  getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0;
248 
249  /// \returns Minimum number of waves per execution unit supported by the
250  /// subtarget.
251  virtual unsigned getMinWavesPerEU() const = 0;
252 
253  /// \returns Maximum number of waves per execution unit supported by the
254  /// subtarget without any kind of limitation.
255  unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; }
256 
257  /// Return the maximum workitem ID value in the function, for the given (0, 1,
258  /// 2) dimension.
259  unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const;
260 
261  /// Creates value range metadata on a workitemid.* intrinsic call or load.
262  bool makeLIDRangeMetadata(Instruction *I) const;
263 
264  /// \returns Number of bytes of arguments that are passed to a shader or
265  /// kernel in addition to the explicit ones declared for the function.
266  unsigned getImplicitArgNumBytes(const Function &F) const;
267  uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
268  unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;
269 
270  /// \returns Corresponding DWARF register number mapping flavour for the
271  /// \p WavefrontSize.
272  AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const;
273 
274  virtual ~AMDGPUSubtarget() = default;
275 };
276 
277 } // end namespace llvm
278 
279 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
llvm::AMDGPUSubtarget::HasTrue16BitInsts
bool HasTrue16BitInsts
Definition: AMDGPUSubtarget.h:51
llvm::AMDGPUSubtarget::getAlignmentForImplicitArgPtr
Align getAlignmentForImplicitArgPtr() const
Definition: AMDGPUSubtarget.h:212
llvm::AMDGPUSubtarget::HasMulI24
bool HasMulI24
Definition: AMDGPUSubtarget.h:57
llvm::AMDGPUSubtarget::hasInv2PiInlineImm
bool hasInv2PiInlineImm() const
Definition: AMDGPUSubtarget.h:184
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::AMDGPUSubtarget::HasDsSrc2Insts
bool HasDsSrc2Insts
Definition: AMDGPUSubtarget.h:54
llvm::AMDGPUSubtarget::NORTHERN_ISLANDS
@ NORTHERN_ISLANDS
Definition: AMDGPUSubtarget.h:36
llvm::AMDGPUSubtarget::HasFminFmaxLegacy
bool HasFminFmaxLegacy
Definition: AMDGPUSubtarget.h:61
llvm::Function
Definition: Function.h:60
llvm::AMDGPUSubtarget::hasMulI24
bool hasMulI24() const
Definition: AMDGPUSubtarget.h:172
llvm::AMDGPUSubtarget::hasMadMixInsts
bool hasMadMixInsts() const
Definition: AMDGPUSubtarget.h:152
llvm::AMDGPUSubtarget::EnablePromoteAlloca
bool EnablePromoteAlloca
Definition: AMDGPUSubtarget.h:62
llvm::AMDGPUSubtarget::GFX9
@ GFX9
Definition: AMDGPUSubtarget.h:40
llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS
@ SOUTHERN_ISLANDS
Definition: AMDGPUSubtarget.h:37
llvm::Triple::amdgcn
@ amdgcn
Definition: Triple.h:74
llvm::AMDGPUSubtarget::getMinWavesPerEU
virtual unsigned getMinWavesPerEU() const =0
llvm::AMDGPUSubtarget::SEA_ISLANDS
@ SEA_ISLANDS
Definition: AMDGPUSubtarget.h:38
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
llvm::AMDGPUSubtarget::getOccupancyWithLocalMemSize
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemSizeWithWaveCount.
Definition: AMDGPUSubtarget.cpp:315
llvm::AMDGPUSubtarget::getMaxWavesPerEU
unsigned getMaxWavesPerEU() const
Definition: AMDGPUSubtarget.h:255
llvm::AMDGPUSubtarget::hasSDWA
bool hasSDWA() const
Definition: AMDGPUSubtarget.h:164
llvm::AMDGPUSubtarget::getMaxFlatWorkGroupSize
virtual unsigned getMaxFlatWorkGroupSize() const =0
llvm::AMDGPUSubtarget::getKernArgSegmentSize
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
Definition: AMDGPUSubtarget.cpp:548
llvm::AMDGPUSubtarget::GFX11
@ GFX11
Definition: AMDGPUSubtarget.h:42
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::AMDGPUSubtarget::isAmdHsaOS
bool isAmdHsaOS() const
Definition: AMDGPUSubtarget.h:120
llvm::AMDGPUSubtarget::hasSMulHi
bool hasSMulHi() const
Definition: AMDGPUSubtarget.h:180
llvm::AMDGPUSubtarget::get
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Definition: AMDGPUSubtarget.cpp:972
llvm::Triple::Mesa3D
@ Mesa3D
Definition: Triple.h:214
llvm::AMDGPUSubtarget::isMesa3DOS
bool isMesa3DOS() const
Definition: AMDGPUSubtarget.h:128
llvm::AMDGPUSubtarget::HasInv2PiInlineImm
bool HasInv2PiInlineImm
Definition: AMDGPUSubtarget.h:60
llvm::AMDGPUSubtarget::HasMadMixInsts
bool HasMadMixInsts
Definition: AMDGPUSubtarget.h:52
llvm::AMDGPUSubtarget::GCN3Encoding
bool GCN3Encoding
Definition: AMDGPUSubtarget.h:49
llvm::AMDGPUSubtarget::getImplicitArgNumBytes
unsigned getImplicitArgNumBytes(const Function &F) const
Definition: AMDGPUSubtarget.cpp:510
llvm::AMDGPUSubtarget::isGCN
bool isGCN() const
Definition: AMDGPUSubtarget.h:138
llvm::AMDGPUSubtarget::getWavesPerEUForWorkGroup
virtual unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const =0
llvm::AMDGPUSubtarget::isMesaKernel
bool isMesaKernel(const Function &F) const
Definition: AMDGPUSubtarget.cpp:436
llvm::Instruction
Definition: Instruction.h:42
llvm::AMDGPUSubtarget::Has16BitInsts
bool Has16BitInsts
Definition: AMDGPUSubtarget.h:50
llvm::AMDGPUSubtarget::getMaxWorkitemID
unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const
Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
Definition: AMDGPUSubtarget.cpp:440
llvm::AMDGPUSubtarget::has16BitInsts
bool has16BitInsts() const
Definition: AMDGPUSubtarget.h:146
Align
uint64_t Align
Definition: ELFObjHandler.cpp:81
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::Triple::getArch
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:345
llvm::AMDGPUSubtarget::getLocalMemorySize
unsigned getLocalMemorySize() const
Definition: AMDGPUSubtarget.h:208
llvm::AMDGPUSubtarget::~AMDGPUSubtarget
virtual ~AMDGPUSubtarget()=default
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::AMDGPUSubtarget::AMDGPUSubtarget
AMDGPUSubtarget(const Triple &TT)
Definition: AMDGPUSubtarget.cpp:157
llvm::AMDGPUSubtarget::getWavefrontSize
unsigned getWavefrontSize() const
Definition: AMDGPUSubtarget.h:200
llvm::Triple::AMDHSA
@ AMDHSA
Definition: Triple.h:207
llvm::AMDGPUSubtarget::GFX10
@ GFX10
Definition: AMDGPUSubtarget.h:41
llvm::AMDGPUSubtarget::HasTrigReducedRange
bool HasTrigReducedRange
Definition: AMDGPUSubtarget.h:63
llvm::AMDGPUDwarfFlavour
AMDGPUDwarfFlavour
Definition: AMDGPUMCTargetDesc.h:31
llvm::AMDGPUSubtarget::WavefrontSizeLog2
char WavefrontSizeLog2
Definition: AMDGPUSubtarget.h:66
llvm::AMDGPUSubtarget::makeLIDRangeMetadata
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
Definition: AMDGPUSubtarget.cpp:448
llvm::AMDGPUSubtarget
Definition: AMDGPUSubtarget.h:29
uint64_t
llvm::Triple::getOS
OSType getOS() const
Get the parsed operating system type of this triple.
Definition: Triple.h:354
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::AMDGPUSubtarget::hasMadMacF32Insts
bool hasMadMacF32Insts() const
Definition: AMDGPUSubtarget.h:156
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
Definition: AMDGPUSubtarget.cpp:302
llvm::AMDGPUSubtarget::R700
@ R700
Definition: AMDGPUSubtarget.h:34
llvm::AMDGPUSubtarget::hasFminFmaxLegacy
bool hasFminFmaxLegacy() const
Definition: AMDGPUSubtarget.h:188
llvm::AMDGPUSubtarget::MaxWavesPerEU
unsigned MaxWavesPerEU
Definition: AMDGPUSubtarget.h:64
llvm::AMDGPUSubtarget::getExplicitKernelArgOffset
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
Definition: AMDGPUSubtarget.h:218
llvm::MachineFunction
Definition: MachineFunction.h:257
Triple.h
llvm::AMDGPUSubtarget::getMinFlatWorkGroupSize
virtual unsigned getMinFlatWorkGroupSize() const =0
llvm::AMDGPUSubtarget::hasTrigReducedRange
bool hasTrigReducedRange() const
Definition: AMDGPUSubtarget.h:192
llvm::AMDGPUSubtarget::hasVOP3PInsts
bool hasVOP3PInsts() const
Definition: AMDGPUSubtarget.h:168
llvm::AMDGPUSubtarget::hasMulU24
bool hasMulU24() const
Definition: AMDGPUSubtarget.h:176
llvm::AMDGPUSubtarget::getAMDGPUDwarfFlavour
AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const
Definition: AMDGPUSubtarget.cpp:566
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
uint32_t
llvm::AMDGPUSubtarget::isGCN3Encoding
bool isGCN3Encoding() const
Definition: AMDGPUSubtarget.h:142
llvm::AMDGPUSubtarget::hasDsSrc2Insts
bool hasDsSrc2Insts() const
Definition: AMDGPUSubtarget.h:160
llvm::AMDGPUSubtarget::getMaxWorkGroupsPerCU
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0
CallingConv.h
llvm::AMDGPUSubtarget::getExplicitKernArgSize
uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const
Definition: AMDGPUSubtarget.cpp:526
llvm::AMDGPUSubtarget::INVALID
@ INVALID
Definition: AMDGPUSubtarget.h:32
Alignment.h
llvm::AMDGPUSubtarget::HasSMulHi
bool HasSMulHi
Definition: AMDGPUSubtarget.h:59
llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS
@ VOLCANIC_ISLANDS
Definition: AMDGPUSubtarget.h:39
llvm::AMDGPUSubtarget::hasTrue16BitInsts
bool hasTrue16BitInsts() const
Definition: AMDGPUSubtarget.h:150
llvm::Triple::AMDPAL
@ AMDPAL
Definition: Triple.h:216
llvm::AMDGPUSubtarget::LocalMemorySize
unsigned LocalMemorySize
Definition: AMDGPUSubtarget.h:65
llvm::AMDGPUSubtarget::getDefaultFlatWorkGroupSize
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
Definition: AMDGPUSubtarget.cpp:358
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::AMDGPUSubtarget::Generation
Generation
Definition: AMDGPUSubtarget.h:31
llvm::AMDGPUSubtarget::HasMulU24
bool HasMulU24
Definition: AMDGPUSubtarget.h:58
llvm::AMDGPUSubtarget::getFlatWorkGroupSizes
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
Definition: AMDGPUSubtarget.cpp:372
llvm::AMDGPUSubtarget::R600
@ R600
Definition: AMDGPUSubtarget.h:33
llvm::Triple::UnknownOS
@ UnknownOS
Definition: Triple.h:182
llvm::AMDGPUSubtarget::isPromoteAllocaEnabled
bool isPromoteAllocaEnabled() const
Definition: AMDGPUSubtarget.h:196
llvm::AMDGPUSubtarget::HasSDWA
bool HasSDWA
Definition: AMDGPUSubtarget.h:55
llvm::AMDGPUSubtarget::HasVOP3PInsts
bool HasVOP3PInsts
Definition: AMDGPUSubtarget.h:56
llvm::AMDGPUSubtarget::getWavesPerEU
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
Definition: AMDGPUSubtarget.h:96
llvm::AMDGPUSubtarget::getWavefrontSizeLog2
unsigned getWavefrontSizeLog2() const
Definition: AMDGPUSubtarget.h:204
llvm::AMDGPUSubtarget::HasMadMacF32Insts
bool HasMadMacF32Insts
Definition: AMDGPUSubtarget.h:53
llvm::AMDGPUSubtarget::EVERGREEN
@ EVERGREEN
Definition: AMDGPUSubtarget.h:35
llvm::AMDGPUSubtarget::isAmdHsaOrMesa
bool isAmdHsaOrMesa(const Function &F) const
Definition: AMDGPUSubtarget.h:134
llvm::codeview::PublicSymFlags::Function
@ Function
llvm::AMDGPUSubtarget::isAmdPalOS
bool isAmdPalOS() const
Definition: AMDGPUSubtarget.h:124