LLVM  14.0.0git
AMDGPUSubtarget.h
Go to the documentation of this file.
1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Base class for AMDGPU specific classes of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 
17 #include "llvm/ADT/Triple.h"
18 #include "llvm/IR/CallingConv.h"
19 #include "llvm/Support/Alignment.h"
20 
21 namespace llvm {
22 
23 enum AMDGPUDwarfFlavour : unsigned;
24 class Function;
25 class Instruction;
26 class MachineFunction;
27 class TargetMachine;
28 
30 public:
31  enum Generation {
32  INVALID = 0,
33  R600 = 1,
34  R700 = 2,
35  EVERGREEN = 3,
40  GFX9 = 8,
41  GFX10 = 9
42  };
43 
44 private:
45  Triple TargetTriple;
46 
47 protected:
53  bool HasSDWA;
55  bool HasMulI24;
56  bool HasMulU24;
57  bool HasSMulHi;
62  unsigned MaxWavesPerEU;
63  unsigned LocalMemorySize;
65 
66 public:
67  AMDGPUSubtarget(const Triple &TT);
68 
69  static const AMDGPUSubtarget &get(const MachineFunction &MF);
70  static const AMDGPUSubtarget &get(const TargetMachine &TM,
71  const Function &F);
72 
73  /// \returns Default flat work group size range for a calling convention.
74  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
75 
76  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
77  /// for function \p F, or minimum/maximum flat work group sizes explicitly
78  /// requested using "amdgpu-flat-work-group-size" attribute attached to
79  /// function \p F.
80  ///
81  /// \returns Subtarget's default values if explicitly requested values cannot
82  /// be converted to integer, or violate subtarget's specifications.
83  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
84 
85  /// \returns Subtarget's default pair of minimum/maximum number of waves per
86  /// execution unit for function \p F, or minimum/maximum number of waves per
87  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
88  /// attached to function \p F.
89  ///
90  /// \returns Subtarget's default values if explicitly requested values cannot
91  /// be converted to integer, violate subtarget's specifications, or are not
92  /// compatible with minimum/maximum number of waves limited by flat work group
93  /// size, register usage, and/or lds usage.
94  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const {
95  // Default/requested minimum/maximum flat work group sizes.
96  std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);
97  return getWavesPerEU(F, FlatWorkGroupSizes);
98  }
99 
100  /// Overload which uses the specified values for the flat work group sizes,
101  /// rather than querying the function itself. \p FlatWorkGroupSizes should
102  /// correspond to the function's value for getFlatWorkGroupSizes.
103  std::pair<unsigned, unsigned>
104  getWavesPerEU(const Function &F,
105  std::pair<unsigned, unsigned> FlatWorkGroupSizes) const;
106 
107  /// Return the amount of LDS that can be used that will not restrict the
108  /// occupancy lower than WaveCount.
109  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
110  const Function &) const;
111 
112  /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wave count
113  /// if the given LDS memory size is the only constraint.
114  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
115 
116  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
117 
118  bool isAmdHsaOS() const {
119  return TargetTriple.getOS() == Triple::AMDHSA;
120  }
121 
122  bool isAmdPalOS() const {
123  return TargetTriple.getOS() == Triple::AMDPAL;
124  }
125 
126  bool isMesa3DOS() const {
127  return TargetTriple.getOS() == Triple::Mesa3D;
128  }
129 
130  bool isMesaKernel(const Function &F) const;
131 
132  bool isAmdHsaOrMesa(const Function &F) const {
133  return isAmdHsaOS() || isMesaKernel(F);
134  }
135 
136  bool isGCN() const {
137  return TargetTriple.getArch() == Triple::amdgcn;
138  }
139 
140  bool isGCN3Encoding() const {
141  return GCN3Encoding;
142  }
143 
144  bool has16BitInsts() const {
145  return Has16BitInsts;
146  }
147 
148  bool hasMadMixInsts() const {
149  return HasMadMixInsts;
150  }
151 
152  bool hasMadMacF32Insts() const {
153  return HasMadMacF32Insts || !isGCN();
154  }
155 
156  bool hasDsSrc2Insts() const {
157  return HasDsSrc2Insts;
158  }
159 
160  bool hasSDWA() const {
161  return HasSDWA;
162  }
163 
164  bool hasVOP3PInsts() const {
165  return HasVOP3PInsts;
166  }
167 
168  bool hasMulI24() const {
169  return HasMulI24;
170  }
171 
172  bool hasMulU24() const {
173  return HasMulU24;
174  }
175 
176  bool hasSMulHi() const {
177  return HasSMulHi;
178  }
179 
180  bool hasInv2PiInlineImm() const {
181  return HasInv2PiInlineImm;
182  }
183 
184  bool hasFminFmaxLegacy() const {
185  return HasFminFmaxLegacy;
186  }
187 
188  bool hasTrigReducedRange() const {
189  return HasTrigReducedRange;
190  }
191 
192  bool isPromoteAllocaEnabled() const {
193  return EnablePromoteAlloca;
194  }
195 
196  unsigned getWavefrontSize() const {
197  return 1 << WavefrontSizeLog2;
198  }
199 
200  unsigned getWavefrontSizeLog2() const {
201  return WavefrontSizeLog2;
202  }
203 
204  unsigned getLocalMemorySize() const {
205  return LocalMemorySize;
206  }
207 
209  return isAmdHsaOS() ? Align(8) : Align(4);
210  }
211 
212  /// Returns the offset in bytes from the start of the input buffer
213  /// of the first explicit kernel argument.
214  unsigned getExplicitKernelArgOffset(const Function &F) const {
215  return isAmdHsaOrMesa(F) ? 0 : 36;
216  }
217 
218  /// \returns Maximum number of work groups per compute unit supported by the
219  /// subtarget and limited by given \p FlatWorkGroupSize.
220  virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
221 
222  /// \returns Minimum flat work group size supported by the subtarget.
223  virtual unsigned getMinFlatWorkGroupSize() const = 0;
224 
225  /// \returns Maximum flat work group size supported by the subtarget.
226  virtual unsigned getMaxFlatWorkGroupSize() const = 0;
227 
228  /// \returns Number of waves per execution unit required to support the given
229  /// \p FlatWorkGroupSize.
230  virtual unsigned
231  getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0;
232 
233  /// \returns Minimum number of waves per execution unit supported by the
234  /// subtarget.
235  virtual unsigned getMinWavesPerEU() const = 0;
236 
237  /// \returns Maximum number of waves per execution unit supported by the
238  /// subtarget without any kind of limitation.
239  unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; }
240 
241  /// Return the maximum workitem ID value in the function, for the given (0, 1,
242  /// 2) dimension.
243  unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const;
244 
245  /// Creates value range metadata on a workitemid.* intrinsic call or load.
246  bool makeLIDRangeMetadata(Instruction *I) const;
247 
248  /// \returns Number of bytes of arguments that are passed to a shader or
249  /// kernel in addition to the explicit ones declared for the function.
250  unsigned getImplicitArgNumBytes(const Function &F) const;
251  uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
252  unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;
253 
254  /// \returns Corresponding DWARF register number mapping flavour for the
255  /// \p WavefrontSize.
257 
258  virtual ~AMDGPUSubtarget() {}
259 };
260 
261 } // end namespace llvm
262 
263 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
llvm::AMDGPUSubtarget::getAlignmentForImplicitArgPtr
Align getAlignmentForImplicitArgPtr() const
Definition: AMDGPUSubtarget.h:208
llvm::AMDGPUSubtarget::HasMulI24
bool HasMulI24
Definition: AMDGPUSubtarget.h:55
llvm::AMDGPUSubtarget::hasInv2PiInlineImm
bool hasInv2PiInlineImm() const
Definition: AMDGPUSubtarget.h:180
llvm
--------------------- PointerInfo ---------------------
Definition: AllocatorList.h:23
llvm::AMDGPUSubtarget::HasDsSrc2Insts
bool HasDsSrc2Insts
Definition: AMDGPUSubtarget.h:52
llvm::AMDGPUSubtarget::NORTHERN_ISLANDS
@ NORTHERN_ISLANDS
Definition: AMDGPUSubtarget.h:36
llvm::AMDGPUSubtarget::HasFminFmaxLegacy
bool HasFminFmaxLegacy
Definition: AMDGPUSubtarget.h:59
llvm::Function
Definition: Function.h:61
llvm::AMDGPUSubtarget::hasMulI24
bool hasMulI24() const
Definition: AMDGPUSubtarget.h:168
llvm::AMDGPUSubtarget::hasMadMixInsts
bool hasMadMixInsts() const
Definition: AMDGPUSubtarget.h:148
llvm::AMDGPUSubtarget::EnablePromoteAlloca
bool EnablePromoteAlloca
Definition: AMDGPUSubtarget.h:60
llvm::AMDGPUSubtarget::GFX9
@ GFX9
Definition: AMDGPUSubtarget.h:40
llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS
@ SOUTHERN_ISLANDS
Definition: AMDGPUSubtarget.h:37
llvm::Triple::amdgcn
@ amdgcn
Definition: Triple.h:72
llvm::AMDGPUSubtarget::getMinWavesPerEU
virtual unsigned getMinWavesPerEU() const =0
llvm::AMDGPUSubtarget::SEA_ISLANDS
@ SEA_ISLANDS
Definition: AMDGPUSubtarget.h:38
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:45
llvm::AMDGPUSubtarget::getOccupancyWithLocalMemSize
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemSizeWithWaveCount.
Definition: AMDGPUSubtarget.cpp:455
llvm::AMDGPUSubtarget::getMaxWavesPerEU
unsigned getMaxWavesPerEU() const
Definition: AMDGPUSubtarget.h:239
llvm::AMDGPUSubtarget::hasSDWA
bool hasSDWA() const
Definition: AMDGPUSubtarget.h:160
llvm::AMDGPUSubtarget::getMaxFlatWorkGroupSize
virtual unsigned getMaxFlatWorkGroupSize() const =0
llvm::AMDGPUSubtarget::getKernArgSegmentSize
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
Definition: AMDGPUSubtarget.cpp:683
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::AMDGPUSubtarget::isAmdHsaOS
bool isAmdHsaOS() const
Definition: AMDGPUSubtarget.h:118
llvm::AMDGPUSubtarget::hasSMulHi
bool hasSMulHi() const
Definition: AMDGPUSubtarget.h:176
llvm::AMDGPUSubtarget::get
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Definition: AMDGPUSubtarget.cpp:1108
llvm::Triple::Mesa3D
@ Mesa3D
Definition: Triple.h:195
llvm::AMDGPUSubtarget::isMesa3DOS
bool isMesa3DOS() const
Definition: AMDGPUSubtarget.h:126
llvm::AMDGPUSubtarget::HasInv2PiInlineImm
bool HasInv2PiInlineImm
Definition: AMDGPUSubtarget.h:58
llvm::AMDGPUSubtarget::HasMadMixInsts
bool HasMadMixInsts
Definition: AMDGPUSubtarget.h:50
llvm::AMDGPUSubtarget::GCN3Encoding
bool GCN3Encoding
Definition: AMDGPUSubtarget.h:48
llvm::AMDGPUSubtarget::~AMDGPUSubtarget
virtual ~AMDGPUSubtarget()
Definition: AMDGPUSubtarget.h:258
llvm::AMDGPUSubtarget::getImplicitArgNumBytes
unsigned getImplicitArgNumBytes(const Function &F) const
Definition: AMDGPUSubtarget.cpp:653
llvm::AMDGPUSubtarget::isGCN
bool isGCN() const
Definition: AMDGPUSubtarget.h:136
llvm::AMDGPUSubtarget::getWavesPerEUForWorkGroup
virtual unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const =0
llvm::AMDGPUSubtarget::isMesaKernel
bool isMesaKernel(const Function &F) const
Definition: AMDGPUSubtarget.cpp:579
llvm::Instruction
Definition: Instruction.h:45
llvm::AMDGPUSubtarget::Has16BitInsts
bool Has16BitInsts
Definition: AMDGPUSubtarget.h:49
llvm::AMDGPUSubtarget::getMaxWorkitemID
unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const
Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
Definition: AMDGPUSubtarget.cpp:583
llvm::AMDGPUSubtarget::has16BitInsts
bool has16BitInsts() const
Definition: AMDGPUSubtarget.h:144
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::Triple::getArch
ArchType getArch() const
getArch - Get the parsed architecture type of this triple.
Definition: Triple.h:307
llvm::AMDGPUSubtarget::getLocalMemorySize
unsigned getLocalMemorySize() const
Definition: AMDGPUSubtarget.h:204
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::AMDGPUSubtarget::AMDGPUSubtarget
AMDGPUSubtarget(const Triple &TT)
Definition: AMDGPUSubtarget.cpp:162
llvm::AMDGPUSubtarget::getWavefrontSize
unsigned getWavefrontSize() const
Definition: AMDGPUSubtarget.h:196
llvm::Triple::AMDHSA
@ AMDHSA
Definition: Triple.h:190
llvm::AMDGPUSubtarget::GFX10
@ GFX10
Definition: AMDGPUSubtarget.h:41
llvm::AMDGPUSubtarget::HasTrigReducedRange
bool HasTrigReducedRange
Definition: AMDGPUSubtarget.h:61
llvm::AMDGPUDwarfFlavour
AMDGPUDwarfFlavour
Definition: AMDGPUMCTargetDesc.h:31
llvm::AMDGPUSubtarget::WavefrontSizeLog2
char WavefrontSizeLog2
Definition: AMDGPUSubtarget.h:64
llvm::AMDGPUSubtarget::makeLIDRangeMetadata
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
Definition: AMDGPUSubtarget.cpp:591
llvm::AMDGPUSubtarget
Definition: AMDGPUSubtarget.h:29
uint64_t
llvm::omp::Kernel
Function * Kernel
Summary of a kernel (=entry point for target offloading).
Definition: OpenMPOpt.h:21
llvm::Triple::getOS
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:316
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::AMDGPUSubtarget::hasMadMacF32Insts
bool hasMadMacF32Insts() const
Definition: AMDGPUSubtarget.h:152
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:79
llvm::AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
Definition: AMDGPUSubtarget.cpp:442
llvm::AMDGPUSubtarget::R700
@ R700
Definition: AMDGPUSubtarget.h:34
llvm::AMDGPUSubtarget::hasFminFmaxLegacy
bool hasFminFmaxLegacy() const
Definition: AMDGPUSubtarget.h:184
llvm::AMDGPUSubtarget::MaxWavesPerEU
unsigned MaxWavesPerEU
Definition: AMDGPUSubtarget.h:62
llvm::AMDGPUSubtarget::getExplicitKernelArgOffset
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
Definition: AMDGPUSubtarget.h:214
llvm::MachineFunction
Definition: MachineFunction.h:230
Triple.h
llvm::AMDGPUSubtarget::getMinFlatWorkGroupSize
virtual unsigned getMinFlatWorkGroupSize() const =0
llvm::AMDGPUSubtarget::hasTrigReducedRange
bool hasTrigReducedRange() const
Definition: AMDGPUSubtarget.h:188
llvm::AMDGPUSubtarget::hasVOP3PInsts
bool hasVOP3PInsts() const
Definition: AMDGPUSubtarget.h:164
llvm::AMDGPUSubtarget::hasMulU24
bool hasMulU24() const
Definition: AMDGPUSubtarget.h:172
llvm::AMDGPUSubtarget::getAMDGPUDwarfFlavour
AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const
Definition: AMDGPUSubtarget.cpp:700
uint32_t
llvm::AMDGPUSubtarget::isGCN3Encoding
bool isGCN3Encoding() const
Definition: AMDGPUSubtarget.h:140
llvm::AMDGPUSubtarget::hasDsSrc2Insts
bool hasDsSrc2Insts() const
Definition: AMDGPUSubtarget.h:156
llvm::AMDGPUSubtarget::getMaxWorkGroupsPerCU
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0
CallingConv.h
llvm::AMDGPUSubtarget::getExplicitKernArgSize
uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const
Definition: AMDGPUSubtarget.cpp:659
llvm::AMDGPUSubtarget::INVALID
@ INVALID
Definition: AMDGPUSubtarget.h:32
Alignment.h
llvm::AMDGPUSubtarget::HasSMulHi
bool HasSMulHi
Definition: AMDGPUSubtarget.h:57
llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS
@ VOLCANIC_ISLANDS
Definition: AMDGPUSubtarget.h:39
llvm::Triple::AMDPAL
@ AMDPAL
Definition: Triple.h:197
llvm::AMDGPUSubtarget::LocalMemorySize
unsigned LocalMemorySize
Definition: AMDGPUSubtarget.h:63
llvm::AMDGPUSubtarget::getDefaultFlatWorkGroupSize
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
Definition: AMDGPUSubtarget.cpp:498
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::AMDGPUSubtarget::Generation
Generation
Definition: AMDGPUSubtarget.h:31
llvm::AMDGPUSubtarget::HasMulU24
bool HasMulU24
Definition: AMDGPUSubtarget.h:56
llvm::AMDGPUSubtarget::getFlatWorkGroupSizes
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
Definition: AMDGPUSubtarget.cpp:512
llvm::AMDGPUSubtarget::R600
@ R600
Definition: AMDGPUSubtarget.h:33
llvm::AMDGPUSubtarget::isPromoteAllocaEnabled
bool isPromoteAllocaEnabled() const
Definition: AMDGPUSubtarget.h:192
llvm::AMDGPUSubtarget::HasSDWA
bool HasSDWA
Definition: AMDGPUSubtarget.h:53
llvm::AMDGPUSubtarget::HasVOP3PInsts
bool HasVOP3PInsts
Definition: AMDGPUSubtarget.h:54
llvm::AMDGPUSubtarget::getWavesPerEU
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
Definition: AMDGPUSubtarget.h:94
llvm::AMDGPUSubtarget::getWavefrontSizeLog2
unsigned getWavefrontSizeLog2() const
Definition: AMDGPUSubtarget.h:200
llvm::AMDGPUSubtarget::HasMadMacF32Insts
bool HasMadMacF32Insts
Definition: AMDGPUSubtarget.h:51
llvm::AMDGPUSubtarget::EVERGREEN
@ EVERGREEN
Definition: AMDGPUSubtarget.h:35
llvm::AMDGPUSubtarget::isAmdHsaOrMesa
bool isAmdHsaOrMesa(const Function &F) const
Definition: AMDGPUSubtarget.h:132
llvm::codeview::PublicSymFlags::Function
@ Function
llvm::AMDGPUSubtarget::isAmdPalOS
bool isAmdPalOS() const
Definition: AMDGPUSubtarget.h:122