LLVM  13.0.0git
AMDGPUSubtarget.h
Go to the documentation of this file.
1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Base class for AMDGPU specific classes of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 
17 #include "llvm/ADT/Triple.h"
18 #include "llvm/IR/CallingConv.h"
19 #include "llvm/Support/Alignment.h"
20 
21 namespace llvm {
22 
23 enum AMDGPUDwarfFlavour : unsigned;
24 class Function;
25 class Instruction;
26 class MachineFunction;
27 class TargetMachine;
28 
30 public:
31  enum Generation {
32  INVALID = 0,
33  R600 = 1,
34  R700 = 2,
35  EVERGREEN = 3,
40  GFX9 = 8,
41  GFX10 = 9
42  };
43 
44 private:
45  Triple TargetTriple;
46 
47 protected:
53  bool HasSDWA;
55  bool HasMulI24;
56  bool HasMulU24;
61  unsigned MaxWavesPerEU;
62  unsigned LocalMemorySize;
64 
65 public:
66  AMDGPUSubtarget(const Triple &TT);
67 
68  static const AMDGPUSubtarget &get(const MachineFunction &MF);
69  static const AMDGPUSubtarget &get(const TargetMachine &TM,
70  const Function &F);
71 
72  /// \returns Default flat work group size range for a calling convention.
73  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
74 
75  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
76  /// for function \p F, or minimum/maximum flat work group sizes explicitly
77  /// requested using "amdgpu-flat-work-group-size" attribute attached to
78  /// function \p F.
79  ///
80  /// \returns Subtarget's default values if explicitly requested values cannot
81  /// be converted to integer, or violate subtarget's specifications.
82  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
83 
84  /// \returns Subtarget's default pair of minimum/maximum number of waves per
85  /// execution unit for function \p F, or minimum/maximum number of waves per
86  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
87  /// attached to function \p F.
88  ///
89  /// \returns Subtarget's default values if explicitly requested values cannot
90  /// be converted to integer, violate subtarget's specifications, or are not
91  /// compatible with minimum/maximum number of waves limited by flat work group
92  /// size, register usage, and/or lds usage.
93  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
94 
95  /// Return the amount of LDS that can be used that will not restrict the
96  /// occupancy lower than WaveCount.
97  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
98  const Function &) const;
99 
100  /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wavecount
101  /// if the given LDS memory size is the only constraint.
102  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
103 
104  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
105 
106  bool isAmdHsaOS() const {
107  return TargetTriple.getOS() == Triple::AMDHSA;
108  }
109 
110  bool isAmdPalOS() const {
111  return TargetTriple.getOS() == Triple::AMDPAL;
112  }
113 
114  bool isMesa3DOS() const {
115  return TargetTriple.getOS() == Triple::Mesa3D;
116  }
117 
118  bool isMesaKernel(const Function &F) const;
119 
120  bool isAmdHsaOrMesa(const Function &F) const {
121  return isAmdHsaOS() || isMesaKernel(F);
122  }
123 
124  bool isGCN() const {
125  return TargetTriple.getArch() == Triple::amdgcn;
126  }
127 
128  bool isGCN3Encoding() const {
129  return GCN3Encoding;
130  }
131 
132  bool has16BitInsts() const {
133  return Has16BitInsts;
134  }
135 
136  bool hasMadMixInsts() const {
137  return HasMadMixInsts;
138  }
139 
140  bool hasMadMacF32Insts() const {
141  return HasMadMacF32Insts || !isGCN();
142  }
143 
144  bool hasDsSrc2Insts() const {
145  return HasDsSrc2Insts;
146  }
147 
148  bool hasSDWA() const {
149  return HasSDWA;
150  }
151 
152  bool hasVOP3PInsts() const {
153  return HasVOP3PInsts;
154  }
155 
156  bool hasMulI24() const {
157  return HasMulI24;
158  }
159 
160  bool hasMulU24() const {
161  return HasMulU24;
162  }
163 
164  bool hasInv2PiInlineImm() const {
165  return HasInv2PiInlineImm;
166  }
167 
168  bool hasFminFmaxLegacy() const {
169  return HasFminFmaxLegacy;
170  }
171 
172  bool hasTrigReducedRange() const {
173  return HasTrigReducedRange;
174  }
175 
176  bool isPromoteAllocaEnabled() const {
177  return EnablePromoteAlloca;
178  }
179 
180  unsigned getWavefrontSize() const {
181  return 1 << WavefrontSizeLog2;
182  }
183 
184  unsigned getWavefrontSizeLog2() const {
185  return WavefrontSizeLog2;
186  }
187 
188  unsigned getLocalMemorySize() const {
189  return LocalMemorySize;
190  }
191 
193  return isAmdHsaOS() ? Align(8) : Align(4);
194  }
195 
196  /// Returns the offset in bytes from the start of the input buffer
197  /// of the first explicit kernel argument.
198  unsigned getExplicitKernelArgOffset(const Function &F) const {
199  return isAmdHsaOrMesa(F) ? 0 : 36;
200  }
201 
202  /// \returns Maximum number of work groups per compute unit supported by the
203  /// subtarget and limited by given \p FlatWorkGroupSize.
204  virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
205 
206  /// \returns Minimum flat work group size supported by the subtarget.
207  virtual unsigned getMinFlatWorkGroupSize() const = 0;
208 
209  /// \returns Maximum flat work group size supported by the subtarget.
210  virtual unsigned getMaxFlatWorkGroupSize() const = 0;
211 
212  /// \returns Number of waves per execution unit required to support the given
213  /// \p FlatWorkGroupSize.
214  virtual unsigned
215  getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0;
216 
217  /// \returns Minimum number of waves per execution unit supported by the
218  /// subtarget.
219  virtual unsigned getMinWavesPerEU() const = 0;
220 
221  /// \returns Maximum number of waves per execution unit supported by the
222  /// subtarget without any kind of limitation.
223  unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; }
224 
225  /// Return the maximum workitem ID value in the function, for the given (0, 1,
226  /// 2) dimension.
227  unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const;
228 
229  /// Creates value range metadata on a workitemid.* intrinsic call or load.
230  bool makeLIDRangeMetadata(Instruction *I) const;
231 
232  /// \returns Number of bytes of arguments that are passed to a shader or
233  /// kernel in addition to the explicit ones declared for the function.
234  unsigned getImplicitArgNumBytes(const Function &F) const;
235  uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
236  unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;
237 
238  /// \returns Corresponding DWARF register number mapping flavour for the
239  /// \p WavefrontSize.
241 
242  virtual ~AMDGPUSubtarget() {}
243 };
244 
245 } // end namespace llvm
246 
247 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
llvm::AMDGPUSubtarget::getAlignmentForImplicitArgPtr
Align getAlignmentForImplicitArgPtr() const
Definition: AMDGPUSubtarget.h:192
llvm::AMDGPUSubtarget::HasMulI24
bool HasMulI24
Definition: AMDGPUSubtarget.h:55
llvm::AMDGPUSubtarget::hasInv2PiInlineImm
bool hasInv2PiInlineImm() const
Definition: AMDGPUSubtarget.h:164
llvm
Definition: AllocatorList.h:23
llvm::AMDGPUSubtarget::HasDsSrc2Insts
bool HasDsSrc2Insts
Definition: AMDGPUSubtarget.h:52
llvm::SystemZISD::TM
@ TM
Definition: SystemZISelLowering.h:65
llvm::AMDGPUSubtarget::NORTHERN_ISLANDS
@ NORTHERN_ISLANDS
Definition: AMDGPUSubtarget.h:36
llvm::AMDGPUSubtarget::HasFminFmaxLegacy
bool HasFminFmaxLegacy
Definition: AMDGPUSubtarget.h:58
llvm::Function
Definition: Function.h:61
llvm::AMDGPUSubtarget::hasMulI24
bool hasMulI24() const
Definition: AMDGPUSubtarget.h:156
llvm::AMDGPUSubtarget::hasMadMixInsts
bool hasMadMixInsts() const
Definition: AMDGPUSubtarget.h:136
llvm::AMDGPUSubtarget::EnablePromoteAlloca
bool EnablePromoteAlloca
Definition: AMDGPUSubtarget.h:59
llvm::AMDGPUSubtarget::GFX9
@ GFX9
Definition: AMDGPUSubtarget.h:40
llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS
@ SOUTHERN_ISLANDS
Definition: AMDGPUSubtarget.h:37
llvm::Triple::amdgcn
@ amdgcn
Definition: Triple.h:72
llvm::AMDGPUSubtarget::getMinWavesPerEU
virtual unsigned getMinWavesPerEU() const =0
llvm::AMDGPUSubtarget::SEA_ISLANDS
@ SEA_ISLANDS
Definition: AMDGPUSubtarget.h:38
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:45
llvm::AMDGPUSubtarget::getOccupancyWithLocalMemSize
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemSizeWithWaveCount.
Definition: AMDGPUSubtarget.cpp:368
llvm::AMDGPUSubtarget::getMaxWavesPerEU
unsigned getMaxWavesPerEU() const
Definition: AMDGPUSubtarget.h:223
llvm::AMDGPUSubtarget::hasSDWA
bool hasSDWA() const
Definition: AMDGPUSubtarget.h:148
llvm::AMDGPUSubtarget::getMaxFlatWorkGroupSize
virtual unsigned getMaxFlatWorkGroupSize() const =0
llvm::AMDGPUSubtarget::getKernArgSegmentSize
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
Definition: AMDGPUSubtarget.cpp:599
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::AMDGPUSubtarget::isAmdHsaOS
bool isAmdHsaOS() const
Definition: AMDGPUSubtarget.h:106
llvm::AMDGPUSubtarget::get
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Definition: AMDGPUSubtarget.cpp:980
llvm::Triple::Mesa3D
@ Mesa3D
Definition: Triple.h:195
llvm::AMDGPUSubtarget::isMesa3DOS
bool isMesa3DOS() const
Definition: AMDGPUSubtarget.h:114
llvm::AMDGPUSubtarget::HasInv2PiInlineImm
bool HasInv2PiInlineImm
Definition: AMDGPUSubtarget.h:57
llvm::AMDGPUSubtarget::HasMadMixInsts
bool HasMadMixInsts
Definition: AMDGPUSubtarget.h:50
llvm::AMDGPUSubtarget::GCN3Encoding
bool GCN3Encoding
Definition: AMDGPUSubtarget.h:48
llvm::AMDGPUSubtarget::~AMDGPUSubtarget
virtual ~AMDGPUSubtarget()
Definition: AMDGPUSubtarget.h:242
llvm::AMDGPUSubtarget::getImplicitArgNumBytes
unsigned getImplicitArgNumBytes(const Function &F) const
Definition: AMDGPUSubtarget.cpp:569
llvm::AMDGPUSubtarget::isGCN
bool isGCN() const
Definition: AMDGPUSubtarget.h:124
llvm::AMDGPUSubtarget::getWavesPerEUForWorkGroup
virtual unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const =0
llvm::AMDGPUSubtarget::isMesaKernel
bool isMesaKernel(const Function &F) const
Definition: AMDGPUSubtarget.cpp:495
llvm::Instruction
Definition: Instruction.h:45
llvm::AMDGPUSubtarget::Has16BitInsts
bool Has16BitInsts
Definition: AMDGPUSubtarget.h:49
llvm::AMDGPUSubtarget::getMaxWorkitemID
unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const
Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
Definition: AMDGPUSubtarget.cpp:499
llvm::AMDGPUSubtarget::has16BitInsts
bool has16BitInsts() const
Definition: AMDGPUSubtarget.h:132
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::Triple::getArch
ArchType getArch() const
getArch - Get the parsed architecture type of this triple.
Definition: Triple.h:307
llvm::AMDGPUSubtarget::getLocalMemorySize
unsigned getLocalMemorySize() const
Definition: AMDGPUSubtarget.h:188
llvm::CallingConv::ID
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
llvm::AMDGPUSubtarget::AMDGPUSubtarget
AMDGPUSubtarget(const Triple &TT)
Definition: AMDGPUSubtarget.cpp:177
llvm::AMDGPUSubtarget::getWavefrontSize
unsigned getWavefrontSize() const
Definition: AMDGPUSubtarget.h:180
llvm::Triple::AMDHSA
@ AMDHSA
Definition: Triple.h:190
llvm::AMDGPUSubtarget::GFX10
@ GFX10
Definition: AMDGPUSubtarget.h:41
llvm::AMDGPUSubtarget::HasTrigReducedRange
bool HasTrigReducedRange
Definition: AMDGPUSubtarget.h:60
llvm::AMDGPUDwarfFlavour
AMDGPUDwarfFlavour
Definition: AMDGPUMCTargetDesc.h:34
llvm::AMDGPUSubtarget::WavefrontSizeLog2
char WavefrontSizeLog2
Definition: AMDGPUSubtarget.h:63
llvm::AMDGPUSubtarget::makeLIDRangeMetadata
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
Definition: AMDGPUSubtarget.cpp:507
llvm::AMDGPUSubtarget
Definition: AMDGPUSubtarget.h:29
llvm::AMDGPUSubtarget::getWavesPerEU
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
Definition: AMDGPUSubtarget.cpp:448
llvm::omp::Kernel
Function * Kernel
Summary of a kernel (=entry point for target offloading).
Definition: OpenMPOpt.h:21
llvm::Triple::getOS
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:316
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::AMDGPUSubtarget::hasMadMacF32Insts
bool hasMadMacF32Insts() const
Definition: AMDGPUSubtarget.h:140
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
Definition: AMDGPUSubtarget.cpp:355
llvm::AMDGPUSubtarget::R700
@ R700
Definition: AMDGPUSubtarget.h:34
llvm::AMDGPUSubtarget::hasFminFmaxLegacy
bool hasFminFmaxLegacy() const
Definition: AMDGPUSubtarget.h:168
llvm::AMDGPUSubtarget::MaxWavesPerEU
unsigned MaxWavesPerEU
Definition: AMDGPUSubtarget.h:61
llvm::AMDGPUSubtarget::getExplicitKernelArgOffset
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
Definition: AMDGPUSubtarget.h:198
llvm::MachineFunction
Definition: MachineFunction.h:230
Triple.h
llvm::AMDGPUSubtarget::getMinFlatWorkGroupSize
virtual unsigned getMinFlatWorkGroupSize() const =0
llvm::AMDGPUSubtarget::hasTrigReducedRange
bool hasTrigReducedRange() const
Definition: AMDGPUSubtarget.h:172
llvm::AMDGPUSubtarget::hasVOP3PInsts
bool hasVOP3PInsts() const
Definition: AMDGPUSubtarget.h:152
llvm::AMDGPUSubtarget::hasMulU24
bool hasMulU24() const
Definition: AMDGPUSubtarget.h:160
llvm::AMDGPUSubtarget::getAMDGPUDwarfFlavour
AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const
Definition: AMDGPUSubtarget.cpp:616
uint32_t
llvm::AMDGPUSubtarget::isGCN3Encoding
bool isGCN3Encoding() const
Definition: AMDGPUSubtarget.h:128
llvm::AMDGPUSubtarget::hasDsSrc2Insts
bool hasDsSrc2Insts() const
Definition: AMDGPUSubtarget.h:144
llvm::AMDGPUSubtarget::getMaxWorkGroupsPerCU
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0
CallingConv.h
llvm::AMDGPUSubtarget::getExplicitKernArgSize
uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const
Definition: AMDGPUSubtarget.cpp:575
llvm::AMDGPUSubtarget::INVALID
@ INVALID
Definition: AMDGPUSubtarget.h:32
Alignment.h
llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS
@ VOLCANIC_ISLANDS
Definition: AMDGPUSubtarget.h:39
llvm::Triple::AMDPAL
@ AMDPAL
Definition: Triple.h:197
llvm::AMDGPUSubtarget::LocalMemorySize
unsigned LocalMemorySize
Definition: AMDGPUSubtarget.h:62
llvm::AMDGPUSubtarget::getDefaultFlatWorkGroupSize
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
Definition: AMDGPUSubtarget.cpp:411
llvm::AMDGPUSubtarget::Generation
Generation
Definition: AMDGPUSubtarget.h:31
llvm::AMDGPUSubtarget::HasMulU24
bool HasMulU24
Definition: AMDGPUSubtarget.h:56
llvm::AMDGPUSubtarget::getFlatWorkGroupSizes
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
Definition: AMDGPUSubtarget.cpp:425
llvm::AMDGPUSubtarget::R600
@ R600
Definition: AMDGPUSubtarget.h:33
llvm::AMDGPUSubtarget::isPromoteAllocaEnabled
bool isPromoteAllocaEnabled() const
Definition: AMDGPUSubtarget.h:176
llvm::AMDGPUSubtarget::HasSDWA
bool HasSDWA
Definition: AMDGPUSubtarget.h:53
llvm::AMDGPUSubtarget::HasVOP3PInsts
bool HasVOP3PInsts
Definition: AMDGPUSubtarget.h:54
llvm::AMDGPUSubtarget::getWavefrontSizeLog2
unsigned getWavefrontSizeLog2() const
Definition: AMDGPUSubtarget.h:184
llvm::AMDGPUSubtarget::HasMadMacF32Insts
bool HasMadMacF32Insts
Definition: AMDGPUSubtarget.h:51
llvm::AMDGPUSubtarget::EVERGREEN
@ EVERGREEN
Definition: AMDGPUSubtarget.h:35
llvm::AMDGPUSubtarget::isAmdHsaOrMesa
bool isAmdHsaOrMesa(const Function &F) const
Definition: AMDGPUSubtarget.h:120
llvm::codeview::PublicSymFlags::Function
@ Function
llvm::AMDGPUSubtarget::isAmdPalOS
bool isAmdPalOS() const
Definition: AMDGPUSubtarget.h:110