//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file describes a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target-independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//
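//
// Illustrative usage (a sketch, not part of this header): passes never
// instantiate these classes directly. AMDGPUTargetMachine wires them up
// through TargetIRAnalysis, and IR-level passes see their answers through the
// generic TargetTransformInfo interface, roughly:
//
//   #include "llvm/Analysis/TargetTransformInfo.h"
//   using namespace llvm;
//
//   static void example(Function &F, FunctionAnalysisManager &FAM) {
//     // On AMDGPU, this TTI is backed by GCNTTIImpl or R600TTIImpl.
//     TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
//     if (TTI.hasBranchDivergence()) {
//       // Branches may be divergent; transformations must be
//       // divergence-aware.
//     }
//   }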

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

namespace llvm {

class AMDGPUTargetLowering;
class Loop;
class ScalarEvolution;
class Type;
class Value;

class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

  const TargetSubtargetInfo *ST;
  const TargetLoweringBase *TLI;

  const TargetSubtargetInfo *getST() const { return ST; }
  const TargetLoweringBase *getTLI() const { return TLI; }

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()),
        TargetTriple(TM->getTargetTriple()),
        ST(static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F))),
        TLI(ST->getTargetLowering()) {}

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
};

class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphicsShader;

  const FeatureBitset InlineFeatureIgnoreList = {
    // Codegen control options which don't matter.
    AMDGPU::FeatureEnableLoadStoreOpt,
    AMDGPU::FeatureEnableSIScheduler,
    AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
    AMDGPU::FeatureFlatForGlobal,
    AMDGPU::FeaturePromoteAlloca,
    AMDGPU::FeatureUnalignedBufferAccess,
    AMDGPU::FeatureUnalignedScratchAccess,

    AMDGPU::FeatureAutoWaitcntBeforeBarrier,

    // Properties of the kernel/environment which can't actually differ.
    AMDGPU::FeatureSGPRInitBug,
    AMDGPU::FeatureXNACK,
    AMDGPU::FeatureTrapHandler,
    AMDGPU::FeatureCodeObjectV3,

    // The default assumption needs to be that ECC is enabled, but no directly
    // exposed operations depend on it, so it can be safely inlined.
    AMDGPU::FeatureSRAMECC,

    // Perf-tuning features.
    AMDGPU::FeatureFastFMAF32,
    AMDGPU::HalfRate64Ops
  };
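
  // areInlineCompatible() below (defined in AMDGPUTargetTransformInfo.cpp)
  // compares the caller's and callee's subtarget features with the bits above
  // masked off. A sketch of that check, assuming caller/callee GCNSubtargets
  // CallerST and CalleeST:
  //
  //   FeatureBitset RealCallerBits =
  //       CallerST->getFeatureBits() & ~InlineFeatureIgnoreList;
  //   FeatureBitset RealCalleeBits =
  //       CalleeST->getFeatureBits() & ~InlineFeatureIgnoreList;
  //   // Compatible when the callee requires no feature the caller lacks.
  //   return (RealCallerBits & RealCalleeBits) == RealCalleeBits;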

  const GCNSubtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost() {
    return 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // this should be 2 or 4.
  static inline int getQuarterRateInstrCost() {
    return 3 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts normal fp64 operations are half rate, and on others quarter
  // rate. This also applies to some integer operations.
  inline int get64BitInstrCost() const {
    return ST->hasHalfRate64Ops() ?
      getHalfRateInstrCost() : getQuarterRateInstrCost();
  }
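
  // For example, with TCC_Basic == 1 the full/half/quarter rates above map to
  // relative costs of 1, 2 and 3, so a plain fp64 add is modeled as costing 2
  // on a subtarget with HalfRate64Ops and 3 everywhere else.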

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const GCNSubtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F),
      IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}

  bool hasBranchDivergence() { return true; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getHardwareNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(bool Vector) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                  unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;

  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);

  int getArithmeticInstrCost(
    unsigned Opcode, Type *Ty,
    TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
    TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
    TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
    TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
    ArrayRef<const Value *> Args = ArrayRef<const Value *>());

  unsigned getCFInstrCost(unsigned Opcode);

  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

  unsigned getFlatAddressSpace() const {
    // Don't bother running the InferAddressSpaces pass on graphics shaders,
    // which don't use flat addressing.
    if (IsGraphicsShader)
      return -1;
    return AMDGPUAS::FLAT_ADDRESS;
  }
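
  // A sketch of how a caller reacts to the sentinel above (roughly what
  // InferAddressSpaces does through the public TTI interface):
  //
  //   unsigned FlatAS = TTI.getFlatAddressSpace();
  //   if (FlatAS == (unsigned)-1)
  //     return false; // No usable flat address space; nothing to rewrite.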

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const;
  bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                        Value *OldV, Value *NewV) const;

  unsigned getVectorSplitCost() { return 0; }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                          Type *SubTp);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  unsigned getInliningThresholdMultiplier() { return 9; }

  int getInlinerVectorBonusPercent() { return 0; }

  int getArithmeticReductionCost(unsigned Opcode,
                                 Type *Ty,
                                 bool IsPairwise);
  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                             bool IsPairwiseForm,
                             bool IsUnsigned);
  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
};

class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
  using BaseT = BasicTTIImplBase<R600TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const R600Subtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;

public:
  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F) {}

  const R600Subtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
  unsigned getHardwareNumberOfRegisters(bool Vec) const;
  unsigned getNumberOfRegisters(bool Vec) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  unsigned getCFInstrCost(unsigned Opcode);
  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H