LLVM 17.0.0git
Go to the documentation of this file.
1//===- RISCVTargetTransformInfo.h - RISC-V specific TTI ---------*- C++ -*-===//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8/// \file
9/// This file defines a TargetTransformInfo::Concept conforming object specific
10/// to the RISC-V target machine. It uses the target's detailed information to
11/// provide more precise answers to certain TTI queries, while letting the
12/// target independent and default TTI implementations handle the rest.
19#include "RISCVSubtarget.h"
20#include "RISCVTargetMachine.h"
24#include "llvm/IR/Function.h"
25#include <optional>
27namespace llvm {
29class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
33 friend BaseT;
35 const RISCVSubtarget *ST;
36 const RISCVTargetLowering *TLI;
38 const RISCVSubtarget *getST() const { return ST; }
39 const RISCVTargetLowering *getTLI() const { return TLI; }
41 /// This function returns an estimate for VL to be used in VL based terms
42 /// of the cost model. For fixed length vectors, this is simply the
43 /// vector length. For scalable vectors, we return results consistent
44 /// with getVScaleForTuning under the assumption that clients are also
45 /// using that when comparing costs between scalar and vector representation.
46 /// This does unfortunately mean that we can both undershoot and overshoot
47 /// the true cost significantly if getVScaleForTuning is wildly off for the
48 /// actual target hardware.
49 unsigned getEstimatedVLFor(VectorType *Ty);
51 /// Return the cost of LMUL. The larger the LMUL, the higher the cost.
52 InstructionCost getLMULCost(MVT VT);
55 explicit RISCVTTIImpl(const RISCVTargetMachine *TM, const Function &F)
56 : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
57 TLI(ST->getTargetLowering()) {}
59 /// Return the cost of materializing an immediate for a value operand of
60 /// a store instruction.
66 InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
67 const APInt &Imm, Type *Ty,
69 Instruction *Inst = nullptr);
71 const APInt &Imm, Type *Ty,
76 bool shouldExpandReduction(const IntrinsicInst *II) const;
77 bool supportsScalableVectors() const { return ST->hasVInstructions(); }
78 bool enableOrderedReductions() const { return true; }
79 bool enableScalableVectorization() const { return ST->hasVInstructions(); }
81 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const {
82 return ST->hasVInstructions() ? TailFoldingStyle::Data
84 }
85 std::optional<unsigned> getMaxVScale() const;
86 std::optional<unsigned> getVScaleForTuning() const;
90 unsigned getRegUsageForType(Type *Ty);
92 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
95 // Epilogue vectorization is usually unprofitable - tail folding or
96 // a smaller VF would have been better. This is a blunt hammer - we
97 // should re-examine this once vectorization is better tuned.
98 return false;
99 }
101 InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
102 Align Alignment, unsigned AddressSpace,
113 return ST->useRVVForFixedLengthVectors() ? 16 : 0;
114 }
117 ArrayRef<int> Mask,
119 VectorType *SubTp,
120 ArrayRef<const Value *> Args = std::nullopt);
126 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
127 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
128 bool UseMaskForCond = false, bool UseMaskForGaps = false);
130 InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
131 const Value *Ptr, bool VariableMask,
132 Align Alignment,
134 const Instruction *I);
136 InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
139 const Instruction *I = nullptr);
142 bool IsUnsigned,
146 std::optional<FastMathFlags> FMF,
149 InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
150 Type *ResTy, VectorType *ValTy,
151 std::optional<FastMathFlags> FMF,
155 getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
158 const Instruction *I = nullptr);
160 InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
161 CmpInst::Predicate VecPred,
163 const Instruction *I = nullptr);
166 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
168 unsigned Index, Value *Op0, Value *Op1);
170 InstructionCost getArithmeticInstrCost(
171 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
172 TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
173 TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
174 ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
175 const Instruction *CxtI = nullptr);
178 return TLI->isLegalElementTypeForRVV(Ty);
179 }
181 bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
182 if (!ST->hasVInstructions())
183 return false;
185 // Only support fixed vectors if we know the minimum vector size.
186 if (isa<FixedVectorType>(DataType) && !ST->useRVVForFixedLengthVectors())
187 return false;
189 if (Alignment <
191 return false;
193 return TLI->isLegalElementTypeForRVV(DataType->getScalarType());
194 }
196 bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
197 return isLegalMaskedLoadStore(DataType, Alignment);
198 }
199 bool isLegalMaskedStore(Type *DataType, Align Alignment) {
200 return isLegalMaskedLoadStore(DataType, Alignment);
201 }
203 bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment) {
204 if (!ST->hasVInstructions())
205 return false;
207 // Only support fixed vectors if we know the minimum vector size.
208 if (isa<FixedVectorType>(DataType) && !ST->useRVVForFixedLengthVectors())
209 return false;
211 if (Alignment <
213 return false;
215 return TLI->isLegalElementTypeForRVV(DataType->getScalarType());
216 }
218 bool isLegalMaskedGather(Type *DataType, Align Alignment) {
219 return isLegalMaskedGatherScatter(DataType, Alignment);
220 }
221 bool isLegalMaskedScatter(Type *DataType, Align Alignment) {
222 return isLegalMaskedGatherScatter(DataType, Alignment);
223 }
226 // Scalarize masked gather for RV64 if EEW=64 indices aren't supported.
227 return ST->is64Bit() && !ST->hasVInstructionsI64();
228 }
231 // Scalarize masked scatter for RV64 if EEW=64 indices aren't supported.
232 return ST->is64Bit() && !ST->hasVInstructionsI64();
233 }
235 /// \returns How the target needs this vector-predicated operation to be
236 /// transformed.
240 if (!ST->hasVInstructions() ||
241 (PI.getIntrinsicID() == Intrinsic::vp_reduce_mul &&
242 cast<VectorType>(PI.getArgOperand(1)->getType())
243 ->getElementType()
244 ->getIntegerBitWidth() != 1))
247 }
250 ElementCount VF) const {
251 if (!VF.isScalable())
252 return true;
254 Type *Ty = RdxDesc.getRecurrenceType();
255 if (!TLI->isLegalElementTypeForRVV(Ty))
256 return false;
258 switch (RdxDesc.getRecurrenceKind()) {
259 case RecurKind::Add:
260 case RecurKind::FAdd:
261 case RecurKind::And:
262 case RecurKind::Or:
263 case RecurKind::Xor:
264 case RecurKind::SMin:
265 case RecurKind::SMax:
266 case RecurKind::UMin:
267 case RecurKind::UMax:
268 case RecurKind::FMin:
269 case RecurKind::FMax:
273 return true;
274 default:
275 return false;
276 }
277 }
280 // Don't interleave if the loop has been vectorized with scalable vectors.
281 if (VF.isScalable())
282 return 1;
283 // If the loop will not be vectorized, don't interleave the loop.
284 // Let the regular unroller unroll the loop.
285 return VF.isScalar() ? 1 : ST->getMaxInterleaveFactor();
286 }
291 unsigned getNumberOfRegisters(unsigned ClassID) const {
292 switch (ClassID) {
294 // 31 = 32 GPR - x0 (zero register)
295 // FIXME: Should we exclude fixed registers like SP, TP or GP?
296 return 31;
298 if (ST->hasStdExtF())
299 return 32;
300 return 0;
302 // Although there are 32 vector registers, v0 is special in that it is the
303 // only register that can be used to hold a mask.
304 // FIXME: Should we conservatively return 31 as the number of usable
305 // vector registers?
306 return ST->hasVInstructions() ? 32 : 0;
307 }
308 llvm_unreachable("unknown register class");
309 }
311 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
312 if (Vector)
314 if (!Ty)
317 Type *ScalarTy = Ty->getScalarType();
318 if ((ScalarTy->isHalfTy() && ST->hasStdExtZfhOrZfhmin()) ||
319 (ScalarTy->isFloatTy() && ST->hasStdExtF()) ||
320 (ScalarTy->isDoubleTy() && ST->hasStdExtD())) {
322 }
325 }
327 const char *getRegisterClassName(unsigned ClassID) const {
328 switch (ClassID) {
330 return "RISCV::GPRRC";
332 return "RISCV::FPRRC";
334 return "RISCV::VRRC";
335 }
336 llvm_unreachable("unknown register class");
337 }
343} // end namespace llvm
static const Function * getParent(const Value *V)
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
TargetTransformInfo::VPLegalization VPLegalization
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
const char LLVMTargetMachineRef TM
This pass exposes codegen information to IR-level passes.
Class for arbitrary precision integers.
Definition: APInt.h:75
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Base class which can be used to help build a TTI implementation.
Definition: BasicTTIImpl.h:79
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1353
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:718
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition: DataLayout.h:468
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:302
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
Machine Value Type.
The optimization diagnostic interface.
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
const char * getRegisterClassName(unsigned ClassID) const
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt)
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment)
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, TTI::TargetCostKind CostKind)
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const
TargetTransformInfo::VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const
bool isLegalMaskedStore(Type *DataType, Align Alignment)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
unsigned getNumberOfRegisters(unsigned ClassID) const
bool isElementTypeLegalForScalableVector(Type *Ty) const
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
unsigned getMaxInterleaveFactor(ElementCount VF)
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
bool enableScalableVectorization() const
bool preferEpilogueVectorization() const
bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment)
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
bool isLegalMaskedLoad(Type *DataType, Align Alignment)
bool supportsScalableVectors() const
std::optional< unsigned > getVScaleForTuning() const
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
std::optional< unsigned > getMaxVScale() const
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
RISCVTTIImpl(const RISCVTargetMachine *TM, const Function &F)
TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool shouldExpandReduction(const IntrinsicInst *II) const
InstructionCost getStoreImmCost(Type *VecTy, TTI::OperandValueInfo OpInfo, TTI::TargetCostKind CostKind)
Return the cost of materializing an immediate for a value operand of a store instruction.
unsigned getRegUsageForType(Type *Ty)
bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment)
bool isLegalMaskedGather(Type *DataType, Align Alignment)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpdInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool enableOrderedReductions() const
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
bool isLegalMaskedScatter(Type *DataType, Align Alignment)
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
unsigned getMinVectorRegisterBitWidth() const
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
bool isLegalElementTypeForRVV(Type *ScalarTy) const
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
Definition: IVDescriptors.h:69
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
The main scalar evolution driver.
const DataLayout & getDataLayout() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
The kind of cost model.
Flags indicating the kind of support for population count.
The various kinds of shuffle patterns for vector queries.
Represents a hint about the context in which a cast is used.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:154
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:143
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:350
This is the common base class for vector predication intrinsics.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:182
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:166
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
Definition: NVPTXBaseInfo.h:21
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ SelectFCmp
Integer select(fcmp(),x,y) where one of (x,y) is loop invariant.
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Fused multiply-add of floats (a * b + c).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ SelectICmp
Integer select(icmp(),x,y) where one of (x,y) is loop invariant.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead i...
@ Data
Use predicate only to mask operations on data in the loop.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Parameters that control the generic loop unrolling transformation.