#define DEBUG_TYPE "hexagontti"
static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false),
                                    cl::Hidden,
                                    cl::desc("Enable loop vectorizer for HVX"));

static cl::opt<bool> EnableV68FloatAutoHVX(
    "force-hvx-float", cl::Hidden,
    cl::desc("Enable auto-vectorization of floating point types on v68."));

static cl::opt<bool> EmitLookupTables(
    "hexagon-emit-lookup-tables", cl::init(true), cl::Hidden,
    cl::desc("Control lookup table emission on Hexagon target"));
bool HexagonTTIImpl::useHVX() const {
  return ST.useHVXOps() && HexagonAutoHVX;
}
bool HexagonTTIImpl::isHVXVectorType(Type *Ty) const {
  auto *VecTy = dyn_cast<VectorType>(Ty);
  if (!VecTy || !ST.isTypeForHVX(VecTy))
    return false;
  if (ST.useHVXV69Ops() || !VecTy->getElementType()->isFloatingPointTy())
    return true;
  return ST.useHVXV68Ops() && EnableV68FloatAutoHVX;
}
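// Illustrative behavior under the checks above (the vector shapes are
// examples, not from the upstream file): an integer vector such as
// <64 x i16> that isTypeForHVX accepts is an HVX type on any HVX subtarget,
// while a float vector such as <32 x float> qualifies only on v69+, or on
// v68 when EnableV68FloatAutoHVX is set.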
unsigned HexagonTTIImpl::getTypeNumElements(Type *Ty) const {
  if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
    return VTy->getNumElements();
  assert((Ty->isIntegerTy() || Ty->isFloatingPointTy()) &&
         "Expecting scalar type");
  return 1;
}
void HexagonTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                           TTI::PeelingPreferences &PP) {
  BaseT::getPeelingPreferences(L, SE, PP);
  // Only try to peel innermost loops with small runtime trip counts.
  if (L && L->isInnermost() && canPeel(L) &&
      SE.getSmallConstantTripCount(L) == 0 &&
      SE.getSmallConstantMaxTripCount(L) > 0 &&
      SE.getSmallConstantMaxTripCount(L) <= 5)
    PP.PeelCount = 2;
}
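// Illustrative case: a loop whose exact trip count is unknown at compile time
// (getSmallConstantTripCount returns 0) but whose maximum trip count SCEV can
// bound by, say, 4 passes all the checks and gets PP.PeelCount = 2.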
unsigned HexagonTTIImpl::getNumberOfRegisters(bool Vector) const {
  if (Vector)
    return useHVX() ? 32 : 0;
  return 32;
}

unsigned HexagonTTIImpl::getMaxInterleaveFactor(ElementCount VF) {
  return useHVX() ? 2 : 1;
}
ElementCount HexagonTTIImpl::getMinimumVF(unsigned ElemWidth,
                                          bool IsScalable) const {
  assert(!IsScalable && "Scalable VFs are not supported for Hexagon");
  return ElementCount::getFixed((8 * ST.getVectorLength()) / ElemWidth);
}
InstructionCost
HexagonTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                      TTI::TargetCostKind CostKind) {
  if (ICA.getID() == Intrinsic::bswap) {
    std::pair<InstructionCost, MVT> LT =
        getTypeLegalizationCost(ICA.getReturnType());
    return LT.first + 2;
  }
  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
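// E.g. for llvm.bswap.i32 the result type is already legal, so LT.first is 1
// and the reported cost is 3 (illustrative arithmetic, not from the upstream
// file).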
InstructionCost HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                                MaybeAlign Alignment,
                                                unsigned AddressSpace,
                                                TTI::TargetCostKind CostKind,
                                                TTI::OperandValueInfo OpInfo,
                                                const Instruction *I) {
  assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
  // TODO: Handle other cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return 1;

  if (Opcode == Instruction::Store)
    return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                  CostKind, OpInfo, I);

  if (Src->isVectorTy()) {
    VectorType *VecTy = cast<VectorType>(Src);
    unsigned VecWidth = VecTy->getPrimitiveSizeInBits().getFixedValue();
    if (isHVXVectorType(VecTy)) {
      unsigned RegWidth =
          getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
              .getFixedValue();
      assert(RegWidth && "Non-zero vector register width expected");
      // Cost of HVX loads.
      if (VecWidth % RegWidth == 0)
        return VecWidth / RegWidth;
      // Cost of constructing an HVX vector from scalar loads.
      const Align RegAlign(RegWidth / 8);
      if (!Alignment || *Alignment > RegAlign)
        Alignment = RegAlign;
      assert(Alignment);
      unsigned AlignWidth = 8 * Alignment->value();
      unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
      return 3 * NumLoads;
    }

    // Non-HVX vectors: add extra cost for floating point types.
    unsigned Cost =
        VecTy->getElementType()->isFloatingPointTy() ? FloatFactor : 1;

    // At this point unspecified alignment is considered as Align(1).
    const Align BoundAlignment = std::min(Alignment.valueOrOne(), Align(8));
    unsigned AlignWidth = 8 * BoundAlignment.value();
    unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
    if (Alignment == Align(4) || Alignment == Align(8))
      return Cost * NumLoads;
    // Loads of less than 32 bits will need extra inserts to compose a vector.
    assert(BoundAlignment <= Align(8));
    unsigned LogA = Log2(BoundAlignment);
    return (3 - LogA) * Cost * NumLoads;
  }
  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
                                OpInfo, I);
}
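// Worked example for the non-HVX vector path (illustrative numbers): loading
// <8 x float> (VecWidth = 256) with Align(4) gives BoundAlignment = 4,
// AlignWidth = 32, NumLoads = alignTo(256, 32) / 32 = 8, and returns
// Cost * 8 with Cost = FloatFactor. With Align(2) instead, NumLoads = 16,
// LogA = 1, and the final return yields (3 - 1) * Cost * 16.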
InstructionCost HexagonTTIImpl::getGatherScatterOpCost(
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
    Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
  return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                       Alignment, CostKind, I);
}
InstructionCost HexagonTTIImpl::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
    bool UseMaskForCond, bool UseMaskForGaps) {
  if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps)
    return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                             Alignment, AddressSpace, CostKind,
                                             UseMaskForCond, UseMaskForGaps);
  return getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
                         CostKind);
}
InstructionCost
HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                   CmpInst::Predicate VecPred,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I) {
  if (ValTy->isVectorTy() && CostKind == TTI::TCK_RecipThroughput) {
    if (!isHVXVectorType(ValTy) && ValTy->isFPOrFPVectorTy())
      return InstructionCost::getMax();
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
    if (Opcode == Instruction::FCmp)
      return LT.first + FloatFactor * getTypeNumElements(ValTy);
  }
  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
}
InstructionCost HexagonTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
    ArrayRef<const Value *> Args, const Instruction *CxtI) {
  // TODO: Handle more cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                         Op2Info, Args, CxtI);

  if (Ty->isVectorTy()) {
    if (!isHVXVectorType(Ty) && Ty->isFPOrFPVectorTy())
      return InstructionCost::getMax();
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
    if (LT.second.isFloatingPoint())
      return LT.first + FloatFactor * getTypeNumElements(Ty);
  }
  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
                                       Args, CxtI);
}
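// Worked example (illustrative): an fadd on <4 x float> that legalizes in one
// step costs LT.first + FloatFactor * 4, i.e. 1 + 4 * FloatFactor.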
InstructionCost HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy,
                                                 Type *SrcTy,
                                                 TTI::CastContextHint CCH,
                                                 TTI::TargetCostKind CostKind,
                                                 const Instruction *I) {
  auto isNonHVXFP = [this](Type *Ty) {
    return Ty->isVectorTy() && !isHVXVectorType(Ty) && Ty->isFPOrFPVectorTy();
  };
  if (isNonHVXFP(SrcTy) || isNonHVXFP(DstTy))
    return InstructionCost::getMax();

  if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) {
    unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;
    unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0;

    std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(SrcTy);
    std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(DstTy);
    InstructionCost Cost =
        std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
    // Non-throughput cost kinds collapse to a 0/1 cost.
    if (CostKind != TTI::TCK_RecipThroughput)
      return Cost == 0 ? 0 : 1;
    return Cost;
  }
  return 1;
}
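// Worked example (illustrative): fptosi <4 x float> to <4 x i32> has an FP
// source but an integer destination, so SrcN = 4, DstN = 0, and the
// throughput cost is max(SrcLT.first, DstLT.first) + FloatFactor * 4.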
InstructionCost HexagonTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                                   TTI::TargetCostKind CostKind,
                                                   unsigned Index, Value *Op0,
                                                   Value *Op1) {
  Type *ElemTy = Val->isVectorTy() ? cast<VectorType>(Val)->getElementType()
                                   : Val;
  if (Opcode == Instruction::InsertElement) {
    // Need two rotations for non-zero index.
    unsigned Cost = (Index != 0) ? 2 : 0;
    if (ElemTy->isIntegerTy(32))
      return Cost;
    // If it's not a 32-bit value, there will need to be an extract.
    return Cost + getVectorInstrCost(Instruction::ExtractElement, Val,
                                     CostKind, Index, Op0, Op1);
  }

  if (Opcode == Instruction::ExtractElement)
    return 2;

  return 1;
}
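// E.g. (illustrative IR) %e = extractelement <16 x i32> %v, i32 5 costs 2;
// %w = insertelement <16 x i32> %v, i32 %x, i32 5 also costs 2, since the
// element type is i32 and the index is non-zero.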
InstructionCost
HexagonTTIImpl::getInstructionCost(const User *U,
                                   ArrayRef<const Value *> Operands,
                                   TTI::TargetCostKind CostKind) {
  auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool {
    if (!CI->isIntegerCast())
      return false;
    const DataLayout &DL = getDataLayout();
    // Only extensions of a sub-32-bit integer to a 32-bit integer can be
    // folded into the load.
    unsigned SBW = DL.getTypeSizeInBits(CI->getSrcTy());
    unsigned DBW = DL.getTypeSizeInBits(CI->getDestTy());
    if (DBW != 32 || SBW >= DBW)
      return false;

    const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0));
    // Technically, this code could allow multiple uses of the load, and
    // check if all the uses are the same extension operation, but this
    // should be sufficient for most cases.
    return LI && LI->hasOneUse();
  };

  if (const CastInst *CI = dyn_cast<const CastInst>(U))
    if (isCastFoldedIntoLoad(CI))
      return TargetTransformInfo::TCC_Free;
  return BaseT::getInstructionCost(U, Operands, CostKind);
}
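// Illustrative IR that isCastFoldedIntoLoad reports as free (hypothetical
// snippet, not from the upstream tests):
//   %v = load i16, ptr %p
//   %e = sext i16 %v to i32   ; sole use of %v; SBW = 16 < DBW = 32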