16#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
17#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
56 enum MemIntrinsicType {
57 VECTOR_LDST_TWO_ELEMENTS,
58 VECTOR_LDST_THREE_ELEMENTS,
59 VECTOR_LDST_FOUR_ELEMENTS
65 Type *isBinExtWideningInstruction(
unsigned Opcode,
Type *DstTy,
67 Type *SrcOverrideTy =
nullptr)
const;
70 bool isSingleExtWideningInstruction(
unsigned Opcode,
Type *DstTy,
72 Type *SrcOverrideTy =
nullptr)
const;
84 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx = {})
const;
89 TLI(ST->getTargetLowering()) {}
92 const Function *Callee)
const override;
98 unsigned DefaultCallPenalty)
const override;
132 bool Vector = (ClassID == 1);
145 std::optional<Instruction *>
152 SimplifyAndSetOp)
const override;
158 return ST->getMinVectorRegisterBitWidth();
162 return ST->getVScaleForTuning();
188 unsigned Opcode2)
const;
197 bool VariableMask,
Align Alignment,
219 unsigned Index,
const Value *Op0,
220 const Value *Op1)
const override;
228 unsigned Index,
Value *Scalar,
229 ArrayRef<std::tuple<Value *, User *, int>>
230 ScalarUserAndIdx)
const override;
234 unsigned Index)
const override;
239 unsigned Index)
const override;
285 OptimizationRemarkEmitter *ORE)
const override;
292 bool CanCreate =
true)
const override;
295 MemIntrinsicInfo &
Info)
const override;
298 if (Ty->isPointerTy())
301 if (Ty->isBFloatTy() && ST->hasBF16())
304 if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
307 if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
308 Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
320 DataType->getPrimitiveSizeInBits() != 128)
327 unsigned )
const override {
332 unsigned )
const override {
337 if (!ST->isSVEAvailable())
342 if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||
343 DataTypeFVTy->getNumElements() < 2))
360 if (!ST->hasNEON() || NumElements.
isScalable())
368 unsigned VectorBits = NumElements.
getFixedValue() * ElementBits;
369 return VectorBits >= 64;
384 unsigned NumElements = DataTypeTy->getNumElements();
385 unsigned EltSize = DataTypeTy->getElementType()->getScalarSizeInBits();
386 return NumElements > 1 &&
isPowerOf2_64(NumElements) && EltSize >= 8 &&
398 if (ST->isLittleEndian())
404 unsigned Opcode,
Type *InputTypeA,
Type *InputTypeB,
Type *AccumType,
414 bool UseMaskForCond =
false,
bool UseMaskForGaps =
false)
const override;
418 bool &AllowPromotionWithoutCommonHeader)
const override;
427 return ST->hasSVE() ? 5 : 0;
433 return IVUpdateMayOverflow
447 return ST->isSVEorStreamingSVEAvailable();
459 std::optional<InstructionCost>
467 std::optional<FastMathFlags> FMF,
472 VectorType *ValTy, std::optional<FastMathFlags> FMF,
483 const Instruction *CxtI =
nullptr)
const override;
486 VectorType *Ty,
const APInt &DemandedElts,
bool Insert,
bool Extract,
496 StackOffset BaseOffset,
bool HasBaseReg,
498 unsigned AddrSpace)
const override;
501 return ST->enableSelectOptimize();
507 Type *ScalarValTy)
const override {
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
Analysis containing CSE Info
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
uint64_t IntrinsicInst * II
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
bool isLegalNTLoad(Type *DataType, Align Alignment) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override
bool isVScaleKnownToBeAPowerOfTwo() const override
bool isLegalNTStoreLoad(Type *DataType, Align Alignment) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const
InstructionCost getIntImmCost(int64_t Val) const
Calculate the cost of materializing a 64-bit value.
bool prefersVectorizedAddressing() const override
unsigned getMinTripCountTailFoldingThreshold() const override
std::optional< unsigned > getVScaleForTuning() const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override
bool supportsScalableVectors() const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
bool enableSelectOptimize() const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
bool isElementTypeLegalForScalableVector(Type *Ty) const override
bool preferPredicatedReductionSelect() const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, std::function< InstructionCost(Type *)> InstCost) const
FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext) if the architecture features are not...
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
unsigned getNumberOfRegisters(unsigned ClassID) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const override
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
bool isLegalMaskedLoad(Type *DataType, Align Alignment, unsigned) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
bool isLegalNTStore(Type *DataType, Align Alignment) const override
bool useNeonVector(const Type *Ty) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
std::optional< unsigned > getMinPageSize() const override
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) const
unsigned getMinVectorRegisterBitWidth() const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override
AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
bool isLegalMaskedStore(Type *DataType, Align Alignment, unsigned) const override
bool enableOrderedReductions() const override
bool isMultiversionedFunction(const Function &F) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
bool enableInterleavedAccessVectorization() const override
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedGatherScatter(Type *DataType) const
unsigned getGISelRematGlobalCost() const override
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
APInt getFeatureMask(const Function &F) const override
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const override
bool enableScalableVectorization() const override
bool shouldExpandReduction(const IntrinsicInst *II) const override
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override
bool isLegalMaskedGather(Type *DataType, Align Alignment) const override
bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1, unsigned Opcode2) const
Check whether Opcode1 has less throughput according to the scheduling model than Opcode2.
bool enableMaskedInterleavedAccessVectorization() const override
unsigned getEpilogueVectorizationMinVF() const override
InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const override
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const override
BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
The core instruction combiner logic.
A wrapper class for inspecting calls to intrinsic functions.
Represents a single loop in the control flow graph.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
LLVM Value Representation.
Base class of all SIMD vector types.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
ArrayRef(const T &OneElt) -> ArrayRef< T >
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime over...
@ DataAndControlFlow
Use predicate to control both data and control flow.
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead i...
This struct is a compact representation of a valid (non-zero power of two) alignment.