17 #ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
18 #define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
33 class ARMTargetLowering;
37 class ScalarEvolution;
41 namespace TailPredication {
72 ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
73 ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
74 ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
75 ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
76 ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
77 ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
78 ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
79 ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
80 ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
81 ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
82 ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
83 ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
84 ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
85 ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
86 ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
87 ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
88 ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
89 ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
90 ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
91 ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
92 ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
93 ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
94 ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
95 ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
104 TLI(ST->getTargetLowering()) {}
118 return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
127 SimplifyAndSetOp) const;
150 bool Vector = (ClassID == 1);
154 if (ST->hasMVEIntegerOps())
159 if (ST->isThumb1Only())
171 if (ST->hasMVEIntegerOps())
181 return ST->getMaxInterleaveFactor();
267 bool UseMaskForCond = false, bool UseMaskForGaps = false);
270 const Value *Ptr, bool VariableMask,
309 if (ST->isROPI() || ST->isRWPI())
310 return !C->needsDynamicRelocation();
322 "Only possible block sizes for VREV are: 16, 32, 64");
325 if (EltSz != 8 && EltSz != 16 && EltSz != 32)
328 unsigned BlockElts = M[0] + 1;
336 for (unsigned i = 0, e = M.size(); i < e; ++i) {
339 if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
348 #endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
bool shouldBuildLookupTablesForConstant(Constant *C) const
This is an optimization pass for GlobalISel generic memory operations.
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Represents a single loop in the control flow graph.
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
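A minimal sketch of the idea behind this check, using std::bitset in place of LLVM's FeatureBitset (the bit values and the exact masking are illustrative assumptions, not the header's code): features on the allow-list shown in the listing above are ignored, and the callee's remaining features must be a subset of the caller's.
#include <bitset>
#include <cstdio>

int main() {
  // Toy feature sets; Allowed models the inline-feature allow-list above.
  std::bitset<8> Caller("10110110"), Callee("00110010"), Allowed("00000100");
  std::bitset<8> RealCaller = Caller & ~Allowed; // drop ignorable features
  std::bitset<8> RealCallee = Callee & ~Allowed;
  // Inlining is compatible if the callee's remaining features are a
  // subset of the caller's.
  bool Compatible = (RealCaller & RealCallee) == RealCallee;
  std::printf("%s\n", Compatible ? "inlinable" : "not inlinable"); // inlinable
  return 0;
}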
The main scalar evolution driver.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
The instances of the Type class are immutable: once they are created, they are never changed.
bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment)
Container class for subtarget features.
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
bool isLegalMaskedLoad(Type *DataTy, Align Alignment)
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned getNumberOfRegisters(unsigned ClassID) const
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty)
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
bool isLoweredToCall(const Function *F)
bool emitGetActiveLaneMask() const
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize...
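For illustration (a standalone sketch, not part of the header): the index formula that isVREVMask tests reverses elements within each BlockSize-bit block. For VREV64 on v8i16, BlockSize = 64 and EltSz = 16, so BlockElts = 4 and the matching shuffle mask is [3, 2, 1, 0, 7, 6, 5, 4].
#include <cstdio>

int main() {
  const unsigned BlockSize = 64, EltSz = 16, NumElts = 8;
  const unsigned BlockElts = BlockSize / EltSz; // elements per reversed block
  for (unsigned i = 0; i < NumElts; ++i) {
    // Same formula as the loop in the listing above.
    unsigned Expected = (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts);
    std::printf("%u ", Expected); // prints: 3 2 1 0 7 6 5 4
  }
  std::printf("\n");
  return 0;
}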
bool shouldExpandReduction(const IntrinsicInst *II) const
static TypeSize getFixed(ScalarTy MinVal)
int getNumMemOps(const IntrinsicInst *I) const
Given a memcpy/memset/memmove instruction, return the number of memory operations performed,...
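As a hedged back-of-the-envelope model (the real implementation consults the target's preferred memory-operation types and may differ): an inlined memcpy splits the byte count into the widest accesses it can use, and each chunk costs one load plus one store, so a 15-byte copy with at most 4-byte accesses splits into 4 + 4 + 4 + 2 + 1 bytes = 5 chunks, i.e. about 10 memory operations.
#include <cstdio>

// Simplified chunk counter; MaxAccess is assumed to be a power of two.
static unsigned countChunks(unsigned Bytes, unsigned MaxAccess) {
  unsigned Chunks = 0;
  while (Bytes) {
    unsigned Access = MaxAccess;
    while (Access > Bytes) // narrow the access for the tail
      Access /= 2;
    Bytes -= Access;
    ++Chunks;
  }
  return Chunks;
}

int main() {
  // memcpy needs a load and a store per chunk in this model.
  std::printf("%u\n", 2 * countChunks(15, 4)); // prints 10
  return 0;
}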
bool isProfitableLSRChainElement(Instruction *I)
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI)
bool isFPVectorizationPotentiallyUnsafe()
Floating-point computation using ARMv8 AArch32 Advanced SIMD instructions remains unchanged from ARMv...
bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment)
Base class of all SIMD vector types.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
This class represents an analyzed expression in the program.
InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *ValTy, TTI::TargetCostKind CostKind)
This is an important base class in LLVM.
InstructionCost getMemcpyCost(const Instruction *I)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueKind Op1Info=TTI::OK_AnyValue, TTI::OperandValueKind Op2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE, const SCEV *Ptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=None)
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Drive the analysis of memory accesses in the loop.
Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Base class which can be used to help build a TTI implementation.
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
bool preferInLoopReduction(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const
Class for arbitrary precision integers.
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
uint64_t getScalarSizeInBits() const
A cache of @llvm.assume calls within a function.
bool isLegalMaskedGather(Type *Ty, Align Alignment)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool enableInterleavedAccessVectorization()
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
static TypeSize getScalable(ScalarTy MinVal)
Provides information about what library functions are available for the current target.
ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
bool isLegalMaskedScatter(Type *Ty, Align Alignment)
The core instruction combiner logic.
A wrapper class for inspecting calls to intrinsic functions.
Attributes of a target dependent hardware loop.
bool maybeLoweredToCall(Instruction &I)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isLegalMaskedStore(Type *DataTy, Align Alignment)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
unsigned getMaxInterleaveFactor(unsigned VF)
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, Optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
LLVM Value Representation.
@ ForceEnabledNoReductions