Go to the documentation of this file.
14 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
15 #define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
53 for (
unsigned Idx = 0, Size =
Operands.size(); Idx != Size; ++Idx)
67 return SI.getNumCases();
111 std::pair<const Value *, unsigned>
113 return std::make_pair(
nullptr, -1);
122 assert(
F &&
"A concrete function must be provided to this routine.");
129 if (
F->isIntrinsic())
132 if (
F->hasLocalLinkage() || !
F->hasName())
138 if (
Name ==
"copysign" ||
Name ==
"copysignf" ||
Name ==
"copysignl" ||
148 Name ==
"exp2l" ||
Name ==
"exp2f" ||
Name ==
"floor" ||
149 Name ==
"floorf" ||
Name ==
"ceil" ||
Name ==
"round" ||
182 bool &KnownBitsComputed)
const {
190 SimplifyAndSetOp)
const {
206 bool HasBaseReg, int64_t Scale,
unsigned AddrSpace,
210 return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
214 return std::tie(
C1.NumRegs,
C1.AddRecCost,
C1.NumIVMuls,
C1.NumBaseAdds,
215 C1.ScaleCost,
C1.ImmCost,
C1.SetupCost) <
276 Align Alignment)
const {
295 int64_t BaseOffset,
bool HasBaseReg,
297 unsigned AddrSpace)
const {
// Unconditionally returns false; takes no arguments and reads no state.
// NOTE(review): presumably a default for a hook that a target-specific
// implementation can replace — confirm against the enclosing class, which is
// not visible in this excerpt.
311 bool useAA()
const {
return false; }
326 const APInt &DemandedElts,
327 bool Insert,
bool Extract)
const {
343 bool IsZeroCmp)
const {
403 return "Generic::Unknown Register Class";
405 return "Generic::ScalarRC";
407 return "Generic::VectorRC";
// Unconditionally returns 0; both ElemWidth and Opcode are ignored.
// NOTE(review): the meaning of a 0 result (e.g. "no limit" vs. "unsupported")
// is not established by this excerpt — confirm against the callers.
426 unsigned getMaximumVF(
unsigned ElemWidth,
unsigned Opcode)
const {
return 0; }
430 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader)
const {
431 AllowPromotionWithoutCommonHeader =
false;
462 unsigned NumStridedMemAccesses,
463 unsigned NumPrefetches,
bool HasCall)
const {
482 case Instruction::FDiv:
483 case Instruction::FRem:
484 case Instruction::SDiv:
485 case Instruction::SRem:
486 case Instruction::UDiv:
487 case Instruction::URem:
508 case Instruction::IntToPtr: {
509 unsigned SrcSize = Src->getScalarSizeInBits();
515 case Instruction::PtrToInt: {
516 unsigned DstSize = Dst->getScalarSizeInBits();
522 case Instruction::BitCast:
523 if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
527 case Instruction::Trunc: {
541 unsigned Index)
const {
562 unsigned Index)
const {
567 const APInt &DemandedDstElts,
593 const Value *Ptr,
bool VariableMask,
603 bool UseMaskForCond,
bool UseMaskForGaps)
const {
609 switch (ICA.
getID()) {
612 case Intrinsic::annotation:
613 case Intrinsic::assume:
614 case Intrinsic::sideeffect:
615 case Intrinsic::pseudoprobe:
616 case Intrinsic::arithmetic_fence:
617 case Intrinsic::dbg_declare:
618 case Intrinsic::dbg_value:
619 case Intrinsic::dbg_label:
620 case Intrinsic::invariant_start:
621 case Intrinsic::invariant_end:
622 case Intrinsic::launder_invariant_group:
623 case Intrinsic::strip_invariant_group:
624 case Intrinsic::is_constant:
625 case Intrinsic::lifetime_start:
626 case Intrinsic::lifetime_end:
627 case Intrinsic::experimental_noalias_scope_decl:
628 case Intrinsic::objectsize:
629 case Intrinsic::ptr_annotation:
630 case Intrinsic::var_annotation:
631 case Intrinsic::experimental_gc_result:
632 case Intrinsic::experimental_gc_relocate:
633 case Intrinsic::coro_alloc:
634 case Intrinsic::coro_begin:
635 case Intrinsic::coro_free:
636 case Intrinsic::coro_end:
637 case Intrinsic::coro_frame:
638 case Intrinsic::coro_size:
639 case Intrinsic::coro_align:
640 case Intrinsic::coro_suspend:
641 case Intrinsic::coro_subfn_addr:
658 const SCEV *)
const {
698 Type *ExpectedType)
const {
703 unsigned SrcAddrSpace,
unsigned DestAddrSpace,
704 unsigned SrcAlign,
unsigned DestAlign,
712 unsigned RemainingBytes,
unsigned SrcAddrSpace,
unsigned DestAddrSpace,
713 unsigned SrcAlign,
unsigned DestAlign,
715 unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
717 for (
unsigned i = 0;
i != RemainingBytes;
i += OpSizeInBytes)
718 OpsOut.push_back(OpType);
723 return (Caller->getFnAttribute(
"target-cpu") ==
724 Callee->getFnAttribute(
"target-cpu")) &&
725 (Caller->getFnAttribute(
"target-features") ==
726 Callee->getFnAttribute(
"target-features"));
731 return (Caller->getFnAttribute(
"target-cpu") ==
732 Callee->getFnAttribute(
"target-cpu")) &&
733 (Caller->getFnAttribute(
"target-features") ==
734 Callee->getFnAttribute(
"target-features"));
754 unsigned AddrSpace)
const {
759 unsigned AddrSpace)
const {
771 unsigned ChainSizeInBytes,
777 unsigned ChainSizeInBytes,
801 Align Alignment)
const {
816 if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
817 const auto *VectorValue = cast<Constant>(Val);
821 auto *VT = cast<FixedVectorType>(Val->
getType());
827 unsigned MaxRequiredSize =
828 VT->getElementType()->getPrimitiveSizeInBits().getFixedSize();
830 unsigned MinRequiredSize = 0;
831 for (
unsigned i = 0,
e = VT->getNumElements();
i <
e; ++
i) {
832 if (
auto *IntElement =
833 dyn_cast<ConstantInt>(VectorValue->getAggregateElement(
i))) {
834 bool signedElement = IntElement->getValue().isNegative();
836 unsigned ElementMinRequiredSize =
837 IntElement->getValue().getMinSignedBits() - 1;
839 isSigned |= signedElement;
841 MinRequiredSize =
std::max(MinRequiredSize, ElementMinRequiredSize);
844 return MaxRequiredSize;
847 return MinRequiredSize;
850 if (
const auto *CI = dyn_cast<ConstantInt>(Val)) {
851 isSigned = CI->getValue().isNegative();
852 return CI->getValue().getMinSignedBits() - 1;
855 if (
const auto *Cast = dyn_cast<SExtInst>(Val)) {
857 return Cast->getSrcTy()->getScalarSizeInBits() - 1;
860 if (
const auto *Cast = dyn_cast<ZExtInst>(Val)) {
862 return Cast->getSrcTy()->getScalarSizeInBits();
870 return Ptr && isa<SCEVAddRecExpr>(Ptr);
874 const SCEV *Ptr)
const {
882 int64_t MergeDistance)
const {
896 template <
typename T>
910 assert(PointeeType && Ptr &&
"can't get GEPCost of nullptr");
912 ->isOpaqueOrPointeeTypeMatches(PointeeType) &&
913 "explicit pointee type doesn't match operand's pointee type");
915 bool HasBaseReg = (BaseGV ==
nullptr);
918 APInt BaseOffset(PtrSizeBits, 0);
922 Type *TargetType =
nullptr;
930 TargetType = GTI.getIndexedType();
933 const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*
I);
936 ConstIdx = dyn_cast<ConstantInt>(Splat);
937 if (
StructType *STy = GTI.getStructTypeOrNull()) {
939 assert(ConstIdx &&
"Unexpected GEP index");
945 if (isa<ScalableVectorType>(TargetType))
947 int64_t ElementSize =
974 auto *TargetTTI =
static_cast<T *
>(
this);
977 auto *CB = dyn_cast<CallBase>(U);
978 if (CB && !isa<IntrinsicInst>(U)) {
979 if (
const Function *
F = CB->getCalledFunction()) {
980 if (!TargetTTI->isLoweredToCall(
F))
991 auto *
I = dyn_cast<Instruction>(U);
996 assert(isa<IntrinsicInst>(U) &&
"Unexpected non-intrinsic call");
997 auto *Intrinsic = cast<IntrinsicInst>(U);
999 return TargetTTI->getIntrinsicInstrCost(CostAttrs,
CostKind);
1001 case Instruction::Br:
1003 case Instruction::PHI:
1004 case Instruction::Switch:
1005 return TargetTTI->getCFInstrCost(Opcode,
CostKind,
I);
1006 case Instruction::ExtractValue:
1007 case Instruction::Freeze:
1009 case Instruction::Alloca:
1010 if (cast<AllocaInst>(U)->isStaticAlloca())
1013 case Instruction::GetElementPtr: {
1014 const auto *
GEP = cast<GEPOperator>(U);
1015 return TargetTTI->getGEPCost(
GEP->getSourceElementType(),
1016 GEP->getPointerOperand(),
1020 case Instruction::FAdd:
1021 case Instruction::Sub:
1022 case Instruction::FSub:
1024 case Instruction::FMul:
1025 case Instruction::UDiv:
1026 case Instruction::SDiv:
1027 case Instruction::FDiv:
1028 case Instruction::URem:
1029 case Instruction::SRem:
1030 case Instruction::FRem:
1031 case Instruction::Shl:
1032 case Instruction::LShr:
1033 case Instruction::AShr:
1034 case Instruction::And:
1035 case Instruction::Or:
1036 case Instruction::Xor:
1037 case Instruction::FNeg: {
1045 return TargetTTI->getArithmeticInstrCost(Opcode, Ty,
CostKind,
1049 case Instruction::IntToPtr:
1050 case Instruction::PtrToInt:
1051 case Instruction::SIToFP:
1052 case Instruction::UIToFP:
1053 case Instruction::FPToUI:
1054 case Instruction::FPToSI:
1055 case Instruction::Trunc:
1056 case Instruction::FPTrunc:
1057 case Instruction::BitCast:
1058 case Instruction::FPExt:
1059 case Instruction::SExt:
1060 case Instruction::ZExt:
1061 case Instruction::AddrSpaceCast: {
1063 return TargetTTI->getCastInstrCost(
1067 auto *
SI = cast<StoreInst>(U);
1069 return TargetTTI->getMemoryOpCost(Opcode, ValTy,
SI->getAlign(),
1070 SI->getPointerAddressSpace(),
1074 auto *LI = cast<LoadInst>(U);
1085 if (
const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
1086 LoadType = TI->getDestTy();
1088 return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
1093 const Value *Op0, *Op1;
1106 return TargetTTI->getArithmeticInstrCost(
1111 return TargetTTI->getCmpSelInstrCost(Opcode, U->
getType(), CondTy,
1115 case Instruction::ICmp:
1116 case Instruction::FCmp: {
1119 return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->
getType(),
1120 I ? cast<CmpInst>(
I)->getPredicate()
1124 case Instruction::InsertElement: {
1125 auto *
IE = dyn_cast<InsertElementInst>(U);
1129 if (
auto *CI = dyn_cast<ConstantInt>(
IE->getOperand(2)))
1130 if (CI->getValue().getActiveBits() <= 32)
1131 Idx = CI->getZExtValue();
1132 return TargetTTI->getVectorInstrCost(Opcode, Ty, Idx);
1134 case Instruction::ShuffleVector: {
1135 auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
1139 auto *VecTy = cast<VectorType>(U->
getType());
1141 int NumSubElts, SubIndex;
1143 if (Shuffle->changesLength()) {
1145 if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
1148 if (Shuffle->isExtractSubvectorMask(SubIndex))
1150 Shuffle->getShuffleMask(), SubIndex,
1153 if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1154 return TargetTTI->getShuffleCost(
1160 int ReplicationFactor, VF;
1161 if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
1162 APInt DemandedDstElts =
1164 for (
auto I :
enumerate(Shuffle->getShuffleMask())) {
1166 DemandedDstElts.
setBit(
I.index());
1168 return TargetTTI->getReplicationShuffleCost(
1169 VecSrcTy->getElementType(), ReplicationFactor, VF,
1176 if (Shuffle->isIdentity())
1179 if (Shuffle->isReverse())
1181 Shuffle->getShuffleMask(), 0,
nullptr,
1184 if (Shuffle->isSelect())
1186 Shuffle->getShuffleMask(), 0,
nullptr,
1189 if (Shuffle->isTranspose())
1191 Shuffle->getShuffleMask(), 0,
nullptr,
1194 if (Shuffle->isZeroEltSplat())
1196 Shuffle->getShuffleMask(), 0,
nullptr,
1199 if (Shuffle->isSingleSource())
1201 Shuffle->getShuffleMask(), 0,
nullptr,
1204 if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1205 return TargetTTI->getShuffleCost(
1210 Shuffle->getShuffleMask(), 0,
nullptr,
1213 case Instruction::ExtractElement: {
1214 auto *EEI = dyn_cast<ExtractElementInst>(U);
1218 if (
auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1)))
1219 if (CI->getValue().getActiveBits() <= 32)
1220 Idx = CI->getZExtValue();
1222 return TargetTTI->getVectorInstrCost(Opcode, DstTy, Idx);
1234 if (isa<LoadInst>(
I))
1237 Type *DstTy =
I->getType();
1241 if (
auto *CI = dyn_cast<CallInst>(
I)) {
1242 const Function *
F = CI->getCalledFunction();
1247 if (
StructType *StructTy = dyn_cast<StructType>(DstTy))
1248 DstTy = StructTy->getElementType(0);
1252 if (
VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
1253 DstTy = VectorTy->getElementType();
This is an optimization pass for GlobalISel generic memory operations.
A parsed version of the target data layout string, and methods for querying it.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
ScalarTy getFixedSize() const
Represents a single loop in the control flow graph.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
instcombine should handle this C2 when C1
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
const APInt & getValue() const
Return the constant as an APInt value reference.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
unsigned getPointerTypeSizeInBits(Type *) const
Layout pointer size, in bits, based on the type.
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are pairs (A,B) such that A is the 0-based ...
int64_t getSExtValue() const
Get sign extended value.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
The main scalar evolution driver.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
const StructLayout * getStructLayout(StructType *Ty) const
Returns a StructLayout object, indicating the alignment of the struct, its size, and the offsets of i...
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getBitWidth() const
Return the number of bits in the APInt.
So we should use XX3Form_Rcr to implement intrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store Vector
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
gep_type_iterator gep_type_begin(const User *GEP)
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
constexpr int UndefMaskElem
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
This is the shared class of boolean and integer constants.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
static ElementCount get(ScalarTy MinVal, bool Scalable)
void setBit(unsigned BitPosition)
Set the bit whose position is given as "BitPosition" to 1.
bool match(Val *V, const Pattern &P)
(vector float) vec_cmpeq(*A, *B) C
bool isVectorTy() const
True if this is an instance of VectorType.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Analysis containing CSE Info
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
This struct is a compact representation of a valid (non-zero power of two) alignment.
static TypeSize getFixed(ScalarTy MinVal)
Analysis providing profile information.
mir Rename Register Operands
Base class of all SIMD vector types.
This class represents an analyzed expression in the program.
An instruction for storing to memory.
This is an important base class in LLVM.
LogicalOp_match< LHS, RHS, Instruction::Or > m_LogicalOr(const LHS &L, const RHS &R)
Matches L || R either in the form of L | R or L ? true : R.
This class represents a truncation of integer types.
This is an important class for using LLVM in a threaded context.
Drive the analysis of memory accesses in the loop.
This class represents a constant integer value.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
StandardInstrumentations SI(Debug, VerifyEach)
print Print MemDeps of function
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Class to represent struct types.
StringRef - Represent a constant reference to a string, i.e.
A cache of @llvm.assume calls within a function.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Type * getType() const
All values are typed, get the type of this value.
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
An instruction for reading from memory.
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
amdgpu Simplify well known AMD library false FunctionCallee Callee
static APInt getNullValue(unsigned numBits)
NOTE: This is soft-deprecated. Please use getZero() instead.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
uint64_t getElementOffset(unsigned Idx) const
This is the common base class for vector predication intrinsics.
TargetTransformInfo::VPLegalization VPLegalization
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
This node represents a polynomial recurrence on the trip count of the specified loop.
constexpr unsigned BitWidth
Provides information about what library functions are available for the current target.
iterator_range< value_op_iterator > operand_values()
The core instruction combiner logic.
const APInt & getAPInt() const
A wrapper class for inspecting calls to intrinsic functions.
Attributes of a target dependent hardware loop.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Intrinsic::ID getID() const
Align max(MaybeAlign Lhs, Align Rhs)
Information about a load/store intrinsic defined by the target.
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Value * getOperand(unsigned i) const
Conditional or Unconditional Branch instruction.
LLVM Value Representation.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...