#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

    return SI.getNumCases();
    // isLoweredToCall(): intrinsics, local/unnamed functions, and a set of
    // well-known libm names are assumed to be expanded inline rather than
    // lowered to a real call.
    assert(F && "A concrete function must be provided to this routine.");

    if (F->isIntrinsic())

    if (F->hasLocalLinkage() || !F->hasName())

    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||

        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||

      bool &KnownBitsComputed) const {

      SimplifyAndSetOp) const {
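  // Hedged sketch: how a target TTI built on this header might extend the
  // isLoweredToCall() logic shown above. "MyTTIImpl" and "__my_exp_inline"
  // are hypothetical names used only for illustration, not part of LLVM.
  //
  //   class MyTTIImpl : public TargetTransformInfoImplCRTPBase<MyTTIImpl> {
  //   public:
  //     bool isLoweredToCall(const Function *F) const {
  //       // A runtime helper this target expands inline during lowering.
  //       if (F->hasName() && F->getName() == "__my_exp_inline")
  //         return false;
  //       return TargetTransformInfoImplBase::isLoweredToCall(F);
  //     }
  //   };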
    // isLegalAddressingMode() default: only plain "reg" and "reg + reg"
    // modes (no global base, zero offset, scale of 0 or 1) are accepted.
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
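  // Illustration of the default above (calls and results assumed, for
  // exposition only; AS is any address space):
  //   isLegalAddressingMode(Ty, /*BaseGV=*/nullptr, /*BaseOffset=*/0,
  //                         /*HasBaseReg=*/true, /*Scale=*/1, AS) -> true  (reg + reg)
  //   isLegalAddressingMode(Ty, /*BaseGV=*/nullptr, /*BaseOffset=*/4,
  //                         /*HasBaseReg=*/true, /*Scale=*/0, AS) -> false (nonzero offset)
  //   isLegalAddressingMode(Ty, GV, /*BaseOffset=*/0,
  //                         /*HasBaseReg=*/false, /*Scale=*/0, AS) -> false (global base)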
    // isLSRCostLess(): compare LSR candidate costs lexicographically, most
    // significant criterion first.
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
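  // The std::tie idiom above yields a lexicographic "less than" over the cost
  // components. Minimal self-contained sketch of the same idiom (struct and
  // field names are hypothetical):
  //
  //   #include <tuple>
  //   struct Cost { unsigned Regs, Adds, Setup; };
  //   bool lessCost(const Cost &A, const Cost &B) {
  //     // Regs is compared first; Adds breaks ties; Setup breaks the rest.
  //     return std::tie(A.Regs, A.Adds, A.Setup) <
  //            std::tie(B.Regs, B.Adds, B.Setup);
  //   }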
    unsigned DataSize = DL.getTypeStoreSize(DataType);

    unsigned DataSize = DL.getTypeStoreSize(DataType);

    // getScalingFactorCost(): addressing modes that are legal are free.
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace) const {
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
  bool useAA() const { return false; }

      bool Insert, bool Extract) const {

      bool IsZeroCmp) const {
    // getRegisterClassName() defaults:
      return "Generic::Unknown Register Class";
      return "Generic::ScalarRC";
      return "Generic::VectorRC";

  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
    AllowPromotionWithoutCommonHeader = false;

      unsigned NumStridedMemAccesses, unsigned NumPrefetches,
      bool HasCall) const {
    // getArithmeticInstrCost() default: division and remainder are treated as
    // expensive; other opcodes fall through to a cost of 1.
    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
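  // Hedged sketch of a target override relaxing the default above. The
  // trimmed-down signature is hypothetical; the in-tree hook also takes a
  // cost kind and operand-value info.
  //
  //   unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
  //     if ((Opcode == Instruction::UDiv || Opcode == Instruction::SDiv) &&
  //         Ty->isIntegerTy(32))
  //       return 2; // this hypothetical core has a fast 32-bit divider
  //     return 1;
  //   }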
    // getCastInstrCost() default: int<->pointer casts that fit in a legal
    // integer, bitcasts, and some truncations are free.
    case Instruction::IntToPtr: {
      if (DL.isLegalInteger(SrcSize) &&
          SrcSize <= DL.getPointerTypeSizeInBits(Dst))

    case Instruction::PtrToInt: {
      if (DL.isLegalInteger(DstSize) &&
          DstSize >= DL.getPointerTypeSizeInBits(Src))

    case Instruction::BitCast:

    case Instruction::Trunc: {
      unsigned Index) const {

      unsigned Index) const {

      const Value *Ptr, bool VariableMask,

      bool UseMaskForCond, bool UseMaskForGaps) const {
    // getIntrinsicInstrCost(): annotations, assumes, debug info, lifetime and
    // invariant markers, coroutine bookkeeping and similar intrinsics are
    // free by default.
    switch (ICA.getID()) {
    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_param:
    case Intrinsic::coro_subfn_addr:
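  // All of the cases above produce a "free" cost: these intrinsics carry
  // metadata or bookkeeping and generate no machine code, so cost models must
  // not count them. Hedged sketch of a target marking one more intrinsic as
  // free (simplified signature; the in-tree hook also takes a cost kind):
  //
  //   unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA) const {
  //     if (ICA.getID() == Intrinsic::prefetch)
  //       return TTI::TCC_Free;  // treat prefetch hints as free on this target
  //     return TTI::TCC_Basic;   // everything else costs one unit
  //   }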
      const SCEV *) const {

      Type *ExpectedType) const {

      unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign) const {

      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign) const {
    for (unsigned i = 0; i != RemainingBytes; ++i)
    // Inline/call compatibility defaults: caller and callee must agree on
    // both "target-cpu" and "target-features".
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));

    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));

      unsigned AddrSpace) const {
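  // Both compatibility checks above require identical attribute strings. A
  // hedged sketch of the looser policy some targets implement (purely
  // illustrative; featuresAreSubset() is a hypothetical helper):
  //
  //   bool areInlineCompatible(const Function *Caller,
  //                            const Function *Callee) const {
  //     // Allow inlining when the callee needs no feature the caller lacks.
  //     return featuresAreSubset(
  //         Callee->getFnAttribute("target-features").getValueAsString(),
  //         Caller->getFnAttribute("target-features").getValueAsString());
  //   }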
      unsigned AddrSpace) const {

      unsigned ChainSizeInBytes,

      unsigned ChainSizeInBytes,
    // minRequiredElementSize(): the narrowest element width (in bits) that
    // still represents every lane of a constant vector, or the value /
    // extension width for scalar operands.
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      auto *VT = cast<FixedVectorType>(Val->getType());

      unsigned MaxRequiredSize =
          VT->getElementType()->getPrimitiveSizeInBits().getFixedSize();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();

          unsigned ElementMinRequiredSize =
              IntElement->getValue().getMinSignedBits() - 1;

          isSigned |= signedElement;

          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);

          return MaxRequiredSize;

      return MinRequiredSize;

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getMinSignedBits() - 1;

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      return Cast->getSrcTy()->getScalarSizeInBits();
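  // Worked example for the vector branch above (values assumed, for
  // exposition only): for a constant <4 x i32> vector with lanes
  // {1, 2, 3, -4}, getMinSignedBits() yields {2, 3, 3, 3}, each lane
  // contributes getMinSignedBits() - 1 = {1, 2, 2, 2}, and the loop reports
  // MinRequiredSize = 2 with isSigned = true (the -4 lane is negative).
  // Self-contained sketch of the same per-value computation on plain
  // integers (helper name hypothetical):
  //
  //   unsigned minSignedBits(long long V) { // bits needed to hold V as signed
  //     unsigned Bits = 1;
  //     while (V < -(1LL << (Bits - 1)) || V >= (1LL << (Bits - 1)))
  //       ++Bits;
  //     return Bits;
  //   }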
    return Ptr && isa<SCEVAddRecExpr>(Ptr);

      const SCEV *Ptr) const {
    if (!isStridedAccess(Ptr))

      int64_t MergeDistance) const {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);

// CRTP base class: targets derive their TTI implementation from this so that
// the generic cost logic below can call back into their overrides.
template <typename T>

  using BaseT::getGEPCost;
    // getGEPCost(): fold constant struct/array offsets into a running base
    // offset and ask isLegalAddressingMode() whether the result is free.
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
           "explicit pointee type doesn't match operand's pointee type");

    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);

    Type *TargetType = nullptr;

      TargetType = GTI.getIndexedType();

      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);

        ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        assert(ConstIdx && "Unexpected GEP index");

        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);

      if (isa<ScalableVectorType>(TargetType))

      int64_t ElementSize =
          DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize();
    if (static_cast<T *>(this)->isLegalAddressingMode(

    // getUserCost(): dispatch on the user's opcode and forward to the
    // matching cost hook via the CRTP-derived target implementation.
    auto *TargetTTI = static_cast<T *>(this);
    auto *CB = dyn_cast<CallBase>(U);
    if (CB && !isa<IntrinsicInst>(U)) {
      if (const Function *F = CB->getCalledFunction()) {
        if (!TargetTTI->isLoweredToCall(F))

    auto *I = dyn_cast<Instruction>(U);

      assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
      auto *Intrinsic = cast<IntrinsicInst>(U);

      return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
    case Instruction::Br:
    case Instruction::PHI:
    case Instruction::Switch:
      return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
    case Instruction::ExtractValue:
    case Instruction::Freeze:
    case Instruction::Alloca:
      if (cast<AllocaInst>(U)->isStaticAlloca())
    case Instruction::GetElementPtr: {
      return TargetTTI->getGEPCost(GEP->getSourceElementType(),
                                   GEP->getPointerOperand(),
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FNeg: {
      return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind,
    case Instruction::IntToPtr:
    case Instruction::PtrToInt:
    case Instruction::SIToFP:
    case Instruction::UIToFP:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::Trunc:
    case Instruction::FPTrunc:
    case Instruction::BitCast:
    case Instruction::FPExt:
    case Instruction::SExt:
    case Instruction::ZExt:
    case Instruction::AddrSpaceCast:
      return TargetTTI->getCastInstrCost(
      auto *SI = cast<StoreInst>(U);

      return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
                                        SI->getPointerAddressSpace(),

      auto *LI = cast<LoadInst>(U);
      return TargetTTI->getMemoryOpCost(Opcode, U->getType(), LI->getAlign(),

      const Value *Op0, *Op1;

      return TargetTTI->getArithmeticInstrCost(

      return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
    case Instruction::ICmp:
    case Instruction::FCmp: {

      return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                           I ? cast<CmpInst>(I)->getPredicate()

    case Instruction::InsertElement: {
      auto *IE = dyn_cast<InsertElementInst>(U);

      auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
      unsigned Idx = CI ? CI->getZExtValue() : -1;
      return TargetTTI->getVectorInstrCost(Opcode, Ty, Idx);
    case Instruction::ShuffleVector: {
      auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);

      auto *VecTy = cast<VectorType>(U->getType());

      // Classify the shuffle mask and map it to a TTI shuffle kind before
      // asking for its cost.
      if (Shuffle->isExtractSubvectorMask(SubIndex))
                                 Shuffle->getShuffleMask(), SubIndex,
      else if (Shuffle->changesLength())

      else if (Shuffle->isIdentity())

      else if (Shuffle->isReverse())
                                 Shuffle->getShuffleMask(), 0, nullptr);
      else if (Shuffle->isSelect())
                                 Shuffle->getShuffleMask(), 0, nullptr);
      else if (Shuffle->isTranspose())
                                 Shuffle->getShuffleMask(), 0, nullptr);
      else if (Shuffle->isZeroEltSplat())
                                 Shuffle->getShuffleMask(), 0, nullptr);
      else if (Shuffle->isSingleSource())
                                 Shuffle->getShuffleMask(), 0, nullptr);

                                 Shuffle->getShuffleMask(), 0, nullptr);
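  // Illustration of the classification above with concrete masks on
  // <4 x i32> operands (masks and kinds assumed for exposition):
  //   <0, 1, 2, 3> -> isIdentity()     : no data movement, typically free
  //   <3, 2, 1, 0> -> isReverse()      : SK_Reverse
  //   <0, 5, 2, 7> -> isSelect()       : SK_Select, lane-wise pick of A or B
  //   <0, 0, 0, 0> -> isZeroEltSplat() : SK_Broadcast of element 0
  //   <0, 4, 2, 6> -> isTranspose()    : SK_Transpose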
    case Instruction::ExtractElement: {
      auto *EEI = dyn_cast<ExtractElementInst>(U);

      auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1));
        Idx = CI->getZExtValue();

        return TargetTTI->getArithmeticReductionCost(RdxOpcode, RdxType,

        return TargetTTI->getMinMaxReductionCost(

        return TargetTTI->getMinMaxReductionCost(
    // getInstructionLatency() default: loads and calls that are lowered to
    // real calls get a higher latency estimate; otherwise the result type
    // (after peeling struct/vector wrappers) decides.
    if (isa<LoadInst>(I))

    Type *DstTy = I->getType();

    if (auto *CI = dyn_cast<CallInst>(I)) {
      const Function *F = CI->getCalledFunction();
      if (!F || static_cast<T *>(this)->isLoweredToCall(F))

    if (StructType *StructTy = dyn_cast<StructType>(DstTy))
      DstTy = StructTy->getElementType(0);

    if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
      DstTy = VectorTy->getElementType();
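  // Hedged sketch of how this header is typically consumed: a target derives
  // a concrete TTI implementation from the CRTP base so that the generic
  // getUserCost()/getInstructionLatency() logic above dispatches into its
  // overrides. "MyTargetTTIImpl" is hypothetical; in-tree targets normally go
  // through BasicTTIImplBase, which layers more shared logic on top.
  //
  //   class MyTargetTTIImpl
  //       : public TargetTransformInfoImplCRTPBase<MyTargetTTIImpl> {
  //     using BaseT = TargetTransformInfoImplCRTPBase<MyTargetTTIImpl>;
  //
  //   public:
  //     explicit MyTargetTTIImpl(const DataLayout &DL) : BaseT(DL) {}
  //
  //     // Example override: pretend this target has 32 registers per class.
  //     unsigned getNumberOfRegisters(unsigned ClassID) const { return 32; }
  //   };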