14#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
15#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
55 if (!isa<Constant>(Operand))
68 return SI.getNumCases();
114 std::pair<const Value *, unsigned>
116 return std::make_pair(
nullptr, -1);
125 assert(
F &&
"A concrete function must be provided to this routine.");
132 if (
F->isIntrinsic())
135 if (
F->hasLocalLinkage() || !
F->hasName())
141 if (
Name ==
"copysign" ||
Name ==
"copysignf" ||
Name ==
"copysignl" ||
151 Name ==
"exp2l" ||
Name ==
"exp2f" ||
Name ==
"floor" ||
152 Name ==
"floorf" ||
Name ==
"ceil" ||
Name ==
"round" ||
183 std::optional<Value *>
186 bool &KnownBitsComputed)
const {
194 SimplifyAndSetOp)
const {
210 bool HasBaseReg, int64_t Scale,
unsigned AddrSpace,
214 return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
280 Align Alignment)
const {
304 int64_t BaseOffset,
bool HasBaseReg,
306 unsigned AddrSpace)
const {
// Takes no arguments and unconditionally returns false.
// NOTE(review): the enclosing class is not visible in this extract; presumably
// this is a default answer that a target-specific implementation may replace —
// confirm against the full header.
320 bool useAA()
const {
return false; }
335 const APInt &DemandedElts,
336 bool Insert,
bool Extract,
361 bool IsZeroCmp)
const {
375 unsigned *
Fast)
const {
425 return "Generic::Unknown Register Class";
427 return "Generic::ScalarRC";
429 return "Generic::VectorRC";
// Ignores both ElemWidth and Opcode and unconditionally returns 0.
// NOTE(review): 0 presumably signals "no target-specific maximum" to callers —
// the callers are not visible here, so verify before relying on that reading.
451 unsigned getMaximumVF(
unsigned ElemWidth,
unsigned Opcode)
const {
return 0; }
455 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader)
const {
456 AllowPromotionWithoutCommonHeader =
false;
461 std::optional<unsigned>
472 std::optional<unsigned>
486 unsigned NumStridedMemAccesses,
487 unsigned NumPrefetches,
bool HasCall)
const {
506 case Instruction::FDiv:
507 case Instruction::FRem:
508 case Instruction::SDiv:
509 case Instruction::SRem:
510 case Instruction::UDiv:
511 case Instruction::URem:
538 case Instruction::IntToPtr: {
539 unsigned SrcSize = Src->getScalarSizeInBits();
545 case Instruction::PtrToInt: {
546 unsigned DstSize = Dst->getScalarSizeInBits();
552 case Instruction::BitCast:
553 if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
557 case Instruction::Trunc: {
571 unsigned Index)
const {
600 unsigned Index)
const {
605 const APInt &DemandedDstElts,
632 const Value *
Ptr,
bool VariableMask,
642 bool UseMaskForCond,
bool UseMaskForGaps)
const {
648 switch (ICA.
getID()) {
651 case Intrinsic::annotation:
652 case Intrinsic::assume:
653 case Intrinsic::sideeffect:
654 case Intrinsic::pseudoprobe:
655 case Intrinsic::arithmetic_fence:
656 case Intrinsic::dbg_declare:
657 case Intrinsic::dbg_value:
658 case Intrinsic::dbg_label:
659 case Intrinsic::invariant_start:
660 case Intrinsic::invariant_end:
661 case Intrinsic::launder_invariant_group:
662 case Intrinsic::strip_invariant_group:
663 case Intrinsic::is_constant:
664 case Intrinsic::lifetime_start:
665 case Intrinsic::lifetime_end:
666 case Intrinsic::experimental_noalias_scope_decl:
667 case Intrinsic::objectsize:
668 case Intrinsic::ptr_annotation:
669 case Intrinsic::var_annotation:
670 case Intrinsic::experimental_gc_result:
671 case Intrinsic::experimental_gc_relocate:
672 case Intrinsic::coro_alloc:
673 case Intrinsic::coro_begin:
674 case Intrinsic::coro_free:
675 case Intrinsic::coro_end:
676 case Intrinsic::coro_frame:
677 case Intrinsic::coro_size:
678 case Intrinsic::coro_align:
679 case Intrinsic::coro_suspend:
680 case Intrinsic::coro_subfn_addr:
681 case Intrinsic::threadlocal_address:
698 const SCEV *)
const {
703 std::optional<FastMathFlags> FMF,
715 std::optional<FastMathFlags> FMF,
744 Type *ExpectedType)
const {
750 unsigned SrcAddrSpace,
unsigned DestAddrSpace,
751 unsigned SrcAlign,
unsigned DestAlign,
752 std::optional<uint32_t> AtomicElementSize)
const {
759 unsigned RemainingBytes,
unsigned SrcAddrSpace,
unsigned DestAddrSpace,
760 unsigned SrcAlign,
unsigned DestAlign,
761 std::optional<uint32_t> AtomicCpySize)
const {
762 unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
764 for (
unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
770 return (Caller->getFnAttribute(
"target-cpu") ==
771 Callee->getFnAttribute(
"target-cpu")) &&
772 (Caller->getFnAttribute(
"target-features") ==
773 Callee->getFnAttribute(
"target-features"));
778 return (Caller->getFnAttribute(
"target-cpu") ==
779 Callee->getFnAttribute(
"target-cpu")) &&
780 (Caller->getFnAttribute(
"target-features") ==
781 Callee->getFnAttribute(
"target-features"));
801 unsigned AddrSpace)
const {
806 unsigned AddrSpace)
const {
818 unsigned ChainSizeInBytes,
824 unsigned ChainSizeInBytes,
854 Align Alignment)
const {
869 if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
870 const auto *VectorValue = cast<Constant>(Val);
874 auto *VT = cast<FixedVectorType>(Val->
getType());
880 unsigned MaxRequiredSize =
881 VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();
883 unsigned MinRequiredSize = 0;
884 for (
unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
885 if (
auto *IntElement =
886 dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
887 bool signedElement = IntElement->getValue().isNegative();
889 unsigned ElementMinRequiredSize =
890 IntElement->getValue().getMinSignedBits() - 1;
894 MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
897 return MaxRequiredSize;
900 return MinRequiredSize;
903 if (
const auto *CI = dyn_cast<ConstantInt>(Val)) {
904 isSigned = CI->getValue().isNegative();
905 return CI->getValue().getMinSignedBits() - 1;
908 if (
const auto *Cast = dyn_cast<SExtInst>(Val)) {
910 return Cast->getSrcTy()->getScalarSizeInBits() - 1;
913 if (
const auto *Cast = dyn_cast<ZExtInst>(Val)) {
915 return Cast->getSrcTy()->getScalarSizeInBits();
923 return Ptr && isa<SCEVAddRecExpr>(
Ptr);
935 int64_t MergeDistance)
const {
963 assert(PointeeType &&
Ptr &&
"can't get GEPCost of nullptr");
964 assert(cast<PointerType>(
Ptr->getType()->getScalarType())
965 ->isOpaqueOrPointeeTypeMatches(PointeeType) &&
966 "explicit pointee type doesn't match operand's pointee type");
967 auto *BaseGV = dyn_cast<GlobalValue>(
Ptr->stripPointerCasts());
968 bool HasBaseReg = (BaseGV ==
nullptr);
971 APInt BaseOffset(PtrSizeBits, 0);
975 Type *TargetType =
nullptr;
983 TargetType = GTI.getIndexedType();
986 const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*
I);
989 ConstIdx = dyn_cast<ConstantInt>(Splat);
990 if (
StructType *STy = GTI.getStructTypeOrNull()) {
992 assert(ConstIdx &&
"Unexpected GEP index");
998 if (isa<ScalableVectorType>(TargetType))
1000 int64_t ElementSize =
1010 Scale = ElementSize;
1018 Ptr->getType()->getPointerAddressSpace()))
1028 auto *TargetTTI =
static_cast<T *
>(
this);
1031 auto *CB = dyn_cast<CallBase>(U);
1032 if (CB && !isa<IntrinsicInst>(U)) {
1033 if (
const Function *
F = CB->getCalledFunction()) {
1034 if (!TargetTTI->isLoweredToCall(
F))
1045 auto *
I = dyn_cast<Instruction>(U);
1049 case Instruction::Call: {
1050 assert(isa<IntrinsicInst>(U) &&
"Unexpected non-intrinsic call");
1051 auto *Intrinsic = cast<IntrinsicInst>(U);
1053 return TargetTTI->getIntrinsicInstrCost(CostAttrs,
CostKind);
1055 case Instruction::Br:
1056 case Instruction::Ret:
1057 case Instruction::PHI:
1058 case Instruction::Switch:
1059 return TargetTTI->getCFInstrCost(Opcode,
CostKind,
I);
1060 case Instruction::ExtractValue:
1061 case Instruction::Freeze:
1063 case Instruction::Alloca:
1064 if (cast<AllocaInst>(U)->isStaticAlloca())
1067 case Instruction::GetElementPtr: {
1068 const auto *
GEP = cast<GEPOperator>(U);
1069 return TargetTTI->getGEPCost(
GEP->getSourceElementType(),
1070 GEP->getPointerOperand(),
1073 case Instruction::Add:
1074 case Instruction::FAdd:
1075 case Instruction::Sub:
1076 case Instruction::FSub:
1077 case Instruction::Mul:
1078 case Instruction::FMul:
1079 case Instruction::UDiv:
1080 case Instruction::SDiv:
1081 case Instruction::FDiv:
1082 case Instruction::URem:
1083 case Instruction::SRem:
1084 case Instruction::FRem:
1085 case Instruction::Shl:
1086 case Instruction::LShr:
1087 case Instruction::AShr:
1088 case Instruction::And:
1089 case Instruction::Or:
1090 case Instruction::Xor:
1091 case Instruction::FNeg: {
1094 if (Opcode != Instruction::FNeg)
1097 return TargetTTI->getArithmeticInstrCost(Opcode, Ty,
CostKind, Op1Info,
1100 case Instruction::IntToPtr:
1101 case Instruction::PtrToInt:
1102 case Instruction::SIToFP:
1103 case Instruction::UIToFP:
1104 case Instruction::FPToUI:
1105 case Instruction::FPToSI:
1106 case Instruction::Trunc:
1107 case Instruction::FPTrunc:
1108 case Instruction::BitCast:
1109 case Instruction::FPExt:
1110 case Instruction::SExt:
1111 case Instruction::ZExt:
1112 case Instruction::AddrSpaceCast: {
1114 return TargetTTI->getCastInstrCost(
1117 case Instruction::Store: {
1118 auto *
SI = cast<StoreInst>(U);
1121 return TargetTTI->getMemoryOpCost(Opcode, ValTy,
SI->getAlign(),
1125 case Instruction::Load: {
1129 auto *LI = cast<LoadInst>(U);
1140 if (
const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
1141 LoadType = TI->getDestTy();
1143 return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
1145 {TTI::OK_AnyValue, TTI::OP_None},
I);
1147 case Instruction::Select: {
1148 const Value *Op0, *Op1;
1159 return TargetTTI->getArithmeticInstrCost(
1164 return TargetTTI->getCmpSelInstrCost(Opcode, U->
getType(), CondTy,
1168 case Instruction::ICmp:
1169 case Instruction::FCmp: {
1172 return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->
getType(),
1173 I ? cast<CmpInst>(
I)->getPredicate()
1177 case Instruction::InsertElement: {
1178 auto *IE = dyn_cast<InsertElementInst>(U);
1182 if (
auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2)))
1183 if (CI->getValue().getActiveBits() <= 32)
1184 Idx = CI->getZExtValue();
1185 return TargetTTI->getVectorInstrCost(*IE, Ty,
CostKind,
Idx);
1187 case Instruction::ShuffleVector: {
1188 auto *
Shuffle = dyn_cast<ShuffleVectorInst>(U);
1192 auto *VecTy = cast<VectorType>(U->
getType());
1194 int NumSubElts, SubIndex;
1196 if (
Shuffle->changesLength()) {
1198 if (
Shuffle->increasesLength() &&
Shuffle->isIdentityWithPadding())
1201 if (
Shuffle->isExtractSubvectorMask(SubIndex))
1206 if (
Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1207 return TargetTTI->getShuffleCost(
1213 int ReplicationFactor, VF;
1214 if (
Shuffle->isReplicationMask(ReplicationFactor, VF)) {
1215 APInt DemandedDstElts =
1219 DemandedDstElts.
setBit(
I.index());
1221 return TargetTTI->getReplicationShuffleCost(
1222 VecSrcTy->getElementType(), ReplicationFactor, VF,
1247 if (
Shuffle->isZeroEltSplat())
1252 if (
Shuffle->isSingleSource())
1257 if (
Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1258 return TargetTTI->getShuffleCost(
1263 if (
Shuffle->isSplice(SubIndex))
1272 case Instruction::ExtractElement: {
1273 auto *EEI = dyn_cast<ExtractElementInst>(U);
1277 if (
auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1)))
1278 if (CI->getValue().getActiveBits() <= 32)
1279 Idx = CI->getZExtValue();
1281 return TargetTTI->getVectorInstrCost(*EEI, DstTy,
CostKind,
Idx);
1291 auto *TargetTTI =
static_cast<T *
>(
this);
amdgpu Simplify well known AMD library false FunctionCallee Callee
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static bool isSigned(unsigned int Opcode)
mir Rename Register Operands
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Class for arbitrary precision integers.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
static APInt getNullValue(unsigned numBits)
NOTE: This is soft-deprecated. Please use getZero() instead.
unsigned getBitWidth() const
Return the number of bits in the APInt.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A cache of @llvm.assume calls within a function.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
A parsed version of the target data layout string, and methods for querying it.
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
const StructLayout * getStructLayout(StructType *Ty) const
Returns a StructLayout object, indicating the alignment of the struct, its size, and the offsets of i...
unsigned getPointerTypeSizeInBits(Type *) const
Layout pointer size, in bits, based on the type.
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
The core instruction combiner logic.
Drive the analysis of interleaved memory accesses in the loop.
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
Represents a single loop in the control flow graph.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Analysis providing profile information.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
const APInt & getAPInt() const
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
uint64_t getElementOffset(unsigned Idx) const
Class to represent struct types.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Value * getOperand(unsigned i) const
iterator_range< value_op_iterator > operand_values()
This is the common base class for vector predication intrinsics.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Base class of all SIMD vector types.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
bool match(Val *V, const Pattern &P)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
This is an optimization pass for GlobalISel generic memory operations.
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr int UndefMaskElem
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are pairs (A,B) such that A is the 0-based ...
constexpr unsigned BitWidth
gep_type_iterator gep_type_begin(const User *GEP)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Attributes of a target dependent hardware loop.
Information about a load/store intrinsic defined by the target.