#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
  // (from getGEPCost) In the basic model, all-constant GEPs are assumed to
  // fold into their uses via addressing modes:
  if (!isa<Constant>(Operand))
    return TTI::TCC_Basic;

  // (from getEstimatedNumberOfCaseClusters) The conservative default treats
  // every case of the switch as its own cluster:
  return SI.getNumCases();
  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const {
    return std::make_pair(nullptr, -1);
  }
  bool isLoweredToCall(const Function *F) const {
    assert(F && "A concrete function must be provided to this routine.");

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node
    // (additional names elided in this excerpt).
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl")
      return false;

    // These are all likely to be optimized into something smaller
    // (additional names elided in this excerpt).
    if (Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round")
      return false;

    return true;
  }
  std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask,
      KnownBits &Known, bool &KnownBitsComputed) const {
    return std::nullopt;
  }
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts,
      APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const {
    return std::nullopt;
  }
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I = nullptr,
                             int64_t ScalableOffset = 0) const {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }
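  // Illustrative only (not part of the header): probing the default above
  // through the public TargetTransformInfo API. `TTI` and `Int32Ty` are
  // assumed to come from the caller.
  //
  //   bool RegOnly = TTI.isLegalAddressingMode(Int32Ty, /*BaseGV=*/nullptr,
  //                                            /*BaseOffset=*/0,
  //                                            /*HasBaseReg=*/true,
  //                                            /*Scale=*/0);
  //   // RegOnly is true: plain "reg" addressing is accepted.
  //   bool Scaled = TTI.isLegalAddressingMode(Int32Ty, nullptr, 0, true,
  //                                           /*Scale=*/4);
  //   // Scaled is false: "base + 4*idx" needs a target override.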
  // (fragment: an Align-taking legality hook; the enclosing signature was
  //  lost in this excerpt)
      Align Alignment) const {
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) const {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset.getFixed(), HasBaseReg,
                              Scale, AddrSpace, /*I=*/nullptr,
                              BaseOffset.getScalable()))
      return 0;
    return -1;
  }
  bool useAA() const { return false; }
  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           TTI::TargetCostKind CostKind) const {
    return 0;
  }
  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const {
    return {};
  }
    // (from shouldTreatInstructionLikeSelect) Logical and/or selects are
    // better treated as and/or by the backend:
    return isa<SelectInst>(I) &&
           !match(I, m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
                                 m_LogicalOr(m_Value(), m_Value())));
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      unsigned *Fast) const {
    return false;
  }
474 return "Generic::Unknown Register Class";
476 return "Generic::ScalarRC";
478 return "Generic::VectorRC";
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }
  std::optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    return std::nullopt; // the default knows no cache sizes
  }
  std::optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    return std::nullopt; // the default knows no associativity
  }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches, bool HasCall) const {
    return 1;
  }
    // (from the default getArithmeticInstrCost) Widenable conditions fold to
    // constants once guards are widened, so logic ops on them become free:
    auto IsWidenableCondition = [](const Value *V) {
      if (auto *II = dyn_cast<IntrinsicInst>(V))
        if (II->getIntrinsicID() == Intrinsic::experimental_widenable_condition)
          return true;
      return false;
    };
    switch (Opcode) {
    default:
      break;
    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      // FIXME: Unlikely to be true for CodeSize.
      return TTI::TCC_Expensive;
    case Instruction::And:
    case Instruction::Or:
      if (any_of(Args, IsWidenableCondition))
        return TTI::TCC_Free;
      break;
    }
    return 1;
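    // Illustrative only: with the table above, "udiv i32 %a, %b" is priced
    // TTI::TCC_Expensive (4), while the `and` below is TTI::TCC_Free (0)
    // because its operand is a widenable condition:
    //
    //   %w = call i1 @llvm.experimental.widenable.condition()
    //   %g = and i1 %cond, %w      ; modeled as free by the default above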
    case Instruction::IntToPtr: {
      unsigned SrcSize = Src->getScalarSizeInBits();
      if (DL.isLegalInteger(SrcSize) &&
          SrcSize <= DL.getPointerTypeSizeInBits(Dst))
        return 0; // no wider than a pointer: free
      break;
    }
    case Instruction::PtrToInt: {
      unsigned DstSize = Dst->getScalarSizeInBits();
      if (DL.isLegalInteger(DstSize) &&
          DstSize >= DL.getPointerTypeSizeInBits(Src))
        return 0; // wide enough to hold the pointer: free
      break;
    }
    case Instruction::BitCast:
      if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
        return 0; // identity and pointer-to-pointer casts are free
      break;
    case Instruction::Trunc: {
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      TypeSize DstSize = DL.getTypeSizeInBits(Dst);
      if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedValue()))
        return 0;
      break;
    }
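    // Illustrative only: under a 64-bit DataLayout such as "e-p:64:64-n32:64",
    // the checks above make "ptrtoint ptr %p to i64" and "inttoptr i64 %x to
    // ptr" free, and "trunc i64 %x to i32" free because i32 is a native
    // integer width; truncation to an illegal width such as i17 falls through
    // to the generic cost instead.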
  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index) const {
    return 1;
  }
  InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
                                            int VF,
                                            const APInt &DemandedDstElts,
                                            TTI::TargetCostKind CostKind) const {
    return 1;
  }
  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) const {
    return 1;
  }
  InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) const {
    return InstructionCost::getInvalid();
  }
  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) const {
    return 1;
  }
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind) const {
    switch (ICA.getID()) {
    default:
      break;
    case Intrinsic::experimental_vector_histogram_add:
      // For now, we want explicit support from the target for histograms.
      return InstructionCost::getInvalid();
    case Intrinsic::allow_runtime_check:
    case Intrinsic::allow_ubsan_check:
    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::arithmetic_fence:
    case Intrinsic::dbg_assign:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_align:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_subfn_addr:
    case Intrinsic::threadlocal_address:
    case Intrinsic::experimental_widenable_condition:
    case Intrinsic::ssa_copy:
      // These intrinsics don't actually represent code after lowering.
      return 0;
    }
    return 1;
  }
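  // Illustrative only (variable names hypothetical): querying the table above
  // through the public API. llvm.assume comes back as 0 (free), while an
  // intrinsic not in the list, e.g. llvm.sqrt.f32, falls through and costs 1
  // unless the target overrides the hook.
  //
  //   IntrinsicCostAttributes Attrs(Assume->getIntrinsicID(), *Assume);
  //   InstructionCost C = TTI.getIntrinsicInstrCost(
  //       Attrs, TargetTransformInfo::TCK_SizeAndLatency);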
  InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                            const SCEV *) const {
    return 0;
  }
  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind) const {
    return 1;
  }
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const {
    return nullptr;
  }
  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAddrSpace, unsigned DestAddrSpace,
                                  Align SrcAlign, Align DestAlign,
                                  std::optional<uint32_t> AtomicElementSize) const {
    return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
                             : Type::getInt8Ty(Context);
  }

  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      Align SrcAlign, Align DestAlign,
      std::optional<uint32_t> AtomicCpySize) const {
    unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
    Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
    for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
      OpsOut.push_back(OpType);
  }
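  // Illustrative only: with the defaults above, an 8-byte residual copy with
  // AtomicCpySize = 4 emits two i32 ops, while with no atomic element size it
  // emits eight i8 ops (OpSizeInBytes falls back to 1). Note the loop assumes
  // RemainingBytes is a multiple of the atomic element size.
  //
  //   SmallVector<Type *, 8> Ops;
  //   TTI.getMemcpyLoopResidualLoweringType(Ops, Ctx, /*RemainingBytes=*/8,
  //                                         /*SrcAddrSpace=*/0,
  //                                         /*DestAddrSpace=*/0, Align(4),
  //                                         Align(4), /*AtomicCpySize=*/4);
  //   // Ops now holds {i32, i32}.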
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
                                unsigned DefaultCallPenalty) const {
    return DefaultCallPenalty;
  }

  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const {
    return true;
  }
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const {
    return true;
  }
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const {
    return VF;
  }
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
    return VF;
  }
  // (fragment: another Align-taking hook; the enclosing signature was lost in
  //  this excerpt)
      Align Alignment) const {
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // In order to guess the smallest element size the values fit in, start
      // from the widest possible size and narrow per element.
      auto *VT = cast<FixedVectorType>(Val->getType());

      unsigned MaxRequiredSize =
          VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Only the value bits matter; the sign bit is tracked separately.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getSignificantBits() - 1;
          isSigned |= signedElement;
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // Not an integer constant element: fall back to the full width.
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getSignificantBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }
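  // Illustrative only, worked example for the vector path above: for the
  // constant <2 x i32> <i32 3, i32 -4>, getSignificantBits() is 3 for both
  // elements (011 and 100 in two's complement), so each contributes
  // 3 - 1 == 2 value bits; the function returns 2 with isSigned set, meaning
  // the elements fit in an i3 once the sign bit is added back.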
  bool isStridedAccess(const SCEV *Ptr) const {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) const {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take absolute value for negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands, Type *AccessType,
                             TTI::TargetCostKind CostKind) {
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume the cost of a scalar GEP with constant index and of a
      // vector GEP with splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures the index is always a splat or scalar constant.
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        // If this operand is scalable, bail out conservatively.
        if (TargetType->isScalableTy())
          return TTI::TCC_Basic;
        int64_t ElementSize =
            GTI.getSequentialElementStride(DL).getFixedValue();
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs a scale register; no addressing mode takes two of them.
          if (Scale != 0)
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    // If we haven't been provided a hint, use the indexed type for now.
    if (!AccessType)
      AccessType = TargetType;

    // The GEP folds away if its result is a legal addressing mode for the
    // access; otherwise it costs one add.
    if (static_cast<T *>(this)->isLegalAddressingMode(
            AccessType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
            Ptr->getType()->getPointerAddressSpace()))
      return TTI::TCC_Free;
    return TTI::TCC_Basic;
  }
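  // Illustrative only, worked example for the loop above: for
  //   %f = getelementptr {i32, i32}, ptr %p, i32 0, i32 1
  // the struct branch adds getElementOffset(1) == 4 to BaseOffset and Scale
  // stays 0, so the GEP is free exactly when "%p + 4" is a legal addressing
  // mode for the access type. A variable index into an i32 array instead
  // leaves BaseOffset at 0 and sets Scale = 4, which the conservative default
  // isLegalAddressingMode rejects.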
    for (const Value *V : Ptrs) {
      const auto *GEP = dyn_cast<GetElementPtrInst>(V);
      if (!GEP)
        continue;
      if (Info.isSameBase() && V != Base) {
        if (GEP->hasAllConstantIndices())
          continue;
        Cost += static_cast<T *>(this)->getArithmeticInstrCost(
            Instruction::Add, GEP->getType(), CostKind,
            {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None},
            {});
      } else {
        SmallVector<const Value *> Indices(GEP->indices());
        Cost += static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
                                                   GEP->getPointerOperand(),
                                                   Indices, AccessTy, CostKind);
      }
    }
  InstructionCost getInstructionCost(const User *U,
                                     ArrayRef<const Value *> Operands,
                                     TTI::TargetCostKind CostKind) {
    using namespace llvm::PatternMatch;

    auto *TargetTTI = static_cast<T *>(this);
    // Handle non-intrinsic calls, invokes, and callbr.
    auto *CB = dyn_cast<CallBase>(U);
    if (CB && !isa<IntrinsicInst>(U)) {
      if (const Function *F = CB->getCalledFunction()) {
        if (!TargetTTI->isLoweredToCall(F))
          return TTI::TCC_Basic; // Give a basic cost if it will be lowered.
        return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
      }
      // For indirect or other calls, scale cost by number of arguments.
      return TTI::TCC_Basic * (CB->arg_size() + 1);
    }
    Type *Ty = U->getType();
    unsigned Opcode = Operator::getOpcode(U);
    auto *I = dyn_cast<Instruction>(U);
    switch (Opcode) {
    default:
      break;
    case Instruction::Call: {
      assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
      auto *Intrinsic = cast<IntrinsicInst>(U);
      IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
      return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
    }
    case Instruction::Br:
    case Instruction::Ret:
    case Instruction::PHI:
    case Instruction::Switch:
      return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
    case Instruction::ExtractValue:
    case Instruction::Freeze:
      return TTI::TCC_Free;
    case Instruction::Alloca:
      if (cast<AllocaInst>(U)->isStaticAlloca())
        return TTI::TCC_Free;
      break;
    case Instruction::GetElementPtr: {
      const auto *GEP = cast<GEPOperator>(U);
      Type *AccessType = nullptr;
      // For now, only provide the AccessType in the simple case where the
      // GEP only has one user.
      if (GEP->hasOneUser() && I)
        AccessType = I->user_back()->getAccessType();
      return TargetTTI->getGEPCost(GEP->getSourceElementType(),
                                   Operands.front(), Operands.drop_front(),
                                   AccessType, CostKind);
    }
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FNeg: {
      const TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(Operands[0]);
      TTI::OperandValueInfo Op2Info;
      if (Opcode != Instruction::FNeg)
        Op2Info = TTI::getOperandInfo(Operands[1]);
      return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                               Op2Info, Operands, I);
    }
    case Instruction::IntToPtr:
    case Instruction::PtrToInt:
    case Instruction::SIToFP:
    case Instruction::UIToFP:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::Trunc:
    case Instruction::FPTrunc:
    case Instruction::BitCast:
    case Instruction::FPExt:
    case Instruction::SExt:
    case Instruction::ZExt:
    case Instruction::AddrSpaceCast: {
      Type *OpTy = Operands[0]->getType();
      return TargetTTI->getCastInstrCost(
          Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
    }
    case Instruction::Store: {
      auto *SI = cast<StoreInst>(U);
      Type *ValTy = Operands[0]->getType();
      TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(Operands[0]);
      return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
                                        SI->getPointerAddressSpace(), CostKind,
                                        OpInfo, I);
    }
    case Instruction::Load: {
      // FIXME: Arbitrary cost which could come from the backend.
      if (CostKind == TTI::TCK_Latency)
        return 4;
      auto *LI = cast<LoadInst>(U);
      Type *LoadType = U->getType();
      // If the only use of the load is a trunc to a register-sized type, the
      // selector can fold both into one load, so cost the trunc's type.
      if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
          !LoadType->isVectorTy()) {
        if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
          LoadType = TI->getDestTy();
      }
      return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
                                        LI->getPointerAddressSpace(), CostKind,
                                        {TTI::OK_AnyValue, TTI::OP_None}, I);
    }
    case Instruction::Select: {
      const Value *Op0, *Op1;
      // Logical and/or selects lower to And/Or, so price them that way:
      //   select x, y, false --> x & y ; select x, true, y --> x | y
      if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
          match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
        return TargetTTI->getArithmeticInstrCost(
            match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
            CostKind, TTI::getOperandInfo(Op0), TTI::getOperandInfo(Op1),
            Operands, I);
      Type *CondTy = Operands[0]->getType();
      return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
                                           CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::ICmp:
    case Instruction::FCmp: {
      Type *ValTy = Operands[0]->getType();
      return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                           I ? cast<CmpInst>(I)->getPredicate()
                                             : CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::InsertElement: {
      auto *IE = dyn_cast<InsertElementInst>(U);
      if (!IE)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(Operands[2]))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
    }
    case Instruction::ShuffleVector: {
      auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
      if (!Shuffle)
        return TTI::TCC_Basic; // FIXME

      auto *VecTy = cast<VectorType>(U->getType());
      auto *VecSrcTy = cast<VectorType>(Operands[0]->getType());
      ArrayRef<int> Mask = Shuffle->getShuffleMask();
      int NumSubElts, SubIndex;

      if (Shuffle->changesLength()) {
        // Treat a 'subvector widening' as a free shuffle.
        if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
          return 0;

        if (Shuffle->isExtractSubvectorMask(SubIndex))
          return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
                                           Mask, CostKind, SubIndex, VecTy,
                                           Operands, Shuffle);

        if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
          return TargetTTI->getShuffleCost(
              TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
              FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
              Operands, Shuffle);

        int ReplicationFactor, VF;
        if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
          APInt DemandedDstElts = APInt::getZero(Mask.size());
          for (auto I : enumerate(Mask))
            if (I.value() != PoisonMaskElem)
              DemandedDstElts.setBit(I.index());
          return TargetTTI->getReplicationShuffleCost(
              VecSrcTy->getElementType(), ReplicationFactor, VF,
              DemandedDstElts, CostKind);
        }

        bool IsUnary = isa<UndefValue>(Operands[1]);
        NumSubElts = VecSrcTy->getElementCount().getKnownMinValue();
        SmallVector<int, 16> AdjustMask(Mask);

        // Widening shuffle: widen the source(s) for free (see above), then
        // shuffle at the wider length with an adjusted mask.
        if (Shuffle->increasesLength()) {
          for (int &M : AdjustMask)
            M = M >= NumSubElts ? (M + (Mask.size() - NumSubElts)) : M;
          return TargetTTI->getShuffleCost(
              IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy,
              AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
        }

        // Narrowing shuffle: shuffle at the original width, then extract the
        // low subvector.
        AdjustMask.append(NumSubElts - Mask.size(), PoisonMaskElem);
        InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
            IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc,
            VecSrcTy, AdjustMask, CostKind, 0, nullptr);

        SmallVector<int, 16> ExtractMask(Mask.size());
        std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
        return ShuffleCost + TargetTTI->getShuffleCost(
                                 TTI::SK_ExtractSubvector, VecSrcTy,
                                 ExtractMask, CostKind, 0, VecTy, {}, Shuffle);
      }

      if (Shuffle->isIdentity())
        return 0;
      if (Shuffle->isReverse())
        return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, Mask, CostKind,
                                         0, nullptr, Operands, Shuffle);
      if (Shuffle->isSelect())
        return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, Mask, CostKind,
                                         0, nullptr, Operands, Shuffle);
      if (Shuffle->isTranspose())
        return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, Mask,
                                         CostKind, 0, nullptr, Operands,
                                         Shuffle);
      if (Shuffle->isZeroEltSplat())
        return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, Mask,
                                         CostKind, 0, nullptr, Operands,
                                         Shuffle);
      if (Shuffle->isSingleSource())
        return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, Mask,
                                         CostKind, 0, nullptr, Operands,
                                         Shuffle);
      if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
        return TargetTTI->getShuffleCost(
            TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
            FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands,
            Shuffle);
      if (Shuffle->isSplice(SubIndex))
        return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, Mask, CostKind,
                                         SubIndex, nullptr, Operands, Shuffle);

      return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, Mask,
                                       CostKind, 0, nullptr, Operands, Shuffle);
    }
    case Instruction::ExtractElement: {
      auto *EEI = dyn_cast<ExtractElementInst>(U);
      if (!EEI)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(Operands[1]))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      Type *DstTy = Operands[0]->getType();
      return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
    }
    }
    // By default, just classify everything remaining as 'basic', or signal an
    // unknown throughput.
    return CostKind == TTI::TCK_RecipThroughput ? -1 : TTI::TCC_Basic;
  }
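  // Illustrative only (variable names hypothetical): the dispatcher above is
  // what TargetTransformInfo::getInstructionCost ultimately reaches, so a
  // pass can sum it over a block:
  //
  //   InstructionCost Total = 0;
  //   for (Instruction &Inst : BB)
  //     Total += TTI.getInstructionCost(
  //         &Inst, TargetTransformInfo::TCK_RecipThroughput);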
  bool isExpensiveToSpeculativelyExecute(const Instruction *I) {
    auto *TargetTTI = static_cast<T *>(this);
    SmallVector<const Value *, 4> Ops(I->operand_values());
    InstructionCost Cost = TargetTTI->getInstructionCost(
        I, Ops, TargetTransformInfo::TCK_SizeAndLatency);
    return Cost >= TargetTransformInfo::TCC_Expensive;
  }
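// Illustrative only (names hypothetical, not part of the header): how a
// target plugs into the CRTP base above. Overriding one hook changes the
// answers computed by the base's getGEPCost/getInstructionCost, because they
// dispatch through static_cast<T *>(this).
//
//   class HypotheticalTTIImpl final
//       : public TargetTransformInfoImplCRTPBase<HypotheticalTTIImpl> {
//     using BaseT = TargetTransformInfoImplCRTPBase<HypotheticalTTIImpl>;
//
//   public:
//     explicit HypotheticalTTIImpl(const DataLayout &DL) : BaseT(DL) {}
//
//     // Also allow "base + 4*index", so i32-array GEPs fold for free.
//     bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
//                                int64_t BaseOffset, bool HasBaseReg,
//                                int64_t Scale, unsigned AddrSpace,
//                                Instruction *I = nullptr,
//                                int64_t ScalableOffset = 0) const {
//       return !BaseGV && BaseOffset == 0 &&
//              (Scale == 0 || Scale == 1 || Scale == 4);
//     }
//   };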