14#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
15#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
55 if (!isa<Constant>(Operand))
68 return SI.getNumCases();
127 std::pair<const Value *, unsigned>
129 return std::make_pair(
nullptr, -1);
138 assert(
F &&
"A concrete function must be provided to this routine.");
145 if (
F->isIntrinsic())
148 if (
F->hasLocalLinkage() || !
F->hasName())
154 if (
Name ==
"copysign" ||
Name ==
"copysignf" ||
Name ==
"copysignl" ||
164 Name ==
"exp2l" ||
Name ==
"exp2f" ||
Name ==
"floor" ||
165 Name ==
"floorf" ||
Name ==
"ceil" ||
Name ==
"round" ||
191 std::optional<Value *>
194 bool &KnownBitsComputed)
const {
202 SimplifyAndSetOp)
const {
218 bool HasBaseReg, int64_t Scale,
unsigned AddrSpace,
222 return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
288 Align Alignment)
const {
312 int64_t BaseOffset,
bool HasBaseReg,
314 unsigned AddrSpace)
const {
328 bool useAA()
const {
return false; }
343 const APInt &DemandedElts,
344 bool Insert,
bool Extract,
369 bool IsZeroCmp)
const {
383 unsigned *
Fast)
const {
433 return "Generic::Unknown Register Class";
435 return "Generic::ScalarRC";
437 return "Generic::VectorRC";
460 unsigned getMaximumVF(
unsigned ElemWidth,
unsigned Opcode)
const {
return 0; }
464 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader)
const {
465 AllowPromotionWithoutCommonHeader =
false;
470 std::optional<unsigned>
481 std::optional<unsigned>
495 unsigned NumStridedMemAccesses,
496 unsigned NumPrefetches,
bool HasCall)
const {
512 auto IsWidenableCondition = [](
const Value *V) {
513 if (
auto *II = dyn_cast<IntrinsicInst>(V))
514 if (II->
getIntrinsicID() == Intrinsic::experimental_widenable_condition)
523 case Instruction::FDiv:
524 case Instruction::FRem:
525 case Instruction::SDiv:
526 case Instruction::SRem:
527 case Instruction::UDiv:
528 case Instruction::URem:
531 case Instruction::And:
532 case Instruction::Or:
533 if (
any_of(Args, IsWidenableCondition))
560 case Instruction::IntToPtr: {
561 unsigned SrcSize = Src->getScalarSizeInBits();
567 case Instruction::PtrToInt: {
568 unsigned DstSize = Dst->getScalarSizeInBits();
574 case Instruction::BitCast:
575 if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
579 case Instruction::Trunc: {
593 unsigned Index)
const {
622 unsigned Index)
const {
627 const APInt &DemandedDstElts,
654 const Value *
Ptr,
bool VariableMask,
664 bool UseMaskForCond,
bool UseMaskForGaps)
const {
670 switch (ICA.
getID()) {
673 case Intrinsic::annotation:
674 case Intrinsic::assume:
675 case Intrinsic::sideeffect:
676 case Intrinsic::pseudoprobe:
677 case Intrinsic::arithmetic_fence:
678 case Intrinsic::dbg_assign:
679 case Intrinsic::dbg_declare:
680 case Intrinsic::dbg_value:
681 case Intrinsic::dbg_label:
682 case Intrinsic::invariant_start:
683 case Intrinsic::invariant_end:
684 case Intrinsic::launder_invariant_group:
685 case Intrinsic::strip_invariant_group:
686 case Intrinsic::is_constant:
687 case Intrinsic::lifetime_start:
688 case Intrinsic::lifetime_end:
689 case Intrinsic::experimental_noalias_scope_decl:
690 case Intrinsic::objectsize:
691 case Intrinsic::ptr_annotation:
692 case Intrinsic::var_annotation:
693 case Intrinsic::experimental_gc_result:
694 case Intrinsic::experimental_gc_relocate:
695 case Intrinsic::coro_alloc:
696 case Intrinsic::coro_begin:
697 case Intrinsic::coro_free:
698 case Intrinsic::coro_end:
699 case Intrinsic::coro_frame:
700 case Intrinsic::coro_size:
701 case Intrinsic::coro_align:
702 case Intrinsic::coro_suspend:
703 case Intrinsic::coro_subfn_addr:
704 case Intrinsic::threadlocal_address:
705 case Intrinsic::experimental_widenable_condition:
722 const SCEV *)
const {
727 std::optional<FastMathFlags> FMF,
769 Type *ExpectedType)
const {
775 unsigned SrcAddrSpace,
unsigned DestAddrSpace,
776 unsigned SrcAlign,
unsigned DestAlign,
777 std::optional<uint32_t> AtomicElementSize)
const {
784 unsigned RemainingBytes,
unsigned SrcAddrSpace,
unsigned DestAddrSpace,
785 unsigned SrcAlign,
unsigned DestAlign,
786 std::optional<uint32_t> AtomicCpySize)
const {
787 unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
789 for (
unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
795 return (Caller->getFnAttribute(
"target-cpu") ==
796 Callee->getFnAttribute(
"target-cpu")) &&
797 (Caller->getFnAttribute(
"target-features") ==
798 Callee->getFnAttribute(
"target-features"));
803 return (Caller->getFnAttribute(
"target-cpu") ==
804 Callee->getFnAttribute(
"target-cpu")) &&
805 (Caller->getFnAttribute(
"target-features") ==
806 Callee->getFnAttribute(
"target-features"));
826 unsigned AddrSpace)
const {
831 unsigned AddrSpace)
const {
843 unsigned ChainSizeInBytes,
849 unsigned ChainSizeInBytes,
879 Align Alignment)
const {
898 if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
899 const auto *VectorValue = cast<Constant>(Val);
903 auto *VT = cast<FixedVectorType>(Val->
getType());
909 unsigned MaxRequiredSize =
910 VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();
912 unsigned MinRequiredSize = 0;
913 for (
unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
914 if (
auto *IntElement =
915 dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
916 bool signedElement = IntElement->getValue().isNegative();
918 unsigned ElementMinRequiredSize =
919 IntElement->getValue().getSignificantBits() - 1;
923 MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
926 return MaxRequiredSize;
929 return MinRequiredSize;
932 if (
const auto *CI = dyn_cast<ConstantInt>(Val)) {
933 isSigned = CI->getValue().isNegative();
934 return CI->getValue().getSignificantBits() - 1;
937 if (
const auto *Cast = dyn_cast<SExtInst>(Val)) {
939 return Cast->getSrcTy()->getScalarSizeInBits() - 1;
942 if (
const auto *Cast = dyn_cast<ZExtInst>(Val)) {
944 return Cast->getSrcTy()->getScalarSizeInBits();
952 return Ptr && isa<SCEVAddRecExpr>(
Ptr);
964 int64_t MergeDistance)
const {
992 assert(PointeeType &&
Ptr &&
"can't get GEPCost of nullptr");
993 auto *BaseGV = dyn_cast<GlobalValue>(
Ptr->stripPointerCasts());
994 bool HasBaseReg = (BaseGV ==
nullptr);
997 APInt BaseOffset(PtrSizeBits, 0);
1001 Type *TargetType =
nullptr;
1009 TargetType = GTI.getIndexedType();
1012 const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*
I);
1015 ConstIdx = dyn_cast<ConstantInt>(
Splat);
1016 if (
StructType *STy = GTI.getStructTypeOrNull()) {
1018 assert(ConstIdx &&
"Unexpected GEP index");
1026 int64_t ElementSize =
1036 Scale = ElementSize;
1051 AccessType = TargetType;
1058 Ptr->getType()->getPointerAddressSpace()))
1083 for (
const Value *V : Ptrs) {
1084 const auto *
GEP = dyn_cast<GetElementPtrInst>(V);
1087 if (
Info.isSameBase() && V !=
Base) {
1088 if (
GEP->hasAllConstantIndices())
1097 GEP->getPointerOperand(),
1109 auto *TargetTTI =
static_cast<T *
>(
this);
1112 auto *CB = dyn_cast<CallBase>(U);
1113 if (CB && !isa<IntrinsicInst>(U)) {
1114 if (
const Function *
F = CB->getCalledFunction()) {
1115 if (!TargetTTI->isLoweredToCall(
F))
1124 Type *Ty = U->getType();
1126 auto *
I = dyn_cast<Instruction>(U);
1130 case Instruction::Call: {
1131 assert(isa<IntrinsicInst>(U) &&
"Unexpected non-intrinsic call");
1132 auto *Intrinsic = cast<IntrinsicInst>(U);
1134 return TargetTTI->getIntrinsicInstrCost(CostAttrs,
CostKind);
1136 case Instruction::Br:
1137 case Instruction::Ret:
1138 case Instruction::PHI:
1139 case Instruction::Switch:
1140 return TargetTTI->getCFInstrCost(Opcode,
CostKind,
I);
1141 case Instruction::ExtractValue:
1142 case Instruction::Freeze:
1144 case Instruction::Alloca:
1145 if (cast<AllocaInst>(U)->isStaticAlloca())
1148 case Instruction::GetElementPtr: {
1149 const auto *
GEP = cast<GEPOperator>(U);
1150 Type *AccessType =
nullptr;
1153 if (
GEP->hasOneUser() &&
I)
1154 AccessType =
I->user_back()->getAccessType();
1156 return TargetTTI->getGEPCost(
GEP->getSourceElementType(),
1160 case Instruction::Add:
1161 case Instruction::FAdd:
1162 case Instruction::Sub:
1163 case Instruction::FSub:
1164 case Instruction::Mul:
1165 case Instruction::FMul:
1166 case Instruction::UDiv:
1167 case Instruction::SDiv:
1168 case Instruction::FDiv:
1169 case Instruction::URem:
1170 case Instruction::SRem:
1171 case Instruction::FRem:
1172 case Instruction::Shl:
1173 case Instruction::LShr:
1174 case Instruction::AShr:
1175 case Instruction::And:
1176 case Instruction::Or:
1177 case Instruction::Xor:
1178 case Instruction::FNeg: {
1181 if (Opcode != Instruction::FNeg)
1183 return TargetTTI->getArithmeticInstrCost(Opcode, Ty,
CostKind, Op1Info,
1186 case Instruction::IntToPtr:
1187 case Instruction::PtrToInt:
1188 case Instruction::SIToFP:
1189 case Instruction::UIToFP:
1190 case Instruction::FPToUI:
1191 case Instruction::FPToSI:
1192 case Instruction::Trunc:
1193 case Instruction::FPTrunc:
1194 case Instruction::BitCast:
1195 case Instruction::FPExt:
1196 case Instruction::SExt:
1197 case Instruction::ZExt:
1198 case Instruction::AddrSpaceCast: {
1200 return TargetTTI->getCastInstrCost(
1203 case Instruction::Store: {
1204 auto *SI = cast<StoreInst>(U);
1207 return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
1208 SI->getPointerAddressSpace(),
CostKind,
1211 case Instruction::Load: {
1215 auto *LI = cast<LoadInst>(U);
1216 Type *LoadType = U->getType();
1226 if (
const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
1227 LoadType = TI->getDestTy();
1229 return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
1231 {TTI::OK_AnyValue, TTI::OP_None},
I);
1233 case Instruction::Select: {
1234 const Value *Op0, *Op1;
1245 return TargetTTI->getArithmeticInstrCost(
1250 return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
1254 case Instruction::ICmp:
1255 case Instruction::FCmp: {
1258 return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
1259 I ? cast<CmpInst>(
I)->getPredicate()
1263 case Instruction::InsertElement: {
1264 auto *IE = dyn_cast<InsertElementInst>(U);
1268 if (
auto *CI = dyn_cast<ConstantInt>(
Operands[2]))
1269 if (CI->getValue().getActiveBits() <= 32)
1270 Idx = CI->getZExtValue();
1271 return TargetTTI->getVectorInstrCost(*IE, Ty,
CostKind,
Idx);
1273 case Instruction::ShuffleVector: {
1274 auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
1278 auto *VecTy = cast<VectorType>(U->getType());
1280 int NumSubElts, SubIndex;
1282 if (Shuffle->changesLength()) {
1284 if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
1287 if (Shuffle->isExtractSubvectorMask(SubIndex))
1289 Shuffle->getShuffleMask(),
CostKind,
1292 if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1293 return TargetTTI->getShuffleCost(
1299 int ReplicationFactor, VF;
1300 if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
1301 APInt DemandedDstElts =
1303 for (
auto I :
enumerate(Shuffle->getShuffleMask())) {
1305 DemandedDstElts.
setBit(
I.index());
1307 return TargetTTI->getReplicationShuffleCost(
1308 VecSrcTy->getElementType(), ReplicationFactor, VF,
1315 if (Shuffle->isIdentity())
1318 if (Shuffle->isReverse())
1320 Shuffle->getShuffleMask(),
CostKind, 0,
1323 if (Shuffle->isSelect())
1325 Shuffle->getShuffleMask(),
CostKind, 0,
1328 if (Shuffle->isTranspose())
1330 Shuffle->getShuffleMask(),
CostKind, 0,
1333 if (Shuffle->isZeroEltSplat())
1335 Shuffle->getShuffleMask(),
CostKind, 0,
1338 if (Shuffle->isSingleSource())
1340 Shuffle->getShuffleMask(),
CostKind, 0,
1343 if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1344 return TargetTTI->getShuffleCost(
1349 if (Shuffle->isSplice(SubIndex))
1351 Shuffle->getShuffleMask(),
CostKind,
1355 Shuffle->getShuffleMask(),
CostKind, 0,
1358 case Instruction::ExtractElement: {
1359 auto *EEI = dyn_cast<ExtractElementInst>(U);
1363 if (
auto *CI = dyn_cast<ConstantInt>(
Operands[1]))
1364 if (CI->getValue().getActiveBits() <= 32)
1365 Idx = CI->getZExtValue();
1367 return TargetTTI->getVectorInstrCost(*EEI, DstTy,
CostKind,
Idx);
1377 auto *TargetTTI =
static_cast<T *
>(
this);
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
static bool isSigned(unsigned int Opcode)
mir Rename Register Operands
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static SymbolRef::Type getType(const Symbol *Sym)
Class for arbitrary precision integers.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
unsigned getBitWidth() const
Return the number of bits in the APInt.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
int64_t getSExtValue() const
Get sign extended value.
an instruction to allocate memory on the stack
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A cache of @llvm.assume calls within a function.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
A parsed version of the target data layout string in and methods for querying it.
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
const StructLayout * getStructLayout(StructType *Ty) const
Returns a StructLayout object, indicating the alignment of the struct, its size, and the offsets of i...
unsigned getPointerTypeSizeInBits(Type *) const
Layout pointer size, in bits, based on the type.
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Convenience struct for specifying and reasoning about fast-math flags.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
The core instruction combiner logic.
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Represents a single loop in the control flow graph.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Analysis providing profile information.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
const APInt & getAPInt() const
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
TypeSize getElementOffset(unsigned Idx) const
Class to represent struct types.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
This is the common base class for vector predication intrinsics.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Base class of all SIMD vector types.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
bool match(Val *V, const Pattern &P)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
This is an optimization pass for GlobalISel generic memory operations.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
constexpr int PoisonMaskElem
constexpr unsigned BitWidth
gep_type_iterator gep_type_begin(const User *GEP)
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead i...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Attributes of a target dependent hardware loop.
Information about a load/store intrinsic defined by the target.