#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64tti"
  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();
  return (CallerBits & CalleeBits) == CalleeBits;
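// A minimal, self-contained sketch of the subset test above, assuming target
// features are plain 64-bit masks rather than LLVM's FeatureBitset (the
// function name is hypothetical):
#include <cstdint>

bool calleeFeaturesAreSubset(uint64_t CallerBits, uint64_t CalleeBits) {
  // Inlining is considered compatible when every feature the callee relies
  // on is also enabled in the caller.
  return (CallerBits & CalleeBits) == CalleeBits;
}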
    ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
    Cost += getIntImmCost(Val);
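// A minimal sketch of the chunking above (hypothetical name, assuming a flat
// cost of 1 per chunk): the immediate is sign-extended to a multiple of 64
// bits ((BitSize + 63) & ~0x3fU), then each 64-bit chunk is costed
// separately.
unsigned chunkedImmCost(unsigned BitSize) {
  unsigned Cost = 0;
  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64)
    Cost += 1; // stand-in for getIntImmCost(<chunk at ShiftVal>)
  return Cost; // e.g. 1 for a 64-bit immediate, 2 for a 128-bit one
}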
  unsigned ImmIdx = ~0U;
  case Instruction::GetElementPtr:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
    int NumConstants = (BitSize + 63) / 64;
  if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    int NumConstants = (BitSize + 63) / 64;
  case Intrinsic::experimental_stackmap:
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
  case Intrinsic::experimental_gc_statepoint:
  if (TyWidth == 32 || TyWidth == 64)

  switch (ICA.getID()) {
    auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
    auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
    unsigned Instrs =
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
    return LT.first * Instrs;
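// A sketch of the saturating-arithmetic costing above (hypothetical name):
// one instruction per legal part when the legalized scalar width matches the
// IR type (SQADD/UQADD and friends), otherwise roughly four instructions,
// all scaled by how many legal-sized pieces the type splits into.
unsigned satArithCost(unsigned NumLegalParts, bool WidthMatchesLegalType) {
  unsigned Instrs = WidthMatchesLegalType ? 1 : 4;
  return NumLegalParts * Instrs;
}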
    auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
  case Intrinsic::experimental_stepvector: {
    auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
      Cost += AddCost * (LT.first - 1);
  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
                                           ArrayRef<const Value *> Args) {
  auto toVectorTy = [&](Type *ArgTy) {
    return VectorType::get(ArgTy->getScalarType(),
                           cast<VectorType>(DstTy)->getElementCount());
  case Instruction::Sub:
    if (Args.size() != 2 ||
        (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])) ||
        !Args[1]->hasOneUse())
    auto *Extend = cast<CastInst>(Args[1]);

    auto DstTyL = TLI->getTypeLegalizationCost(DL, DstTy);
    unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();

    auto *SrcTy = toVectorTy(Extend->getSrcTy());
    auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy);
    unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
    if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())

    unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorMinNumElements();
    unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorMinNumElements();

    return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
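// A compact restatement of the widening test above (hypothetical name): the
// add/sub can use a widening instruction such as SADDW/UADDW when source and
// destination cover the same number of elements after legalization and the
// extend exactly doubles the element width, e.g. v4i16 -> v4i32.
bool widensByExtend(unsigned NumDstEls, unsigned NumSrcEls,
                    unsigned SrcElTySize, unsigned DstElTySize) {
  return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
}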
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  if (I && I->hasOneUse()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());
    if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) {
      if (I == SingleUser->getOperand(1))
      if (auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1)))
        if (I->getOpcode() == unsigned(Cast->getOpcode()) &&
            cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy())
    return Cost == 0 ? 0 : 1;

  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);
      BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));

    return AdjustCost(Entry->Cost);
      BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
  assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
  assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");

    getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);
  auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
  auto DstVT = TLI->getValueType(DL, Dst);
  auto SrcVT = TLI->getValueType(DL, Src);

  if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))

  if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())

  case Instruction::SExt:
  case Instruction::ZExt:
    if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
  return Opcode == Instruction::PHI ? 0 : 1;

  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);

  if (!LT.second.isVector())

  unsigned Width = LT.second.getVectorNumElements();
  return ST->getVectorInsertExtractBaseCost();
    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
                                         Opd2Info, Opd1PropInfo,
                                         Opd2PropInfo, Args, CxtI);

  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

  if (isWideningInstruction(Ty, Opcode, Args))
    Cost += ST->getWideningBaseCost();

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

    return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
                                                Opd2Info, Opd1PropInfo,
                                                Opd2PropInfo);
    Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind,
    Cost += getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,

  auto VT = TLI->getValueType(DL, Ty);
  if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) {
    int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, CostKind,
    int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
    return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
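// A sketch of the constant-division expansion priced above (hypothetical
// name): when MULHU/MULHS is legal, a vector divide by a constant is assumed
// to lower to roughly two multiplies, two adds, two shifts, plus one fix-up
// instruction.
unsigned divByConstCost(unsigned MulCost, unsigned AddCost, unsigned ShrCost) {
  return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
}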
  Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
                                        Opd2Info, Opd1PropInfo, Opd2PropInfo);

    Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, CostKind,
                                   Opd1Info, Opd2Info, Opd1PropInfo,
    Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, CostKind,
                                   Opd1Info, Opd2Info, Opd1PropInfo,

    return (Cost + 1) * LT.first;

    return (Cost + 1) * LT.first;

    return (Cost + 2) * LT.first;

  return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
                                              Opd2Info, Opd1PropInfo,
                                              Opd2PropInfo);
  unsigned NumVectorInstToHideOverhead = 10;
  int MaxMergeDistance = 64;

      !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
    return NumVectorInstToHideOverhead;
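// A sketch of the address-computation heuristic above (hypothetical name):
// vector addressing is charged a high flat cost unless the access has a
// known constant stride of at most MaxMergeDistance, in which case the
// address math is assumed to fold into the memory op and costs the usual 1.
unsigned addrComputeCost(bool IsVectorAccess, bool HasSmallConstantStride) {
  const unsigned NumVectorInstToHideOverhead = 10;
  if (IsVectorAccess && !HasSmallConstantStride)
    return NumVectorInstToHideOverhead;
  return 1;
}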
    return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  if (isa<FixedVectorType>(ValTy) && ISD == ISD::SELECT) {
    const int AmortizationCost = 20;
      VecPred = CurrentPred;

    auto LT = TLI->getTypeLegalizationCost(DL, ValTy);
    static const TypeConversionCostTblEntry VectorSelectTbl[] = {
    EVT SelCondTy = TLI->getValueType(DL, CondTy);
    EVT SelValTy = TLI->getValueType(DL, ValTy);

  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
                                   I);
  if (ST->requiresStrictAlign()) {

  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
  if (!isa<ScalableVectorType>(DataTy))
    return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
  auto *VT = cast<VectorType>(DataTy);
  auto LT = TLI->getTypeLegalizationCost(DL, DataTy);
  assert(MaxNumVScale && "Expected valid max vscale value");

      getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind, I);
  unsigned MaxNumElementsPerGather =
      MaxNumVScale * LT.second.getVectorMinNumElements();
  return LT.first * MaxNumElementsPerGather * MemOpCost;
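// A worked sketch of the gather/scatter formula above (hypothetical name):
// cost = legal parts * (max vscale * min element count) * per-element memory
// op cost. With MaxNumVScale = 16 (SVE's 2048 / 128), a <vscale x 4 x i32>
// gather with LT.first = 1 and MemOpCost = 1 is priced at 1 * (16 * 4) * 1
// = 64.
unsigned gatherScatterCost(unsigned NumLegalParts, unsigned MaxNumVScale,
                           unsigned MinNumElts, unsigned MemOpCost) {
  unsigned MaxNumElementsPerGather = MaxNumVScale * MinNumElts;
  return NumLegalParts * MaxNumElementsPerGather * MemOpCost;
}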
  return isa<FixedVectorType>(Ty) && !ST->useSVEForFixedLengthVectors();
    return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace,

  auto LT = TLI->getTypeLegalizationCost(DL, Ty);

      LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {
    const int AmortizationCost = 6;
    return LT.first * 2 * AmortizationCost;

  if (useNeonVector(Ty) &&
      cast<VectorType>(Ty)->getElementType()->isIntegerTy(8)) {
    unsigned ProfitableNumElements;
      ProfitableNumElements = 4;
      ProfitableNumElements = 8;

    if (cast<FixedVectorType>(Ty)->getNumElements() < ProfitableNumElements) {
      unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
      unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
      return NumVectorizableInstsToAmortize * NumVecElts * 2;
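// A sketch of the small-i8-vector penalty above (hypothetical name): i8
// vectors below the profitable element count (4 for stores, 8 for loads) are
// assumed to scalarize, so the cost grows with the square of the element
// count: (NumVecElts * 2) * NumVecElts * 2, e.g. 16 for a <2 x i8> store.
unsigned smallByteVectorMemCost(unsigned NumVecElts) {
  unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
  return NumVectorizableInstsToAmortize * NumVecElts * 2;
}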
    bool UseMaskForCond, bool UseMaskForGaps) {
  assert(Factor >= 2 && "Invalid interleave factor");
  auto *VecVTy = cast<FixedVectorType>(VecTy);

  if (!UseMaskForCond && !UseMaskForGaps &&
      Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    unsigned NumElts = VecVTy->getNumElements();

    if (NumElts % Factor == 0 &&
        TLI->isLegalInterleavedAccessType(SubVecTy, DL))
      return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);

  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           UseMaskForCond, UseMaskForGaps);
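// A sketch of the interleaved-access fast path above (hypothetical name): a
// legal group costs the interleave factor times the number of ldN/stN
// operations the legalized sub-vector type needs, e.g. a factor-2 group
// whose sub-vectors each fit a single LD2 costs 2 * 1 = 2.
unsigned interleavedGroupCost(unsigned Factor, unsigned NumLdStN) {
  return Factor * NumLdStN;
}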
  for (auto *I : Tys) {
    if (!I->isVectorTy())
    if (I->getScalarSizeInBits() * cast<FixedVectorType>(I)->getNumElements() ==

  return ST->getMaxInterleaveFactor();
  enum { MaxStridedLoads = 7 };
  int StridedLoads = 0;
  for (const auto BB : L->blocks()) {
    for (auto &I : *BB) {
      LoadInst *LMemI = dyn_cast<LoadInst>(&I);

      const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
      if (!LSCEVAddRec || !LSCEVAddRec->isAffine())

      if (StridedLoads > MaxStridedLoads / 2)
        return StridedLoads;

  return StridedLoads;

  int StridedLoads = countStridedLoads(L, SE);
                    << " strided loads\n");
  BaseT::getUnrollingPreferences(L, SE, UP);
  BaseT::getPeelingPreferences(L, SE, PP);
    Type *ExpectedType) {
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4: {
    if (ST->getNumElements() != NumElts)
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      Res = Builder.CreateInsertValue(Res, L, i);
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    if (Inst->getType() == ExpectedType)
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    Info.ReadMem = true;
    Info.WriteMem = false;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4:
    Info.ReadMem = false;
    Info.WriteMem = true;

  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_st2:
    Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_st3:
    Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
  case Intrinsic::aarch64_neon_ld4:
  case Intrinsic::aarch64_neon_st4:
    Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
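// A sketch of the MatchingId assignment above, which lets EarlyCSE pair a
// NEON ldN with its corresponding stN: both directions of the same factor
// share one id. The enum and function here are illustrative, not LLVM's.
enum VectorLdStId { TwoElements = 0, ThreeElements, FourElements };

VectorLdStId matchingIdForFactor(unsigned Factor) {
  switch (Factor) {
  case 2: return TwoElements;   // aarch64.neon.ld2 / aarch64.neon.st2
  case 3: return ThreeElements; // aarch64.neon.ld3 / aarch64.neon.st3
  default: return FourElements; // aarch64.neon.ld4 / aarch64.neon.st4
  }
}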
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
  bool Considerable = false;
  AllowPromotionWithoutCommonHeader = false;
  if (!isa<SExtInst>(&I))
  Type *ConsideredSExtType =
  if (I.getType() != ConsideredSExtType)

  for (const User *U : I.users()) {
      Considerable = true;
      if (GEPInst->getNumOperands() > 2) {
        AllowPromotionWithoutCommonHeader = true;

  return Considerable;
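// A simplified restatement of the decision above (hypothetical name): a sext
// to i64 is worth considering for address type promotion when it feeds a
// GEP, and promotion without a common header is allowed once some user GEP
// has more than one index operand (more than two operands in total).
bool considerForPromotion(bool IsSExtToInt64, bool HasGEPUser,
                          bool SomeGEPHasMultipleIndices,
                          bool &AllowPromotionWithoutCommonHeader) {
  AllowPromotionWithoutCommonHeader = SomeGEPHasMultipleIndices;
  return IsSExtToInt64 && HasGEPUser;
}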
  if (Ty->isBFloatTy() || !isLegalElementTypeForSVE(Ty))
    bool IsPairwise, bool IsUnsigned,
  if (!isa<ScalableVectorType>(Ty))
    return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned,
  assert((isa<ScalableVectorType>(Ty) && isa<ScalableVectorType>(CondTy)) &&
         "Both vectors need to be scalable");

  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
  unsigned CmpOpcode =
      getCmpSelInstrCost(CmpOpcode, LegalVTy, LegalVTy,
  LegalizationCost *= LT.first - 1;

  return LegalizationCost + 2;
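// A sketch of the SVE min/max reduction costing above (hypothetical name):
// splitting into LT.first legal parts takes LT.first - 1 cmp+select pairs to
// combine, and the final reduction instruction (e.g. SMINV) is charged a
// flat 2.
unsigned sveMinMaxReductionCost(unsigned NumLegalParts, unsigned CmpSelCost) {
  unsigned LegalizationCost =
      NumLegalParts > 1 ? CmpSelCost * (NumLegalParts - 1) : 0;
  return LegalizationCost + 2;
}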
    unsigned Opcode, VectorType *ValTy, bool IsPairwise,
  assert(!IsPairwise && "Cannot be pairwise to continue");

  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
  int LegalizationCost = 0;
    LegalizationCost = getArithmeticInstrCost(Opcode, LegalVTy, CostKind);
    LegalizationCost *= LT.first - 1;

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  return LegalizationCost + 2;
    bool IsPairwiseForm,
  if (isa<ScalableVectorType>(ValTy))
    return getArithmeticReductionCostSVE(Opcode, ValTy, IsPairwiseForm,

    return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,

  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
  MVT MTy = LT.second;
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

    return LT.first * Entry->Cost;

  return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
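// A sketch of the table-driven costing used above and in getShuffleCost
// below (a simplified stand-in for llvm::CostTableLookup): find the entry
// matching the ISD opcode and legalized value type, then scale its cost by
// LT.first, the number of legal-sized pieces.
#include <vector>

struct CostEntry { int ISD; int SimpleVT; unsigned Cost; };

const CostEntry *lookupCost(const std::vector<CostEntry> &Tbl, int ISD,
                            int VT) {
  for (const CostEntry &E : Tbl)
    if (E.ISD == ISD && E.SimpleVT == VT)
      return &E; // caller computes LT.first * E.Cost
  return nullptr; // no entry: fall back to the base implementation
}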
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
    return LT.first * Entry->Cost;
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
@ Or
Bitwise or logical OR of integers.
Optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Represents a single loop in the control flow graph.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
int64_t getSExtValue() const
Get sign extended value.
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
Type * getReturnType() const
The main scalar evolution driver.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getBitWidth() const
Return the number of bits in the APInt.
Container class for subtarget features.
Type * getElementType() const
unsigned getNumArgOperands() const
Value * getPointerOperand()
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
@ SMin
Signed integer min implemented in terms of select(cmp()).
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
@ And
Bitwise or logical AND of integers.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
bool match(Val *V, const Pattern &P)
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
int getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
Type * getRecurrenceType() const
Returns the type of the recurrence.
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
iterator_range< block_iterator > blocks() const
bool isVectorTy() const
True if this is an instance of VectorType.
int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
RecurKind getRecurrenceKind() const
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Type Conversion Cost Table.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
@ AND
Bitwise operators - logical and, logical or, logical xor.
InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsPairwise, bool IsUnsigned, TTI::TargetCostKind CostKind)
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
bool isIntegerTy() const
True if this is an instance of IntegerType.
@ FADD
Simple binary floating point operators.
Base class of all SIMD vector types.
This class represents an analyzed expression in the program.
const CostTblEntry * CostTableLookup(ArrayRef< CostTblEntry > Tbl, int ISD, MVT Ty)
Find in cost table, TypeTy must be comparable to CompareTy by ==.
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, bool IsPairwiseForm, TTI::TargetCostKind CostKind)
const TypeConversionCostTblEntry * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntry > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table, TypeTy must be comparable to CompareTy by ==.
bool isLegalToVectorizeReduction(RecurrenceDescriptor RdxDesc, ElementCount VF) const
An instruction for type-safe pointer arithmetic to access elements of arrays and structs.
initializer< Ty > init(const Ty &Val)
Primary interface to the complete machine description for the target machine.
unsigned getLoopDepth() const
Return the nesting level of this loop.
ScalarTy getKnownMinValue() const
Returns the minimum value this size can represent.
unsigned getMaxInterleaveFactor(unsigned VF)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Class for arbitrary precision integers.
bool isIntPredicate() const
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Class to represent struct types.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Type * getType() const
All values are typed, get the type of this value.
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
@ FMax
FP max implemented in terms of select(cmp()).
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
An instruction for reading from memory.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, int Index, VectorType *SubTp)
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
static IntegerType * getInt64Ty(LLVMContext &C)
unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
int getIntImmCost(int64_t Val)
Calculate the cost of materializing a 64-bit value.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
This node represents a polynomial recurrence on the trip count of the specified loop.
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)
@ ADD
Simple integer binary arithmetic operators.
APInt sext(unsigned width) const
Sign extend to a new width.
int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
A wrapper class for inspecting calls to intrinsic functions.
static InstructionCost getInvalid(CostType Val=0)
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Intrinsic::ID getID() const
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Value * getArgOperand(unsigned i) const
Align max(MaybeAlign Lhs, Align Rhs)
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Information about a load/store intrinsic defined by the target.
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
@ FMin
FP min implemented in terms of select(cmp()).
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)
See if I should be considered for address type promotion.
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, bool IsPairwiseForm, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput)
@ SIGN_EXTEND
Conversion operators.
APFloat abs(APFloat X)
Returns the absolute value of the argument.
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ SMax
Signed integer max implemented in terms of select(cmp()).
LLVM Value Representation.
bool isFP128Ty() const
Return true if this is 'fp128'.
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
bool useNeonVector(const Type *Ty) const
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
constexpr const T & getValue() const LLVM_LVALUE_FUNCTION
@ Xor
Bitwise or logical XOR of integers.
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const