16 #ifndef LLVM_CODEGEN_BASICTTIIMPL_H
17 #define LLVM_CODEGEN_BASICTTIIMPL_H
60 class ScalarEvolution;
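// CRTP helper: the fragment below casts the base class to the most-derived TTI implementation (T) so that the cost queries in this file dispatch statically to target-specific overrides.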
82 T *thisT() { return static_cast<T *>(this); }
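// The shuffle-overhead helpers that follow model broadcasts, permutes and subvector extract/insert by charging one extractelement/insertelement per affected lane.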
90 Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, 0);
93 Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i);
110 Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i);
111 Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, i);
121 "Can only extract subvectors from vectors");
123 assert((!isa<FixedVectorType>(VTy) ||
124 (Index + NumSubElts) <=
125 (int)cast<FixedVectorType>(VTy)->getNumElements()) &&
126 "SK_ExtractSubvector index out of range");
132 for (int i = 0; i != NumSubElts; ++i) {
133 Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
136 thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy, i);
146 "Can only insert subvectors into vectors");
148 assert((!isa<FixedVectorType>(VTy) ||
149 (Index + NumSubElts) <=
150 (int)cast<FixedVectorType>(VTy)->getNumElements()) &&
151 "SK_InsertSubvector index out of range");
157 for (int i = 0; i != NumSubElts; ++i) {
159 thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy, i);
160 Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
168 return static_cast<const T *>(this)->getST();
173 return static_cast<const T *>(this)->getTLI();
250 bool HasBaseReg, int64_t Scale,
285 bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
319 unsigned &JumpTableSize,
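// Jump-table estimation: scan the switch cases to find the smallest and largest case values and the set of distinct successor blocks, then use that value range to size a prospective jump table.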
329 unsigned N = SI.getNumCases();
340 APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
341 APInt MinCaseVal = MaxCaseVal;
342 for (auto CI : SI.cases()) {
343 const APInt &CaseVal = CI.getCaseValue()->getValue();
344 if (CaseVal.sgt(MaxCaseVal))
345 MaxCaseVal = CaseVal;
346 if (CaseVal.slt(MinCaseVal))
347 MinCaseVal = CaseVal;
353 for (auto I : SI.cases())
354 Dests.insert(I.getCaseSuccessor());
363 if (N < 2 || N < TLI->getMinimumJumpTableEntries())
366 (MaxCaseVal - MinCaseVal)
370 JumpTableSize = Range;
386 if (!TM.isPositionIndependent())
396 Triple TargetTriple = TM.getTargetTriple();
463 else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
464 MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
471 if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
531 bool &KnownBitsComputed) {
542 IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
547 if (isa<LoadInst>(I))
579 unsigned NumStridedMemAccesses,
580 unsigned NumPrefetches,
581 bool HasCall) const {
583 NumPrefetches, HasCall);
609 bool Insert, bool Extract) {
612 auto *Ty = cast<FixedVectorType>(InTy);
615 "Vector size mismatch");
619 for (int i = 0, e = Ty->getNumElements(); i < e; ++i) {
620 if (!DemandedElts[i])
623 Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty, i);
625 Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
634 auto *Ty = cast<FixedVectorType>(InTy);
637 return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
645 assert(Args.size() == Tys.size() && "Expected matching Args and Tys");
649 for (int I = 0, E = Args.size(); I != E; I++) {
657 if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
658 if (auto *VecTy = dyn_cast<VectorType>(Ty))
689 unsigned Opcode, Type *Ty,
700 assert(ISD && "Invalid opcode");
706 Opd1PropInfo, Opd2PropInfo,
714 unsigned OpCost = (IsFloat ? 2 : 1);
719 return LT.first * OpCost;
725 return LT.first * 2 * OpCost;
731 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
732 unsigned Num = cast<FixedVectorType>(VTy)->getNumElements();
733 unsigned Cost = thisT()->getArithmeticInstrCost(
735 Opd1PropInfo, Opd2PropInfo, Args, CxtI);
751 return getBroadcastShuffleOverhead(cast<FixedVectorType>(Tp));
757 return getPermuteShuffleOverhead(cast<FixedVectorType>(Tp));
759 return getExtractSubvectorOverhead(Tp, Index,
760 cast<FixedVectorType>(SubTp));
762 return getInsertSubvectorOverhead(Tp, Index,
763 cast<FixedVectorType>(SubTp));
777 assert(ISD && "Invalid opcode");
781 TypeSize SrcSize = SrcLT.second.getSizeInBits();
782 TypeSize DstSize = DstLT.second.getSizeInBits();
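// Cast costs below are derived from the legalized source/destination types; casts the target reports as free (truncation to a legal narrower type, no-op bitcasts, free zext/sext/fpext) return early, and vector casts fall back to splitting or per-element scalarization.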
789 case Instruction::Trunc:
794 case Instruction::BitCast:
797 if (SrcLT.first == DstLT.first && IntOrPtrSrc == IntOrPtrDst &&
801 case Instruction::FPExt:
802 if (I && getTLI()->isExtFree(I))
805 case Instruction::ZExt:
806 if (TLI->isZExtFree(SrcLT.second, DstLT.second))
809 case Instruction::SExt:
810 if (I && getTLI()->isExtFree(I))
820 if (DstLT.first == SrcLT.first &&
825 case Instruction::AddrSpaceCast:
832 auto *SrcVTy = dyn_cast<VectorType>(Src);
833 auto *DstVTy = dyn_cast<VectorType>(Dst);
836 if (SrcLT.first == DstLT.first &&
841 if (!SrcVTy && !DstVTy) {
852 if (DstVTy && SrcVTy) {
854 if (SrcLT.first == DstLT.first && SrcSize == DstSize) {
857 if (Opcode == Instruction::ZExt)
861 if (Opcode == Instruction::SExt)
862 return SrcLT.first * 2;
868 return SrcLT.first * 1;
881 if ((SplitSrc || SplitDst) && SrcVTy->getElementCount().isVector() &&
882 DstVTy->getElementCount().isVector()) {
885 T *TTI = static_cast<T *>(this);
888 (!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0;
896 unsigned Num = cast<FixedVectorType>(DstVTy)->getNumElements();
909 if (Opcode == Instruction::BitCast) {
920 return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy,
937 assert(ISD && "Invalid opcode");
946 assert(CondTy && "CondTy must exist");
962 if (auto *ValVTy = dyn_cast<VectorType>(ValTy)) {
963 unsigned Num = cast<FixedVectorType>(ValVTy)->getNumElements();
967 Opcode, ValVTy->getScalarType(), CondTy, VecPred, CostKind, I);
979 std::pair<unsigned, MVT> LT =
991 if (getTLI()->getValueType(DL, Src, true) == MVT::Other)
1005 LT.second.getSizeInBits())) {
1029 const Value *Ptr, bool VariableMask,
1033 auto *VT = cast<FixedVectorType>(DataTy);
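// Gather/scatter is approximated below as one memory access per lane plus the cost of extracting the per-lane addresses (and mask bits when the mask is variable) and re-packing the values into a vector.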
1040 VT->getNumElements() *
1042 Instruction::ExtractElement,
1044 VT->getNumElements()),
1052 int ConditionalCost = 0;
1061 VT->getNumElements() *
1063 Instruction::ExtractElement,
1065 VT->getNumElements()),
1071 return LoadCost + PackingCost + ConditionalCost;
1077 bool UseMaskForCond = false, bool UseMaskForGaps = false) {
1078 auto *VT = cast<FixedVectorType>(VecTy);
1080 unsigned NumElts = VT->getNumElements();
1081 assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
1083 unsigned NumSubElts = NumElts / Factor;
1088 if (UseMaskForCond || UseMaskForGaps)
1089 Cost = thisT()->getMaskedMemoryOpCost(Opcode, VecTy, Alignment,
1092 Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace,
1098 unsigned VecTySize = thisT()->getDataLayout().getTypeStoreSize(VecTy);
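// If the legalized vector type is narrower than the interleaved vector, the wide access is split into several legal memory instructions; the ceil lambda below is a rounding-up integer division used to count them and the lanes covered by each one.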
1102 auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
1121 unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
1125 unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
1128 BitVector UsedInsts(NumLegalInsts, false);
1129 for (unsigned Index : Indices)
1130 for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
1131 UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
1135 Cost *= UsedInsts.count() / NumLegalInsts;
1150 "Interleaved memory op has too many members");
1152 for (unsigned Index : Indices) {
1153 assert(Index < Factor && "Invalid index for interleaved memory op");
1156 for (unsigned i = 0; i < NumSubElts; i++)
1157 Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VT,
1161 unsigned InsSubCost = 0;
1162 for (unsigned i = 0; i < NumSubElts; i++)
1164 thisT()->getVectorInstrCost(Instruction::InsertElement, SubVT, i);
1166 Cost += Indices.size() * InsSubCost;
1177 unsigned ExtSubCost = 0;
1178 for (unsigned i = 0; i < NumSubElts; i++)
1180 thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVT, i);
1181 Cost += ExtSubCost * Factor;
1183 for (unsigned i = 0; i < NumElts; i++)
1184 Cost += static_cast<T *>(this)
1185 ->getVectorInstrCost(Instruction::InsertElement, VT, i);
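// The interleaved access is modeled lane by lane: for a load, extract the elements belonging to each requested member from the wide vector and insert them into the narrow sub-vectors; for a store, the extract/insert sequence runs in the opposite direction.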
1188 if (!UseMaskForCond)
1205 for (unsigned i = 0; i < NumSubElts; i++)
1207 thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVT, i);
1209 for (unsigned i = 0; i < NumElts; i++)
1211 thisT()->getVectorInstrCost(Instruction::InsertElement, MaskVT, i);
1219 Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT,
1243 (RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount()
1252 case Intrinsic::cttz:
1258 case Intrinsic::ctlz:
1265 return thisT()->getMemcpyCost(ICA.getInst());
1267 case Intrinsic::masked_scatter: {
1269 bool VarMask = !isa<Constant>(Mask);
1270 Align Alignment = cast<ConstantInt>(Args[2])->getAlignValue();
1275 case Intrinsic::masked_gather: {
1277 bool VarMask = !isa<Constant>(Mask);
1278 Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue();
1282 case Intrinsic::experimental_stepvector: {
1283 if (isa<ScalableVectorType>(RetTy))
1288 case Intrinsic::experimental_vector_extract: {
1291 if (isa<ScalableVectorType>(RetTy))
1293 unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue();
1296 Index, cast<VectorType>(RetTy));
1298 case Intrinsic::experimental_vector_insert: {
1303 unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
1304 return thisT()->getShuffleCost(
1308 case Intrinsic::experimental_vector_reverse: {
1311 0, cast<VectorType>(RetTy));
1313 case Intrinsic::vector_reduce_add:
1314 case Intrinsic::vector_reduce_mul:
1315 case Intrinsic::vector_reduce_and:
1316 case Intrinsic::vector_reduce_or:
1317 case Intrinsic::vector_reduce_xor:
1318 case Intrinsic::vector_reduce_smax:
1319 case Intrinsic::vector_reduce_smin:
1320 case Intrinsic::vector_reduce_fmax:
1321 case Intrinsic::vector_reduce_fmin:
1322 case Intrinsic::vector_reduce_umax:
1323 case Intrinsic::vector_reduce_umin: {
1327 case Intrinsic::vector_reduce_fadd:
1328 case Intrinsic::vector_reduce_fmul: {
1333 case Intrinsic::fshl:
1334 case Intrinsic::fshr: {
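// Funnel shifts are costed through their generic expansion: a shl of one operand, a lshr of the other, an or to combine them, a sub and urem to normalize the shift amount, and a compare/select to guard the zero-shift case.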
1335 if (isa<ScalableVectorType>(RetTy))
1351 thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
1353 thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind);
1354 Cost += thisT()->getArithmeticInstrCost(
1355 BinaryOperator::Shl, RetTy, CostKind, OpKindX, OpKindZ, OpPropsX);
1356 Cost += thisT()->getArithmeticInstrCost(
1357 BinaryOperator::LShr, RetTy, CostKind, OpKindY, OpKindZ, OpPropsY);
1361 Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1363 OpPropsZ, OpPropsBW);
1368 thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
1383 ScalarizationCost = 0;
1385 ScalarizationCost +=
1387 ScalarizationCost +=
1393 return thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
1414 unsigned VecTyIndex = 0;
1415 if (IID == Intrinsic::vector_reduce_fadd ||
1416 IID == Intrinsic::vector_reduce_fmul)
1418 assert(Tys.size() > VecTyIndex && "Unexpected IntrinsicCostAttributes");
1419 VecOpTy = dyn_cast<VectorType>(Tys[VecTyIndex]);
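// Scalable vector types cannot be costed by scalarizing, so the check below short-circuits that path; fixed-width vectors are scalarized instead, paying one scalar call per lane plus the insert/extract overhead of splitting and rebuilding the vectors.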
1428 if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
1429 return isa<ScalableVectorType>(Ty);
1435 unsigned ScalarCalls = 1;
1436 Type *ScalarRetTy = RetTy;
1437 if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
1438 if (!SkipScalarizationCost)
1440 ScalarCalls = std::max(ScalarCalls,
1441 cast<FixedVectorType>(RetVTy)->getNumElements());
1445 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1447 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
1448 if (!SkipScalarizationCost)
1450 ScalarCalls = std::max(ScalarCalls,
1451 cast<FixedVectorType>(VTy)->getNumElements());
1454 ScalarTys.push_back(Ty);
1456 if (ScalarCalls == 1)
1461 thisT()->getIntrinsicInstrCost(ScalarAttrs, CostKind);
1463 return ScalarCalls * ScalarCost + ScalarizationCost;
1467 case Intrinsic::sqrt:
1470 case Intrinsic::sin:
1473 case Intrinsic::cos:
1476 case Intrinsic::exp:
1479 case Intrinsic::exp2:
1482 case Intrinsic::log:
1485 case Intrinsic::log10:
1491 case Intrinsic::fabs:
1494 case Intrinsic::canonicalize:
1509 case Intrinsic::copysign:
1521 case Intrinsic::nearbyint:
1524 case Intrinsic::rint:
1530 case Intrinsic::roundeven:
1533 case Intrinsic::pow:
1536 case Intrinsic::fma:
1539 case Intrinsic::fmuladd:
1542 case Intrinsic::experimental_constrained_fmuladd:
1546 case Intrinsic::lifetime_start:
1547 case Intrinsic::lifetime_end:
1548 case Intrinsic::sideeffect:
1549 case Intrinsic::pseudoprobe:
1551 case Intrinsic::masked_store: {
1553 Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
1557 case Intrinsic::masked_load: {
1559 Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
1563 case Intrinsic::vector_reduce_add:
1567 case Intrinsic::vector_reduce_mul:
1568 return thisT()->getArithmeticReductionCost(Instruction::Mul, VecOpTy,
1571 case Intrinsic::vector_reduce_and:
1572 return thisT()->getArithmeticReductionCost(Instruction::And, VecOpTy,
1575 case Intrinsic::vector_reduce_or:
1576 return thisT()->getArithmeticReductionCost(Instruction::Or, VecOpTy,
1579 case Intrinsic::vector_reduce_xor:
1580 return thisT()->getArithmeticReductionCost(Instruction::Xor, VecOpTy,
1583 case Intrinsic::vector_reduce_fadd:
1585 return thisT()->getArithmeticReductionCost(Instruction::FAdd, VecOpTy,
1588 case Intrinsic::vector_reduce_fmul:
1590 return thisT()->getArithmeticReductionCost(Instruction::FMul, VecOpTy,
1593 case Intrinsic::vector_reduce_smax:
1594 case Intrinsic::vector_reduce_smin:
1595 case Intrinsic::vector_reduce_fmax:
1596 case Intrinsic::vector_reduce_fmin:
1597 return thisT()->getMinMaxReductionCost(
1601 case Intrinsic::vector_reduce_umax:
1602 case Intrinsic::vector_reduce_umin:
1603 return thisT()->getMinMaxReductionCost(
1618 thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
1625 Cost += thisT()->getArithmeticInstrCost(
1629 case Intrinsic::sadd_sat:
1630 case Intrinsic::ssub_sat: {
1635 ? Intrinsic::sadd_with_overflow
1636 : Intrinsic::ssub_with_overflow;
1642 nullptr, ScalarizationCostPassed);
1645 thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
1647 Cost += 2 * thisT()->getCmpSelInstrCost(
1652 case Intrinsic::uadd_sat:
1653 case Intrinsic::usub_sat: {
1658 ? Intrinsic::uadd_with_overflow
1659 : Intrinsic::usub_with_overflow;
1663 nullptr, ScalarizationCostPassed);
1670 case Intrinsic::smul_fix:
1671 case Intrinsic::umul_fix: {
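// Fixed-point multiply is costed via its expansion: extend both operands to twice the bit width, multiply, truncate back, then recombine the high and low halves with a logical shift right, a shift left and an or.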
1676 IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1680 Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind);
1682 thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
1683 Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
1685 Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy,
1688 Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, CostKind,
1691 Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind);
1694 case Intrinsic::sadd_with_overflow:
1695 case Intrinsic::ssub_with_overflow: {
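// Signed add/sub with overflow is costed as the plain add or sub plus the integer compares, selects and the and that, in effect, reconstruct the overflow flag from the operand and result signs.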
1698 unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1700 : BinaryOperator::Sub;
1711 Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
1712 Cost += 3 * thisT()->getCmpSelInstrCost(
1713 Instruction::ICmp, SumTy, OverflowTy,
1715 Cost += 2 * thisT()->getCmpSelInstrCost(
1718 Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, OverflowTy,
1722 case Intrinsic::uadd_with_overflow:
1723 case Intrinsic::usub_with_overflow: {
1726 unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1728 : BinaryOperator::Sub;
1731 Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
1733 thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy,
1737 case Intrinsic::smul_with_overflow:
1738 case Intrinsic::umul_with_overflow: {
1745 IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1749 Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind);
1751 thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
1752 Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
1754 Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, MulTy,
1758 if (IID == Intrinsic::smul_with_overflow)
1759 Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy,
1764 thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, OverflowTy,
1768 case Intrinsic::ctpop:
1774 case Intrinsic::ctlz:
1777 case Intrinsic::cttz:
1780 case Intrinsic::bswap:
1783 case Intrinsic::bitreverse:
1793 for (unsigned ISD : ISDs) {
1795 if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
1805 LegalCost.push_back(LT.first * 2);
1807 LegalCost.push_back(LT.first * 1);
1811 CustomCost.push_back(LT.first * 2);
1815 auto *MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1816 if (MinLegalCostI != LegalCost.end())
1817 return *MinLegalCostI;
1819 auto MinCustomCostI =
1820 std::min_element(CustomCost.begin(), CustomCost.end());
1821 if (MinCustomCostI != CustomCost.end())
1822 return *MinCustomCostI;
1826 if (IID == Intrinsic::fmuladd)
1827 return thisT()->getArithmeticInstrCost(BinaryOperator::FMul, RetTy,
1829 thisT()->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy,
1831 if (IID == Intrinsic::experimental_constrained_fmuladd) {
1833 Intrinsic::experimental_constrained_fmul, RetTy, Tys);
1835 Intrinsic::experimental_constrained_fadd, RetTy, Tys);
1836 return thisT()->getIntrinsicInstrCost(FMulAttrs, CostKind) +
1837 thisT()->getIntrinsicInstrCost(FAddAttrs, CostKind);
1843 if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
1845 if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
1846 return isa<ScalableVectorType>(Ty);
1850 unsigned ScalarizationCost = SkipScalarizationCost ?
1853 unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements();
1855 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1859 ScalarTys.push_back(Ty);
1864 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1865 if (auto *VTy = dyn_cast<VectorType>(Tys[i])) {
1868 ScalarCalls = std::max(ScalarCalls,
1869 cast<FixedVectorType>(VTy)->getNumElements());
1872 return ScalarCalls * ScalarCost + ScalarizationCost;
1876 return SingleCallCost;
1944 unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
1945 if ((Opcode == Instruction::Or || Opcode == Instruction::And) &&
1955 return thisT()->getCastInstrCost(Instruction::BitCast, ValTy, Ty,
1957 thisT()->getCmpSelInstrCost(Instruction::ICmp, ValTy,
1961 unsigned NumReduxLevels = Log2_32(NumVecElts);
1962 unsigned ArithCost = 0;
1963 unsigned ShuffleCost = 0;
1964 std::pair<unsigned, MVT> LT =
1965 thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
1966 unsigned LongVectorCount = 0;
1968 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
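// Tree reduction: while the vector is wider than the largest legal type, halve it with a shuffle and apply one vector op per level; the remaining log2 levels are costed on the legal type, and the scalar result is read out with an extractelement of lane 0.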
1969 while (NumVecElts > MVTLen) {
1973 ShuffleCost += (IsPairwise + 1) *
1976 ArithCost += thisT()->getArithmeticInstrCost(Opcode, SubTy, CostKind);
1981 NumReduxLevels -= LongVectorCount;
1991 unsigned NumShuffles = NumReduxLevels;
1992 if (IsPairwise && NumReduxLevels >= 1)
1993 NumShuffles += NumReduxLevels - 1;
1994 ShuffleCost += NumShuffles * thisT()->getShuffleCost(
1996 ArithCost += NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty);
1997 return ShuffleCost + ArithCost +
1998 thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
2004 bool IsPairwise, bool IsUnsigned,
2008 unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
2009 unsigned NumReduxLevels = Log2_32(NumVecElts);
2012 CmpOpcode = Instruction::FCmp;
2015 "expecting floating point or integer type for min/max reduction");
2016 CmpOpcode = Instruction::ICmp;
2019 unsigned ShuffleCost = 0;
2020 std::pair<unsigned, MVT> LT =
2021 thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
2022 unsigned LongVectorCount = 0;
2024 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
2025 while (NumVecElts > MVTLen) {
2031 ShuffleCost += (IsPairwise + 1) *
2035 thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy,
2043 NumReduxLevels -= LongVectorCount;
2053 unsigned NumShuffles = NumReduxLevels;
2054 if (IsPairwise && NumReduxLevels >= 1)
2055 NumShuffles += NumReduxLevels - 1;
2056 ShuffleCost += NumShuffles * thisT()->getShuffleCost(
2060 (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy,
2066 return ShuffleCost + MinMaxCost +
2067 thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
2080 IsUnsigned ? Instruction::ZExt : Instruction::SExt, ExtTy, Ty,
2084 thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
2088 return RedCost + MulCost + ExtCost;
2115 #endif // LLVM_CODEGEN_BASICTTIIMPL_H