#ifndef LLVM_CODEGEN_BASICTTIIMPL_H
#define LLVM_CODEGEN_BASICTTIIMPL_H

class ScalarEvolution;

  T *thisT() { return static_cast<T *>(this); }
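The thisT() helper above is the classic CRTP hook: the base class template calls into the most-derived target implementation without virtual dispatch. A minimal, self-contained sketch of the same pattern (names here are illustrative, not part of BasicTTIImpl):

#include <iostream>

template <typename T> struct CostModelBase {
  T *thisT() { return static_cast<T *>(this); }
  // Dispatches through thisT() so a derived class can shadow elementCost().
  int vectorInstrCost(int NumElts) { return NumElts * thisT()->elementCost(); }
  int elementCost() { return 1; } // generic default
};

struct MyTargetCostModel : CostModelBase<MyTargetCostModel> {
  int elementCost() { return 2; } // target-specific override, no vtable needed
};

int main() {
  MyTargetCostModel M;
  std::cout << M.vectorInstrCost(4) << "\n"; // prints 8
}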
    Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, 0);
    Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i);

    Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i);
    Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, i);
122 "Can only extract subvectors from vectors");
124 assert((!isa<FixedVectorType>(VTy) ||
125 (Index + NumSubElts) <=
126 (
int)cast<FixedVectorType>(VTy)->getNumElements()) &&
127 "SK_ExtractSubvector index out of range");
133 for (
int i = 0;
i != NumSubElts; ++
i) {
134 Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
137 thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy,
i);
147 "Can only insert subvectors into vectors");
149 assert((!isa<FixedVectorType>(VTy) ||
150 (Index + NumSubElts) <=
151 (
int)cast<FixedVectorType>(VTy)->getNumElements()) &&
152 "SK_InsertSubvector index out of range");
158 for (
int i = 0;
i != NumSubElts; ++
i) {
160 thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy,
i);
161 Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
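All of the shuffle-overhead helpers above reduce to counting insertelement/extractelement pairs. A hedged stand-alone sketch of that arithmetic, assuming unit per-lane costs (real targets get these numbers from getVectorInstrCost):

#include <iostream>

constexpr int ExtractCost = 1, InsertCost = 1; // assumed, not real TTI values

int broadcastOverhead(int NumElts) {    // one extract of lane 0, N inserts
  return ExtractCost + NumElts * InsertCost;
}
int permuteOverhead(int NumElts) {      // every lane moves: N extracts + N inserts
  return NumElts * (ExtractCost + InsertCost);
}
int subvectorOverhead(int NumSubElts) { // extract/insert only the moved lanes
  return NumSubElts * (ExtractCost + InsertCost);
}

int main() {
  std::cout << broadcastOverhead(4) << " " << permuteOverhead(4) << " "
            << subvectorOverhead(2) << "\n"; // 5 8 4
}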
    return static_cast<const T *>(this)->getST();

    return static_cast<const T *>(this)->getTLI();
                                              bool IsGatherScatter,
    auto *VT = cast<FixedVectorType>(DataTy);
                                 VT->getNumElements()),
        VT->getNumElements() *

        VT->getNumElements() *
            Instruction::ExtractElement,
                                    VT->getNumElements()),

    return LoadCost + PackingCost + ConditionalCost;
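The three terms just returned model a scalarized masked (or gather/scatter) memory operation: the per-lane loads, packing the loaded lanes back into a vector, and the per-lane mask checks. A hedged illustration of that scalarization, with plain C++ standing in for the IR the cost model is pricing:

#include <array>
#include <iostream>

template <size_t N>
std::array<int, N> maskedLoad(const int *Ptr, const std::array<bool, N> &Mask,
                              const std::array<int, N> &PassThru) {
  std::array<int, N> Result = PassThru;
  for (size_t I = 0; I < N; ++I)  // ConditionalCost: one test/branch per lane
    if (Mask[I])
      Result[I] = Ptr[I];         // LoadCost plus PackingCost (insert into Result)
  return Result;
}

int main() {
  int Data[4] = {10, 20, 30, 40};
  auto R = maskedLoad<4>(Data, {true, false, true, false}, {0, 0, 0, 0});
  std::cout << R[0] << " " << R[1] << " " << R[2] << " " << R[3] << "\n"; // 10 0 30 0
}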
  std::pair<const Value *, unsigned>

                             bool HasBaseReg, int64_t Scale,

                             Type *ScalarValTy) const {
    auto &&IsSupportedByTarget = [this, ScalarMemTy, ScalarValTy](unsigned VF) {
      if (getTLI()->isOperationLegal(ISD::STORE, VT) ||

    while (VF > 2 && IsSupportedByTarget(VF))
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) {

    assert(Val >= 0 && "Negative cost!");
398 unsigned &JumpTableSize,
408 unsigned N =
SI.getNumCases();
419 APInt MaxCaseVal =
SI.case_begin()->getCaseValue()->getValue();
420 APInt MinCaseVal = MaxCaseVal;
421 for (
auto CI :
SI.cases()) {
422 const APInt &CaseVal = CI.getCaseValue()->getValue();
423 if (CaseVal.
sgt(MaxCaseVal))
424 MaxCaseVal = CaseVal;
425 if (CaseVal.
slt(MinCaseVal))
426 MinCaseVal = CaseVal;
432 for (
auto I :
SI.cases())
433 Dests.
insert(
I.getCaseSuccessor());
442 if (
N < 2 || N < TLI->getMinimumJumpTableEntries())
445 (MaxCaseVal - MinCaseVal)
449 JumpTableSize = Range;
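The case-cluster estimate above boils down to: scan the cases for the smallest and largest value, and use that span as the prospective jump-table size. A hedged sketch with ordinary integers (the density threshold below is made up for illustration; it is not the value LLVM uses):

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

struct ClusterEstimate { uint64_t JumpTableSize; unsigned NumClusters; };

ClusterEstimate estimateCaseClusters(const std::vector<int64_t> &CaseValues) {
  auto [MinIt, MaxIt] = std::minmax_element(CaseValues.begin(), CaseValues.end());
  uint64_t Range = uint64_t(*MaxIt - *MinIt) + 1;
  // Dense enough for a single table? (assumed 40% density cutoff)
  if (CaseValues.size() * 100 >= Range * 40)
    return {Range, 1};
  return {0, unsigned(CaseValues.size())}; // otherwise one cluster per case
}

int main() {
  std::cout << estimateCaseClusters({0, 1, 2, 3, 4}).NumClusters << " "
            << estimateCaseClusters({0, 1, 2, 3, 1000}).NumClusters << "\n"; // 1 5
}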
    if (!TM.isPositionIndependent())

    Triple TargetTriple = TM.getTargetTriple();

    else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
      MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;

        if (isa<CallInst>(I) || isa<InvokeInst>(I)) {

               << "advising against unrolling the loop because it "
620 bool &KnownBitsComputed) {
631 IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
636 if (isa<LoadInst>(
I))
668 unsigned NumStridedMemAccesses,
669 unsigned NumPrefetches,
670 bool HasCall)
const {
672 NumPrefetches, HasCall);
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract) {
    auto *Ty = cast<FixedVectorType>(InTy);
           "Vector size mismatch");

    for (int i = 0, e = Ty->getNumElements(); i < e; ++i) {
      if (!DemandedElts[i])
        continue;
      if (Insert)
        Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty, i);
      if (Extract)
        Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
    }
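For a fixed-width vector the loop above is just "number of demanded lanes times per-lane insert/extract cost". A hedged sketch of that closed form, with assumed unit costs:

#include <bitset>
#include <iostream>

int scalarizationOverhead(std::bitset<8> DemandedElts, bool Insert, bool Extract,
                          int InsertCost = 1, int ExtractCost = 1) {
  return int(DemandedElts.count()) *
         ((Insert ? InsertCost : 0) + (Extract ? ExtractCost : 0));
}

int main() {
  // Four demanded lanes, insert-only (e.g. building a vector from scalars).
  std::cout << scalarizationOverhead(0b00001111, /*Insert=*/true, /*Extract=*/false)
            << "\n"; // 4
}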
    auto *Ty = cast<FixedVectorType>(InTy);
    return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);

    assert(Args.size() == Tys.size() && "Expected matching Args and Tys");
    for (int I = 0, E = Args.size(); I != E; I++) {
      if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
        if (auto *VecTy = dyn_cast<VectorType>(Ty))
    assert(ISD && "Invalid opcode");
                                         Opd1PropInfo, Opd2PropInfo,

      return LT.first * OpCost;

      return LT.first * 2 * OpCost;

      unsigned DivOpc = IsSigned ? Instruction::SDiv : Instruction::UDiv;
          DivOpc, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo,
          thisT()->getArithmeticInstrCost(Instruction::Sub, Ty, CostKind);
      return DivCost + MulCost + SubCost;
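DivCost + MulCost + SubCost prices the usual remainder expansion used when a target has no native srem/urem: rem = X - (X / Y) * Y. A plain-integer illustration of the three operations being counted:

#include <cstdint>
#include <iostream>

int32_t expandSRem(int32_t X, int32_t Y) {
  int32_t Div = X / Y;   // DivCost
  int32_t Mul = Div * Y; // MulCost
  return X - Mul;        // SubCost
}

int main() {
  std::cout << expandSRem(17, 5) << " " << expandSRem(-17, 5) << "\n"; // 2 -2
}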
    if (isa<ScalableVectorType>(Ty))

    if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
                                       Opd1PropInfo, Opd2PropInfo, Args, CxtI);
             VTy->getNumElements() * Cost;

    int Limit = Mask.size() * 2;
      if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
        return getBroadcastShuffleOverhead(FVT);

      if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
        return getPermuteShuffleOverhead(FVT);

      return getExtractSubvectorOverhead(Tp, Index,
                                         cast<FixedVectorType>(SubTp));
      return getInsertSubvectorOverhead(Tp, Index,
                                        cast<FixedVectorType>(SubTp));
    assert(ISD && "Invalid opcode");
    std::pair<InstructionCost, MVT> SrcLT =
    std::pair<InstructionCost, MVT> DstLT =

    TypeSize SrcSize = SrcLT.second.getSizeInBits();
    TypeSize DstSize = DstLT.second.getSizeInBits();
    bool IntOrPtrSrc = Src->isIntegerTy() || Src->isPointerTy();
    bool IntOrPtrDst = Dst->isIntegerTy() || Dst->isPointerTy();

    case Instruction::Trunc:
    case Instruction::BitCast:
      if (SrcLT.first == DstLT.first && IntOrPtrSrc == IntOrPtrDst &&
    case Instruction::FPExt:
      if (I && getTLI()->isExtFree(I))
    case Instruction::ZExt:
      if (TLI->isZExtFree(SrcLT.second, DstLT.second))
    case Instruction::SExt:
      if (I && getTLI()->isExtFree(I))

      if (DstLT.first == SrcLT.first &&
    case Instruction::AddrSpaceCast:
                                         Dst->getPointerAddressSpace()))

    auto *SrcVTy = dyn_cast<VectorType>(Src);
    auto *DstVTy = dyn_cast<VectorType>(Dst);

    if (SrcLT.first == DstLT.first &&

    if (!SrcVTy && !DstVTy) {

    if (DstVTy && SrcVTy) {
      if (SrcLT.first == DstLT.first && SrcSize == DstSize) {
        if (Opcode == Instruction::ZExt)
        if (Opcode == Instruction::SExt)
          return SrcLT.first * 2;
        return SrcLT.first * 1;

      if ((SplitSrc || SplitDst) && SrcVTy->getElementCount().isVector() &&
          DstVTy->getElementCount().isVector()) {
        T *TTI = static_cast<T *>(this);
            (!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0;

      if (isa<ScalableVectorType>(DstVTy))

      unsigned Num = cast<FixedVectorType>(DstVTy)->getNumElements();
          Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind, I);
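When one side of the cast is wider than what the target can legalize, the cost above recurses on split halves and adds a per-split charge. A hedged sketch of that recursion with made-up unit costs:

#include <iostream>

int splitCastCost(unsigned NumElts, unsigned LegalNumElts, int SplitCost = 1) {
  if (NumElts <= LegalNumElts)
    return 1; // assumed cost of one cast on a legal vector
  return 2 * splitCastCost(NumElts / 2, LegalNumElts, SplitCost) + SplitCost;
}

int main() {
  std::cout << splitCastCost(16, 4) << "\n"; // 4 legal casts + 3 splits = 7
}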
    if (Opcode == Instruction::BitCast) {

    return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy,

    assert(ISD && "Invalid opcode");
    assert(CondTy && "CondTy must exist");

    std::pair<InstructionCost, MVT> LT =
      return LT.first * 1;

    if (auto *ValVTy = dyn_cast<VectorType>(ValTy)) {
      unsigned Num = cast<FixedVectorType>(ValVTy)->getNumElements();
          Opcode, ValVTy->getScalarType(), CondTy, VecPred, CostKind, I);
    std::pair<InstructionCost, MVT> LT =

                                          const APInt &DemandedDstElts,
           "Unexpected size of DemandedDstElts.");

    Cost += thisT()->getScalarizationOverhead(SrcVT, DemandedSrcElts,
        thisT()->getScalarizationOverhead(ReplicatedVT, DemandedDstElts,
    assert(!Src->isVoidTy() && "Invalid type");
    if (getTLI()->getValueType(DL, Src, true) == MVT::Other)

    std::pair<InstructionCost, MVT> LT =

    if (Src->isVectorTy() &&
                                      LT.second.getSizeInBits())) {
    return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, true, false,

                                       const Value *Ptr, bool VariableMask,
    return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, VariableMask,
                                           bool UseMaskForCond = false,
                                           bool UseMaskForGaps = false) {
    auto *VT = cast<FixedVectorType>(VecTy);

    unsigned NumElts = VT->getNumElements();
    assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");

    unsigned NumSubElts = NumElts / Factor;

    if (UseMaskForCond || UseMaskForGaps)
      Cost = thisT()->getMaskedMemoryOpCost(Opcode, VecTy, Alignment,
    else
      Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace,

    unsigned VecTySize = thisT()->getDataLayout().getTypeStoreSize(VecTy);

    if (Cost.isValid() && VecTySize > VecTyLTSize) {
      unsigned NumLegalInsts = divideCeil(VecTySize, VecTyLTSize);

      unsigned NumEltsPerLegalInst = divideCeil(NumElts, NumLegalInsts);

      BitVector UsedInsts(NumLegalInsts, false);
      for (unsigned Index : Indices)
        for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
          UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);

           "Interleaved memory op has too many members");

    for (unsigned Index : Indices) {
      assert(Index < Factor && "Invalid index for interleaved memory op");
      for (unsigned Elm = 0; Elm < NumSubElts; Elm++)
        DemandedLoadStoreElts.setBit(Index + Elm * Factor);
    }
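The indexing above encodes which lanes of the wide interleaved vector belong to each requested member: member Index of a group with interleave factor Factor owns lanes Index, Index + Factor, Index + 2*Factor, and so on. A hedged stand-alone illustration of that lane bookkeeping:

#include <iostream>
#include <vector>

std::vector<unsigned> memberLanes(unsigned Index, unsigned Factor,
                                  unsigned NumSubElts) {
  std::vector<unsigned> Lanes;
  for (unsigned Elm = 0; Elm < NumSubElts; ++Elm)
    Lanes.push_back(Index + Elm * Factor); // same formula as setBit() above
  return Lanes;
}

int main() {
  for (unsigned L : memberLanes(/*Index=*/1, /*Factor=*/3, /*NumSubElts=*/4))
    std::cout << L << " "; // 1 4 7 10
  std::cout << "\n";
}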
          thisT()->getScalarizationOverhead(SubVT, DemandedAllSubElts,
      Cost += Indices.size() * InsSubCost;
          thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,

          thisT()->getScalarizationOverhead(SubVT, DemandedAllSubElts,
      Cost += ExtSubCost * Indices.size();
      Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,

    if (!UseMaskForCond)

    Cost += thisT()->getReplicationShuffleCost(
        I8Type, Factor, NumSubElts,
        UseMaskForGaps ? DemandedLoadStoreElts : DemandedAllResultElts,

    if (UseMaskForGaps) {
      Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT,
        (RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount()

    case Intrinsic::cttz:
    case Intrinsic::ctlz:

      return thisT()->getMemcpyCost(ICA.getInst());
    case Intrinsic::masked_scatter: {
      bool VarMask = !isa<Constant>(Mask);
      Align Alignment = cast<ConstantInt>(Args[2])->getAlignValue();
    case Intrinsic::masked_gather: {
      bool VarMask = !isa<Constant>(Mask);
      Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue();
    case Intrinsic::experimental_stepvector: {
      if (isa<ScalableVectorType>(RetTy))
    case Intrinsic::experimental_vector_extract: {
      if (isa<ScalableVectorType>(RetTy))
      unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue();
                                     Index, cast<VectorType>(RetTy));
    case Intrinsic::experimental_vector_insert: {
      unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
      return thisT()->getShuffleCost(
    case Intrinsic::experimental_vector_reverse: {
                                     0, cast<VectorType>(RetTy));
    case Intrinsic::experimental_vector_splice: {
      unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
                                     Index, cast<VectorType>(RetTy));
    case Intrinsic::vector_reduce_add:
    case Intrinsic::vector_reduce_mul:
    case Intrinsic::vector_reduce_and:
    case Intrinsic::vector_reduce_or:
    case Intrinsic::vector_reduce_xor:
    case Intrinsic::vector_reduce_smax:
    case Intrinsic::vector_reduce_smin:
    case Intrinsic::vector_reduce_fmax:
    case Intrinsic::vector_reduce_fmin:
    case Intrinsic::vector_reduce_umax:
    case Intrinsic::vector_reduce_umin: {

    case Intrinsic::vector_reduce_fadd:
    case Intrinsic::vector_reduce_fmul: {
    case Intrinsic::fshl:
    case Intrinsic::fshr: {
      if (isa<ScalableVectorType>(RetTy))

      Cost +=
          thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
      Cost +=
          thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind);
      Cost += thisT()->getArithmeticInstrCost(
          BinaryOperator::Shl, RetTy, CostKind, OpKindX, OpKindZ, OpPropsX);
      Cost += thisT()->getArithmeticInstrCost(
          BinaryOperator::LShr, RetTy, CostKind, OpKindY, OpKindZ, OpPropsY);
      Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
                                              CostKind, OpKindZ, OpKindBW,
                                              OpPropsZ, OpPropsBW);
      Cost +=
          thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
                                      CmpInst::ICMP_EQ, CostKind);
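The pieces being summed correspond to the usual funnel-shift expansion, fshl(X, Y, Z) = (X << (Z % BW)) | (Y >> (BW - (Z % BW))), plus a compare/select to guard the Z % BW == 0 case. A hedged scalar illustration for 32-bit values:

#include <cstdint>
#include <iostream>

uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
  uint32_t Shamt = Z % 32;                   // the URem term above
  if (Shamt == 0)                            // the ICmp/Select pair above
    return X;
  return (X << Shamt) | (Y >> (32 - Shamt)); // Shl, LShr, Or terms
}

int main() {
  std::cout << std::hex << fshl32(0x12345678, 0x9abcdef0, 8) << "\n"; // 3456789a
}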
    case Intrinsic::get_active_lane_mask: {
      if (!getTLI()->shouldExpandGetActiveLaneMask(ResVT, ArgType)) {
        std::pair<InstructionCost, MVT> LT =

          ICA.getArgTypes()[0], cast<VectorType>(RetTy)->getElementCount());
      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, ExpRetTy, RetTy,

      ScalarizationCost = 0;
      ScalarizationCost +=
      ScalarizationCost +=

    return thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
    unsigned VecTyIndex = 0;
    if (IID == Intrinsic::vector_reduce_fadd ||
        IID == Intrinsic::vector_reduce_fmul)
      VecTyIndex = 1;
    assert(Tys.size() > VecTyIndex && "Unexpected IntrinsicCostAttributes");
    VecOpTy = dyn_cast<VectorType>(Tys[VecTyIndex]);

    if (isa<ScalableVectorType>(RetTy) ||
        any_of(Tys, [](const Type *Ty) { return isa<ScalableVectorType>(Ty); }))

        SkipScalarizationCost ? ScalarizationCostPassed : 0;
    unsigned ScalarCalls = 1;
    Type *ScalarRetTy = RetTy;
    if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
      if (!SkipScalarizationCost)
      ScalarCalls = std::max(ScalarCalls,
                             cast<FixedVectorType>(RetVTy)->getNumElements());
    }
    for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
      if (auto *VTy = dyn_cast<VectorType>(Ty)) {
        if (!SkipScalarizationCost)
        ScalarCalls = std::max(ScalarCalls,
                               cast<FixedVectorType>(VTy)->getNumElements());
      }
      ScalarTys.push_back(Ty);
    }
    if (ScalarCalls == 1)

        thisT()->getIntrinsicInstrCost(ScalarAttrs, CostKind);
    return ScalarCalls * ScalarCost + ScalarizationCost;
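The return value is the generic "scalarize the whole call" estimate: one scalar call per lane plus the overhead of unpacking vector operands and repacking the result. Spelled out with assumed numbers:

#include <iostream>

int scalarizedIntrinsicCost(unsigned ScalarCalls, int ScalarCost,
                            int ScalarizationCost) {
  return int(ScalarCalls) * ScalarCost + ScalarizationCost;
}

int main() {
  // e.g. a 4-lane call whose scalar form is assumed to cost 10, plus 8 for packing
  std::cout << scalarizedIntrinsicCost(4, 10, 8) << "\n"; // 48
}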
    case Intrinsic::sqrt:
    case Intrinsic::sin:
    case Intrinsic::cos:
    case Intrinsic::exp:
    case Intrinsic::exp2:
    case Intrinsic::log:
    case Intrinsic::log10:
    case Intrinsic::fabs:
    case Intrinsic::canonicalize:
    case Intrinsic::copysign:
    case Intrinsic::nearbyint:
    case Intrinsic::rint:
    case Intrinsic::roundeven:
    case Intrinsic::pow:
    case Intrinsic::fma:
    case Intrinsic::fmuladd:
    case Intrinsic::experimental_constrained_fmuladd:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::arithmetic_fence:
    case Intrinsic::masked_store: {
      Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
    case Intrinsic::masked_load: {
      Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
    case Intrinsic::vector_reduce_add:
    case Intrinsic::vector_reduce_mul:
    case Intrinsic::vector_reduce_and:
      return thisT()->getArithmeticReductionCost(Instruction::And, VecOpTy,
    case Intrinsic::vector_reduce_or:
      return thisT()->getArithmeticReductionCost(Instruction::Or, VecOpTy, None,
    case Intrinsic::vector_reduce_xor:
      return thisT()->getArithmeticReductionCost(Instruction::Xor, VecOpTy,
    case Intrinsic::vector_reduce_fadd:
      return thisT()->getArithmeticReductionCost(Instruction::FAdd, VecOpTy,
    case Intrinsic::vector_reduce_fmul:
      return thisT()->getArithmeticReductionCost(Instruction::FMul, VecOpTy,
    case Intrinsic::vector_reduce_smax:
    case Intrinsic::vector_reduce_smin:
    case Intrinsic::vector_reduce_fmax:
    case Intrinsic::vector_reduce_fmin:
      return thisT()->getMinMaxReductionCost(
    case Intrinsic::vector_reduce_umax:
    case Intrinsic::vector_reduce_umin:
      return thisT()->getMinMaxReductionCost(

      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
      Cost += thisT()->getArithmeticInstrCost(
      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
    case Intrinsic::sadd_sat:
    case Intrinsic::ssub_sat: {
      Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
                                     ? Intrinsic::sadd_with_overflow
                                     : Intrinsic::ssub_with_overflow;

                                    nullptr, ScalarizationCostPassed);
      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
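sadd.sat and ssub.sat are costed as the matching overflow intrinsic followed by compare/select of the saturation boundary. A hedged scalar sketch of what that expansion computes:

#include <cstdint>
#include <iostream>
#include <limits>

int32_t sadd_sat(int32_t X, int32_t Y) {
  int64_t Wide = int64_t(X) + int64_t(Y);         // stand-in for sadd.with.overflow
  if (Wide > std::numeric_limits<int32_t>::max()) // the ICmp/Select pair above
    return std::numeric_limits<int32_t>::max();
  if (Wide < std::numeric_limits<int32_t>::min())
    return std::numeric_limits<int32_t>::min();
  return int32_t(Wide);
}

int main() {
  std::cout << sadd_sat(2147483000, 1000) << " " << sadd_sat(3, 4) << "\n"; // 2147483647 7
}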
    case Intrinsic::uadd_sat:
    case Intrinsic::usub_sat: {
      Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
                                     ? Intrinsic::uadd_with_overflow
                                     : Intrinsic::usub_with_overflow;

                                    nullptr, ScalarizationCostPassed);
    case Intrinsic::smul_fix:
    case Intrinsic::umul_fix: {
      unsigned ExtOp =
          IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;

      Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind);
      Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
      Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy,
      Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, CostKind,
      Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind);
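The ext/mul/shift/trunc sequence above is the widening expansion of the fixed-point multiply: extend both operands to twice the width, multiply, shift right by the scale, and narrow back. A hedged scalar sketch for a 32-bit fixed-point value:

#include <cstdint>
#include <iostream>

int32_t smul_fix_i32(int32_t A, int32_t B, unsigned Scale) {
  int64_t Wide = int64_t(A) * int64_t(B); // the SExt + Mul being costed
  return int32_t(Wide >> Scale);          // the shift + Trunc pieces
}

int main() {
  // 1.5 * 2.5 in Q16.16 fixed point -> 3.75
  std::cout << smul_fix_i32(3 << 15, 5 << 15, 16) / 65536.0 << "\n";
}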
    case Intrinsic::sadd_with_overflow:
    case Intrinsic::ssub_with_overflow: {
      unsigned Opcode = IID == Intrinsic::sadd_with_overflow
                            ? BinaryOperator::Add
                            : BinaryOperator::Sub;

      Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
      Cost += 2 * thisT()->getCmpSelInstrCost(
                      Instruction::ICmp, SumTy, OverflowTy,
      Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy,
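The add/sub, two compares, and xor correspond to the standard signed-overflow check: for Res = X + Y, overflow holds exactly when (Res < X) differs from (Y < 0). A hedged scalar sketch:

#include <cstdint>
#include <iostream>

struct AddResult { int32_t Res; bool Ovf; };

AddResult saddWithOverflow(int32_t X, int32_t Y) {
  int32_t Res = int32_t(uint32_t(X) + uint32_t(Y)); // wraps like the IR add
  bool Ovf = (Res < X) ^ (Y < 0);                   // the two ICmps and the Xor above
  return {Res, Ovf};
}

int main() {
  std::cout << saddWithOverflow(INT32_MAX, 1).Ovf << " "
            << saddWithOverflow(5, 7).Ovf << "\n"; // 1 0
}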
    case Intrinsic::uadd_with_overflow:
    case Intrinsic::usub_with_overflow: {
      unsigned Opcode = IID == Intrinsic::uadd_with_overflow
                            ? BinaryOperator::Add
                            : BinaryOperator::Sub;

      Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
      Cost +=
          thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy,
    case Intrinsic::smul_with_overflow:
    case Intrinsic::umul_with_overflow: {
      bool IsSigned = IID == Intrinsic::smul_with_overflow;

      unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;

      Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind);
      Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
      Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy,
      Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy,
      Cost += thisT()->getCmpSelInstrCost(
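Here the overflow bit comes from doing the multiply at twice the width and then checking that the high half is nothing but the sign/zero extension of the low half. A hedged scalar sketch for 32-bit signed multiply:

#include <cstdint>
#include <iostream>

bool smulOverflows(int32_t X, int32_t Y) {
  int64_t Wide = int64_t(X) * int64_t(Y); // the SExt + Mul being costed
  return Wide != int64_t(int32_t(Wide));  // Trunc + re-extend + compare
}

int main() {
  std::cout << smulOverflows(1 << 20, 1 << 12) << " " << smulOverflows(3, 4)
            << "\n"; // 1 0
}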
    case Intrinsic::fptosi_sat:
    case Intrinsic::fptoui_sat: {
      Type *FromTy = Tys[0];
      bool IsSigned = IID == Intrinsic::fptosi_sat;

      Cost += thisT()->getIntrinsicInstrCost(Attrs1, CostKind);
      Cost += thisT()->getIntrinsicInstrCost(Attrs2, CostKind);
      Cost += thisT()->getCastInstrCost(
          IsSigned ? Instruction::FPToSI : Instruction::FPToUI, RetTy, FromTy,
      Cost += thisT()->getCmpSelInstrCost(
      Cost += thisT()->getCmpSelInstrCost(
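The two min/max intrinsic costs, the conversion, and the compare/select pairs price a clamp-then-convert expansion of the saturating conversion, with an extra select turning NaN into zero. A hedged scalar equivalent for double to i32:

#include <cmath>
#include <cstdint>
#include <iostream>

int32_t fptosi_sat_i32(double X) {
  if (std::isnan(X))                     // the extra cmp/select for NaN
    return 0;
  double Clamped = std::fmax(-2147483648.0, std::fmin(2147483647.0, X));
  return int32_t(Clamped);               // the FPToSI being costed
}

int main() {
  std::cout << fptosi_sat_i32(3.0e9) << " " << fptosi_sat_i32(-7.5) << "\n"; // 2147483647 -7
}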
    case Intrinsic::ctpop:
    case Intrinsic::ctlz:
    case Intrinsic::cttz:
    case Intrinsic::bswap:
    case Intrinsic::bitreverse:
    std::pair<InstructionCost, MVT> LT =

    for (unsigned ISD : ISDs) {
      if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&

        LegalCost.push_back(LT.first * 2);
        LegalCost.push_back(LT.first * 1);
        CustomCost.push_back(LT.first * 2);
    }

    auto *MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
    if (MinLegalCostI != LegalCost.end())
      return *MinLegalCostI;

    auto MinCustomCostI =
        std::min_element(CustomCost.begin(), CustomCost.end());
    if (MinCustomCostI != CustomCost.end())
      return *MinCustomCostI;
    if (IID == Intrinsic::fmuladd)
      return thisT()->getArithmeticInstrCost(BinaryOperator::FMul, RetTy,
                                             CostKind) +
             thisT()->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy,
                                             CostKind);
    if (IID == Intrinsic::experimental_constrained_fmuladd) {
      IntrinsicCostAttributes FMulAttrs(
          Intrinsic::experimental_constrained_fmul, RetTy, Tys);
      IntrinsicCostAttributes FAddAttrs(
          Intrinsic::experimental_constrained_fadd, RetTy, Tys);
      return thisT()->getIntrinsicInstrCost(FMulAttrs, CostKind) +
             thisT()->getIntrinsicInstrCost(FAddAttrs, CostKind);
    }
    if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
      if (isa<ScalableVectorType>(RetTy) ||
          any_of(Tys, [](const Type *Ty) { return isa<ScalableVectorType>(Ty); }))

          SkipScalarizationCost ? ScalarizationCostPassed
      unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements();

      for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
        ScalarTys.push_back(Ty);
      }
      for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
        if (auto *VTy = dyn_cast<VectorType>(Tys[i])) {
          ScalarCalls = std::max(ScalarCalls,
                                 cast<FixedVectorType>(VTy)->getNumElements());
        }
      }
      return ScalarCalls * ScalarCost + ScalarizationCost;
    }

    return SingleCallCost;
    std::pair<InstructionCost, MVT> LT =
    return LT.first.isValid() ? *LT.first.getValue() : 0;
    unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
    if ((Opcode == Instruction::Or || Opcode == Instruction::And) &&
      return thisT()->getCastInstrCost(Instruction::BitCast, ValTy, Ty,
             thisT()->getCmpSelInstrCost(Instruction::ICmp, ValTy,

    unsigned NumReduxLevels = Log2_32(NumVecElts);

    std::pair<InstructionCost, MVT> LT =
        thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
    unsigned LongVectorCount = 0;
    unsigned MVTLen =
        LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
    while (NumVecElts > MVTLen) {
      ArithCost += thisT()->getArithmeticInstrCost(Opcode, SubTy, CostKind);
    }

    NumReduxLevels -= LongVectorCount;

    ShuffleCost += NumReduxLevels * thisT()->getShuffleCost(
    ArithCost +=
        NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty, CostKind);
    return ShuffleCost + ArithCost +
           thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
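The tree reduction being costed halves the vector Log2(NumVecElts) times, paying one shuffle and one arithmetic op per level, and finishes with a single extract of lane 0. A hedged scalar model of the same halving scheme:

#include <iostream>
#include <vector>

int treeReduceAdd(std::vector<int> V) { // assumes V.size() is a power of two
  for (size_t Width = V.size() / 2; Width >= 1; Width /= 2)
    for (size_t I = 0; I < Width; ++I)
      V[I] += V[I + Width];             // one "shuffle + add" level
  return V[0];                          // the final extractelement
}

int main() {
  std::cout << treeReduceAdd({1, 2, 3, 4, 5, 6, 7, 8}) << "\n"; // 36
}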
    if (isa<ScalableVectorType>(Ty))

    auto *VTy = cast<FixedVectorType>(Ty);
    ArithCost *= VTy->getNumElements();

    return ExtractCost + ArithCost;
    unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
    unsigned NumReduxLevels = Log2_32(NumVecElts);

      CmpOpcode = Instruction::FCmp;
             "expecting floating point or integer type for min/max reduction");
      CmpOpcode = Instruction::ICmp;

    std::pair<InstructionCost, MVT> LT =
        thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
    unsigned LongVectorCount = 0;
    unsigned MVTLen =
        LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
    while (NumVecElts > MVTLen) {
          thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy,
    }

    NumReduxLevels -= LongVectorCount;

    ShuffleCost += NumReduxLevels * thisT()->getShuffleCost(
        (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy,
    return ShuffleCost + MinMaxCost +
           thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
        IsUnsigned ? Instruction::ZExt : Instruction::SExt, ExtTy, Ty,
    return RedCost + MulCost + ExtCost;

#endif // LLVM_CODEGEN_BASICTTIIMPL_H