25 #include "llvm/IR/IntrinsicsARM.h"
43 #define DEBUG_TYPE "armtti"
47 cl::desc("Enable the generation of masked loads and stores"));
51 cl::desc("Disable the generation of low-overhead loops"));
55 cl::desc("Enable the generation of WLS loops"));
73 unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign ? MemAlign
75 : IntrAlign->getLimitedValue();
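// areInlineCompatible below: inlining is allowed only if caller and callee agree
// exactly on the features outside InlineFeaturesAllowed, and the callee's
// allowed features are a subset of the caller's.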
89 TM.getSubtargetImpl(*Caller)->getFeatureBits();
91 TM.getSubtargetImpl(*Callee)->getFeatureBits();
94 bool MatchExact = (CallerBits & ~InlineFeaturesAllowed) ==
95 (CalleeBits & ~InlineFeaturesAllowed);
98 bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
99 (CalleeBits & InlineFeaturesAllowed);
100 return MatchExact && MatchSubset;
106 if (ST->hasMVEIntegerOps())
121 using namespace PatternMatch;
126 case Intrinsic::arm_neon_vld1: {
136 case Intrinsic::arm_neon_vld2:
137 case Intrinsic::arm_neon_vld3:
138 case Intrinsic::arm_neon_vld4:
139 case Intrinsic::arm_neon_vld2lane:
140 case Intrinsic::arm_neon_vld3lane:
141 case Intrinsic::arm_neon_vld4lane:
142 case Intrinsic::arm_neon_vst1:
143 case Intrinsic::arm_neon_vst2:
144 case Intrinsic::arm_neon_vst3:
145 case Intrinsic::arm_neon_vst4:
146 case Intrinsic::arm_neon_vst2lane:
147 case Intrinsic::arm_neon_vst3lane:
148 case Intrinsic::arm_neon_vst4lane: {
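// For these NEON load/store intrinsics the alignment is carried as the last
// argument; the combine (only partially shown here) can update that operand
// when a larger alignment is known.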
152 unsigned AlignArg = II.arg_size() - 1;
154 MaybeAlign Align = cast<ConstantInt>(AlignArgOp)->getMaybeAlignValue();
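// arm_mve_pred_i2v / arm_mve_pred_v2i below fold round-trips between MVE
// predicate vectors and their integer form, including the xor-with-all-ones
// form, which amounts to inverting the predicate.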
164 case Intrinsic::arm_mve_pred_i2v: {
167 if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(
173 if (match(Arg, m_Xor(PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(
177 if (auto *CI = dyn_cast<ConstantInt>(XorMask)) {
178 if (CI->getValue().trunc(16).isAllOnes()) {
180 cast<FixedVectorType>(II.getType())->getNumElements(),
193 case Intrinsic::arm_mve_pred_v2i: {
196 if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_i2v>(
210 case Intrinsic::arm_mve_vadc:
211 case Intrinsic::arm_mve_vadc_predicated: {
213 (II.getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2;
215 "Bad type for intrinsic!");
224 case Intrinsic::arm_mve_vmldava: {
226 if (I->hasOneUse()) {
227 auto *User = cast<Instruction>(*I->user_begin());
231 Value *OpX = I->getOperand(4);
232 Value *OpY = I->getOperand(5);
238 {I->getOperand(0), I->getOperand(1),
239 I->getOperand(2), OpZ, OpX, OpY});
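// simplifyDemandedVectorEltsIntrinsic: MVE narrowing ops (vcvt_narrow, vqmovn,
// vshrn) only write the top or bottom lane of each output pair, so only half of
// the demanded elements need to be propagated to the source operand.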
255 SimplifyAndSetOp) const {
260 auto SimplifyNarrowInstrTopBottom = [&](unsigned TopOpc) {
261 unsigned NumElts = cast<FixedVectorType>(II.getType())->getNumElements();
262 unsigned IsTop = cast<ConstantInt>(II.getOperand(TopOpc))->getZExtValue();
269 SimplifyAndSetOp(&II, 0, OrigDemandedElts & DemandedElts, UndefElts);
279 case Intrinsic::arm_mve_vcvt_narrow:
280 SimplifyNarrowInstrTopBottom(2);
282 case Intrinsic::arm_mve_vqmovn:
283 SimplifyNarrowInstrTopBottom(4);
285 case Intrinsic::arm_mve_vshrn:
286 SimplifyNarrowInstrTopBottom(7);
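// getIntImmCost: rough count of the instructions needed to materialize an
// integer immediate; small or shifter-operand immediates cost 1, values needing
// a MOVW/MOVT pair (or a literal load on pre-V6T2 cores) cost 2-3.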
298 if (Bits == 0 || Imm.getActiveBits() >= 64)
301 int64_t SImmVal = Imm.getSExtValue();
303 if (!ST->isThumb()) {
304 if ((SImmVal >= 0 && SImmVal < 65536) ||
308 return ST->hasV6T2Ops() ? 2 : 3;
311 if ((SImmVal >= 0 && SImmVal < 65536) ||
315 return ST->hasV6T2Ops() ? 2 : 3;
318 if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
330 if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
346 C->getValue() == Imm && Imm.isNegative() && Imm.isNegatedPowerOf2()) {
348 auto isSSatMin = [&](Value *MinInst) {
349 if (isa<SelectInst>(MinInst)) {
350 Value *MinLHS, *MinRHS;
363 return cast<Instruction>(Inst->getOperand(1))->getOperand(1);
374 if (Imm.getBitWidth() != 64 ||
378 if (!FP && isa<ICmpInst>(Inst) && Inst->hasOneUse())
382 return isa<FPToSIInst>(FP);
393 if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
394 Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
400 if (Opcode == Instruction::GetElementPtr && Idx != 0)
403 if (Opcode == Instruction::And) {
405 if (Imm == 255 || Imm == 65535)
417 if (Opcode == Instruction::ICmp && Imm.isNegative() &&
419 int64_t NegImm = -Imm.getSExtValue();
420 if (ST->isThumb2() && NegImm < 1<<12)
423 if (ST->isThumb() && NegImm < 1<<8)
429 if (Opcode == Instruction::Xor && Imm.isAllOnes())
434 if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->isThumb2()) &&
437 (isa<ICmpInst>(Inst) && Inst->hasOneUse() &&
446 if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnesValue()) {
460 (ST->hasNEON() || ST->hasMVEIntegerOps())) {
476 assert(ISD && "Invalid opcode");
481 return Cost == 0 ? 0 : 1;
484 auto IsLegalFPType = [this](EVT VT) {
487 (EltVT == MVT::f64 && ST->hasFP64()) ||
488 (EltVT == MVT::f16 && ST->hasFullFP16());
501 if ((ST->hasMVEIntegerOps() &&
502 (Opcode == Instruction::Trunc || Opcode == Instruction::ZExt ||
503 Opcode == Instruction::SExt)) ||
504 (ST->hasMVEFloatOps() &&
505 (Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc) &&
506 IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)))
530 return AdjustCost(Entry->Cost);
549 if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
550 if (const auto *Entry =
561 if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
562 if (const auto *Entry =
578 if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
579 if (const auto *Entry =
589 if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
590 if (const auto *Entry =
599 I && I->hasOneUse() && ST->hasNEON() && SrcTy.isVector()) {
615 auto *User = cast<Instruction>(*I->user_begin());
620 return AdjustCost(Entry->Cost);
625 if (Src->isVectorTy() && ST->hasNEON() &&
638 return AdjustCost(LT.first * Entry->Cost);
727 if (SrcTy.isVector() && ST->hasNEON()) {
731 return AdjustCost(Entry->Cost);
761 return AdjustCost(Entry->Cost);
788 if (SrcTy.isInteger() && ST->hasNEON()) {
792 return AdjustCost(Entry->Cost);
813 if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
830 if (IsLegalFPType(SrcTy) && IsLegalFPType(DstTy))
833 return Lanes * CallCost;
864 return AdjustCost(Entry->Cost);
867 int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
878 if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
882 if (ST->hasNEON() && (Opcode == Instruction::InsertElement ||
883 Opcode == Instruction::ExtractElement)) {
886 if (cast<VectorType>(ValTy)->getElementType()->isIntegerTy())
893 return std::max<InstructionCost>(
897 if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement ||
898 Opcode == Instruction::ExtractElement)) {
902 std::pair<InstructionCost, MVT> LT =
945 if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && Sel &&
947 Sel = cast<Instruction>(Sel->user_back());
1005 std::pair<InstructionCost, MVT> LT =
1010 if (ST->hasMVEIntegerOps() && ValTy->isVectorTy() &&
1011 (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
1012 cast<FixedVectorType>(ValTy)->getNumElements() > 1) {
1014 FixedVectorType *VecCondTy = dyn_cast_or_null<FixedVectorType>(CondTy);
1019 if (Opcode == Instruction::FCmp && !ST->hasMVEFloatOps()) {
1030 std::pair<InstructionCost, MVT> LT =
1038 if (LT.second.getVectorNumElements() > 2) {
1040 return LT.first * BaseCost +
1049 if (ST->hasMVEIntegerOps() && ValTy->isVectorTy())
1063 unsigned NumVectorInstToHideOverhead = 10;
1064 int MaxMergeDistance = 64;
1066 if (ST->hasNEON()) {
1069 return NumVectorInstToHideOverhead;
1083 case Intrinsic::arm_mve_vctp8:
1084 case Intrinsic::arm_mve_vctp16:
1085 case Intrinsic::arm_mve_vctp32:
1086 case Intrinsic::arm_mve_vctp64:
1099 if (auto *VecTy = dyn_cast<FixedVectorType>(DataTy)) {
1101 if (VecTy->getNumElements() == 2)
1106 if (VecWidth != 128 && VecTy->getElementType()->isFloatingPointTy())
1111 return (EltWidth == 32 && Alignment >= 4) ||
1112 (EltWidth == 16 && Alignment >= 2) || (EltWidth == 8);
1120 return ((EltWidth == 32 && Alignment >= 4) ||
1121 (EltWidth == 16 && Alignment >= 2) || EltWidth == 8);
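// getNumMemOps below asks findOptimalMemOpLowering how many memory operations an
// inline memcpy/memmove/memset expansion would need; each is then weighted by
// Factor (2 by default, covering the load/store pair of a copy).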
1129 unsigned DstAddrSpace = ~0u;
1130 unsigned SrcAddrSpace = ~0u;
1131 const Function *F = I->getParent()->getParent();
1133 if (const auto *MC = dyn_cast<MemTransferInst>(I)) {
1134 ConstantInt *C = dyn_cast<ConstantInt>(MC->getLength());
1139 const unsigned Size = C->getValue().getZExtValue();
1140 const Align DstAlign = *MC->getDestAlign();
1141 const Align SrcAlign = *MC->getSourceAlign();
1143 MOp = MemOp::Copy(Size, false, DstAlign, SrcAlign,
1145 DstAddrSpace = MC->getDestAddressSpace();
1146 SrcAddrSpace = MC->getSourceAddressSpace();
1148 else if (const auto *MS = dyn_cast<MemSetInst>(I)) {
1149 ConstantInt *C = dyn_cast<ConstantInt>(MS->getLength());
1154 const unsigned Size = C->getValue().getZExtValue();
1155 const Align DstAlign = *MS->getDestAlign();
1159 DstAddrSpace = MS->getDestAddressSpace();
1164 unsigned Limit, Factor = 2;
1165 switch (I->getIntrinsicID()) {
1169 case Intrinsic::memmove:
1172 case Intrinsic::memset:
1183 std::vector<EVT> MemOps;
1184 if (getTLI()->findOptimalMemOpLowering(
1185 MemOps, Limit, MOp, DstAddrSpace,
1186 SrcAddrSpace, F->getAttributes()))
1187 return MemOps.size() * Factor;
1208 if (ST->hasNEON()) {
1225 if (const auto *Entry =
1227 return LT.first * Entry->Cost;
1246 if (const auto *Entry =
1248 return LT.first * Entry->Cost;
1272 return LT.first * Entry->Cost;
1275 if (ST->hasMVEIntegerOps()) {
1288 return LT.first * Entry->Cost *
1292 if (!Mask.empty()) {
1294 if (LT.second.isVector() &&
1295 Mask.size() <= LT.second.getVectorNumElements() &&
1302 int BaseCost = ST->hasMVEIntegerOps() && Tp->isVectorTy()
1319 switch (ISDOpcode) {
1332 if (ST->hasNEON()) {
1333 const unsigned FunctionCallDivCost = 20;
1334 const unsigned ReciprocalDivCost = 10;
1377 return LT.first * Entry->Cost;
1380 Opcode, Ty, CostKind, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
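// LooksLikeAFreeShift: on ARM a shift whose only user is one of the
// data-processing instructions listed below can usually be folded into the
// shifter operand, so it is treated as free.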
1398 auto LooksLikeAFreeShift = [&]() {
1408 switch (cast<Instruction>(CxtI->user_back())->getOpcode()) {
1410 case Instruction::Sub:
1411 case Instruction::And:
1412 case Instruction::Xor:
1413 case Instruction::Or:
1414 case Instruction::ICmp:
1420 if (LooksLikeAFreeShift())
1426 if (ST->hasMVEIntegerOps() && Ty->isVectorTy())
1434 return LT.first * BaseCost;
1437 if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
1438 unsigned Num = VTy->getNumElements();
1464 if (ST->hasNEON() && Src->isVectorTy() &&
1465 (Alignment && *Alignment != Align(16)) &&
1466 cast<VectorType>(Src)->getElementType()->isDoubleTy()) {
1470 return LT.first * 4;
1475 if (ST->hasMVEFloatOps() && isa<FixedVectorType>(Src) && I &&
1477 isa<FPExtInst>(*I->user_begin())) ||
1483 : cast<Instruction>(I->getOperand(0))->getOperand(0)->getType();
1489 int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
1500 if (ST->hasMVEIntegerOps()) {
1506 if (!isa<FixedVectorType>(Src))
1511 return cast<FixedVectorType>(Src)->getNumElements() * 8;
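// Interleaved accesses: when the factor and element type are supported, NEON
// vldN/vstN (and the MVE vld2/vst2 pattern) are costed directly instead of
// falling back to the scalarized base-class estimate.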
1517 bool UseMaskForCond, bool UseMaskForGaps) {
1518 assert(Factor >= 2 && "Invalid interleave factor");
1519 assert(isa<VectorType>(VecTy) && "Expect a vector type");
1524 if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
1525 !UseMaskForCond && !UseMaskForGaps) {
1526 unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements();
1535 if (NumElts % Factor == 0 &&
1544 if (ST->hasMVEIntegerOps() && Factor == 2 && NumElts / Factor > 2 &&
1547 return 2 * BaseCost;
1552 UseMaskForCond, UseMaskForGaps);
1556 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1558 using namespace PatternMatch;
1564 auto *VTy = cast<FixedVectorType>(DataTy);
1568 unsigned NumElems = VTy->getNumElements();
1569 unsigned EltSize = VTy->getScalarSizeInBits();
1570 std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, DataTy);
1578 NumElems * LT.first * ST->getMVEVectorCostFactor(CostKind);
1585 if (EltSize < 8 || Alignment < EltSize / 8)
1588 unsigned ExtSize = EltSize;
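// A gather whose only user is a zext/sext, or a scatter fed by a trunc, can
// fold the extension into the memory operation, so ExtSize is widened to the
// extended type before the legality checks below.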
match(I, m_Intrinsic<Intrinsic::masked_gather>())) &&
1597 const User *Us = *I->users().begin();
1598 if (isa<ZExtInst>(Us) || isa<SExtInst>(Us)) {
1601 cast<Instruction>(Us)->getType()->getScalarSizeInBits();
1602 if (((TypeSize == 32 && (EltSize == 8 || EltSize == 16)) ||
1603 (TypeSize == 16 && EltSize == 8)) &&
match(I, m_Intrinsic<Intrinsic::masked_scatter>())) &&
1613 (T = dyn_cast<TruncInst>(I->getOperand(0)))) {
1615 unsigned TypeSize = T->getOperand(0)->getType()->getScalarSizeInBits();
1616 if (((EltSize == 16 && TypeSize == 32) ||
1623 if (ExtSize * NumElems != 128 || NumElems < 4)
1632 if (ExtSize != 8 && ExtSize != 16)
1635 if (const auto *BC = dyn_cast<BitCastInst>(Ptr))
1636 Ptr = BC->getOperand(0);
1637 if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
1638 if (GEP->getNumOperands() != 2)
1642 if (Scale != 1 && Scale * 8 != ExtSize)
1645 if (const auto *ZExt = dyn_cast<ZExtInst>(GEP->getOperand(1))) {
1646 if (ZExt->getOperand(0)->getType()->getScalarSizeInBits() <= ExtSize)
1687 std::pair<InstructionCost, MVT> LT =
1701 (LT.second == MVT::v8i16 && RevVTSize <= (IsMLA ? 64u : 32u)) ||
1713 switch (ICA.getID()) {
1714 case Intrinsic::get_active_lane_mask:
1722 if (ST->hasMVEIntegerOps())
1725 case Intrinsic::sadd_sat:
1726 case Intrinsic::ssub_sat:
1727 case Intrinsic::uadd_sat:
1728 case Intrinsic::usub_sat: {
1729 if (!ST->hasMVEIntegerOps())
1749 if (!ST->hasMVEIntegerOps())
1761 if (!ST->hasMVEFloatOps())
1769 case Intrinsic::fptosi_sat:
1770 case Intrinsic::fptoui_sat: {
1773 bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
1783 if (ST->hasMVEFloatOps() &&
1791 (ST->hasFullFP16() && LT.second == MVT::f16) ||
1792 (ST->hasMVEFloatOps() &&
1796 LT.second.getScalarSizeInBits());
1801 LegalTy, {LegalTy, LegalTy});
1805 LegalTy, {LegalTy, LegalTy});
1807 return LT.first * Cost;
1817 if (!F->isIntrinsic())
1821 if (F->getName().startswith("llvm.arm"))
1824 switch (F->getIntrinsicID()) {
1827 case Intrinsic::sin:
1828 case Intrinsic::cos:
1829 case Intrinsic::pow:
1830 case Intrinsic::log:
1831 case Intrinsic::log10:
1833 case Intrinsic::exp:
1834 case Intrinsic::exp2:
1836 case Intrinsic::sqrt:
1837 case Intrinsic::fabs:
1838 case Intrinsic::copysign:
1842 case Intrinsic::rint:
1843 case Intrinsic::nearbyint:
1845 case Intrinsic::canonicalize:
1846 case Intrinsic::lround:
1847 case Intrinsic::llround:
1848 case Intrinsic::lrint:
1849 case Intrinsic::llrint:
1850 if (F->getReturnType()->isDoubleTy() && !ST->hasFP64())
1852 if (F->getReturnType()->isHalfTy() && !ST->hasFullFP16())
1858 case Intrinsic::masked_store:
1859 case Intrinsic::masked_load:
1860 case Intrinsic::masked_gather:
1861 case Intrinsic::masked_scatter:
1862 return !ST->hasMVEIntegerOps();
1863 case Intrinsic::sadd_with_overflow:
1864 case Intrinsic::uadd_with_overflow:
1865 case Intrinsic::ssub_with_overflow:
1866 case Intrinsic::usub_with_overflow:
1867 case Intrinsic::sadd_sat:
1868 case Intrinsic::uadd_sat:
1869 case Intrinsic::ssub_sat:
1870 case Intrinsic::usub_sat:
1885 if (auto *Call = dyn_cast<CallInst>(&I)) {
1886 if (auto *II = dyn_cast<IntrinsicInst>(Call)) {
1889 case Intrinsic::memset:
1890 case Intrinsic::memmove:
1893 if (const Function *F = Call->getCalledFunction())
1902 switch (I.getOpcode()) {
1905 case Instruction::FPToSI:
1906 case Instruction::FPToUI:
1907 case Instruction::SIToFP:
1908 case Instruction::UIToFP:
1909 case Instruction::FPTrunc:
1910 case Instruction::FPExt:
1940 switch (I.getOpcode()) {
1943 case Instruction::Alloca:
1947 case Instruction::PHI:
1954 if (I.getType()->isDoubleTy() && !ST->hasFP64())
1958 if (I.getType()->isHalfTy() && !ST->hasFullFP16())
1981 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
1986 const SCEV *TripCountSCEV =
1992 LLVM_DEBUG(dbgs() << "ARMHWLoops: Trip count does not fit into 32bits\n");
2000 if (auto *Call = dyn_cast<IntrinsicInst>(&I)) {
2001 switch (Call->getIntrinsicID()) {
2004 case Intrinsic::start_loop_iterations:
2005 case Intrinsic::test_start_loop_iterations:
2006 case Intrinsic::loop_decrement:
2007 case Intrinsic::loop_decrement_reg:
2017 bool IsTailPredLoop = false;
2018 auto ScanLoop = [&](Loop *L) {
2020 for (auto &I : *BB) {
2022 isa<InlineAsm>(I)) {
2026 if (auto *II = dyn_cast<IntrinsicInst>(&I))
2039 for (auto Inner : *L)
2040 if (!ScanLoop(Inner))
2062 if (isa<ICmpInst>(&I) && ++ICmpCount > 1)
2071 if (auto *II = dyn_cast<IntrinsicInst>(&I))
2079 if (isa<FCmpInst>(&I))
2084 if (isa<FPExtInst>(&I) || isa<FPTruncInst>(&I))
2088 if (isa<SExtInst>(&I) || isa<ZExtInst>(&I))
2089 if (!I.getOperand(0)->hasOneUse() || !isa<LoadInst>(I.getOperand(0)))
2093 if (isa<TruncInst>(&I))
2094 if (!I.hasOneUse() || !isa<StoreInst>(*I.user_begin()))
2113 LLVM_DEBUG(dbgs() << "Tail-predication: checking allowed instructions\n");
2124 bool ReductionsDisabled =
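// Tail-predication requires loop live-outs to be integer, float or half
// values; reductions can additionally be disallowed via the option checked
// above.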
2128 for (auto *I : LiveOuts) {
2129 if (!I->getType()->isIntegerTy() && !I->getType()->isFloatTy() &&
2130 !I->getType()->isHalfTy()) {
2131 LLVM_DEBUG(dbgs() << "Don't tail-predicate loop with non-integer/float "
2132 "live-out value\n");
2135 if (ReductionsDisabled) {
2148 if (isa<PHINode>(&I))
2156 if (T->getScalarSizeInBits() > 32) {
2160 if (isa<StoreInst>(I) || isa<LoadInst>(I)) {
2163 int64_t NextStride = getPtrStride(PSE, AccessTy, Ptr, L);
2164 if (NextStride == 1) {
} else if (NextStride == -1 ||
2173 << "Consecutive strides of 2 found, vld2/vstr2 can't "
2174 "be tail-predicated\n.");
2183 if (auto AR = dyn_cast<SCEVAddRecExpr>(PtrScev)) {
2184 const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
2190 "tail-predicate\n.");
2196 LLVM_DEBUG(dbgs() << "tail-predication: all instructions allowed!\n");
2214 if (!ST->hasMVEIntegerOps())
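// preferPredicateOverEpilogue: tail-folding is only preferred when MVE is
// available, the loop is a single-block hardware-loop candidate, and every
// instruction in it passes the tail-predication checks above.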
2219 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: not a single block "
2228 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
2236 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
2242 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
2281 << "Exit blocks: " << ExitingBlocks.size() << "\n");
2285 if (ExitingBlocks.size() > 2)
2301 for (auto &I : *BB) {
2304 if (I.getType()->isVectorTy())
2307 if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
2329 unsigned ExitingValues = 0;
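// Count live-out values in the exit blocks (PHIs other than simple GEP
// recurrences); a larger count presumably damps the unroll count computed
// later in this function (not shown in this excerpt).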
2332 for (auto *Exit : ExitBlocks) {
2335 unsigned LiveOuts = count_if(Exit->phis(), [](auto &PH) {
2336 return PH.getNumOperands() != 1 ||
2337 !isa<GetElementPtrInst>(PH.getOperand(0));
2339 ExitingValues = ExitingValues < LiveOuts ? LiveOuts : ExitingValues;
2370 if (!ST->hasMVEIntegerOps())
2376 return ScalarBits <= 64;
2384 if (!ST->hasMVEIntegerOps())