45#define DEBUG_TYPE "vector-combine"
51STATISTIC(NumVecLoad,
"Number of vector loads formed");
52STATISTIC(NumVecCmp,
"Number of vector compares formed");
53STATISTIC(NumVecBO,
"Number of vector binops formed");
54STATISTIC(NumVecCmpBO,
"Number of vector compare + binop formed");
55STATISTIC(NumShufOfBitcast,
"Number of shuffles moved after bitcast");
56STATISTIC(NumScalarOps,
"Number of scalar unary + binary ops formed");
57STATISTIC(NumScalarCmp,
"Number of scalar compares formed");
58STATISTIC(NumScalarIntrinsic,
"Number of scalar intrinsic calls formed");
62 cl::desc(
"Disable all vector combine transforms"));
66 cl::desc(
"Disable binop extract to shuffle transforms"));
70 cl::desc(
"Max number of instructions to scan for vector combining."));
72static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
80 bool TryEarlyFoldsOnly)
83 SQ(*
DL, nullptr, &DT, &AC),
84 TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
91 const TargetTransformInfo &TTI;
92 const DominatorTree &DT;
96 const SimplifyQuery SQ;
100 bool TryEarlyFoldsOnly;
102 InstructionWorklist Worklist;
111 bool vectorizeLoadInsert(Instruction &
I);
112 bool widenSubvectorLoad(Instruction &
I);
113 ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
114 ExtractElementInst *Ext1,
115 unsigned PreferredExtractIndex)
const;
116 bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
117 const Instruction &
I,
118 ExtractElementInst *&ConvertToShuffle,
119 unsigned PreferredExtractIndex);
122 bool foldExtractExtract(Instruction &
I);
123 bool foldInsExtFNeg(Instruction &
I);
124 bool foldInsExtBinop(Instruction &
I);
125 bool foldInsExtVectorToShuffle(Instruction &
I);
126 bool foldBitOpOfCastops(Instruction &
I);
127 bool foldBitOpOfCastConstant(Instruction &
I);
128 bool foldBitcastShuffle(Instruction &
I);
129 bool scalarizeOpOrCmp(Instruction &
I);
130 bool scalarizeVPIntrinsic(Instruction &
I);
131 bool foldExtractedCmps(Instruction &
I);
132 bool foldSelectsFromBitcast(Instruction &
I);
133 bool foldBinopOfReductions(Instruction &
I);
134 bool foldSingleElementStore(Instruction &
I);
135 bool scalarizeLoad(Instruction &
I);
136 bool scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
Value *Ptr);
137 bool scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
Value *Ptr);
138 bool scalarizeExtExtract(Instruction &
I);
139 bool foldConcatOfBoolMasks(Instruction &
I);
140 bool foldPermuteOfBinops(Instruction &
I);
141 bool foldShuffleOfBinops(Instruction &
I);
142 bool foldShuffleOfSelects(Instruction &
I);
143 bool foldShuffleOfCastops(Instruction &
I);
144 bool foldShuffleOfShuffles(Instruction &
I);
145 bool foldPermuteOfIntrinsic(Instruction &
I);
146 bool foldShufflesOfLengthChangingShuffles(Instruction &
I);
147 bool foldShuffleOfIntrinsics(Instruction &
I);
148 bool foldShuffleToIdentity(Instruction &
I);
149 bool foldShuffleFromReductions(Instruction &
I);
150 bool foldShuffleChainsToReduce(Instruction &
I);
151 bool foldCastFromReductions(Instruction &
I);
152 bool foldSignBitReductionCmp(Instruction &
I);
153 bool foldICmpEqZeroVectorReduce(Instruction &
I);
154 bool foldEquivalentReductionCmp(Instruction &
I);
155 bool foldReduceAddCmpZero(Instruction &
I);
156 bool foldSelectShuffle(Instruction &
I,
bool FromReduction =
false);
157 bool foldInterleaveIntrinsics(Instruction &
I);
158 bool shrinkType(Instruction &
I);
159 bool shrinkLoadForShuffles(Instruction &
I);
160 bool shrinkPhiOfShuffles(Instruction &
I);
162 void replaceValue(Instruction &Old,
Value &New,
bool Erase =
true) {
168 Worklist.pushUsersToWorkList(*NewI);
169 Worklist.pushValue(NewI);
186 SmallPtrSet<Value *, 4> Visited;
191 OpI,
nullptr,
nullptr, [&](
Value *V) {
196 NextInst = NextInst->getNextNode();
201 Worklist.pushUsersToWorkList(*OpI);
202 Worklist.pushValue(OpI);
222 if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
223 Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
229 Type *ScalarTy = Load->getType()->getScalarType();
231 unsigned MinVectorSize =
TTI.getMinVectorRegisterBitWidth();
232 if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
239bool VectorCombine::vectorizeLoadInsert(
Instruction &
I) {
265 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
268 unsigned MinVecNumElts = MinVectorSize / ScalarSize;
269 auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts,
false);
270 unsigned OffsetEltIndex = 0;
278 unsigned OffsetBitWidth =
DL->getIndexTypeSizeInBits(SrcPtr->
getType());
279 APInt
Offset(OffsetBitWidth, 0);
289 uint64_t ScalarSizeInBytes = ScalarSize / 8;
290 if (
Offset.urem(ScalarSizeInBytes) != 0)
294 OffsetEltIndex =
Offset.udiv(ScalarSizeInBytes).getZExtValue();
295 if (OffsetEltIndex >= MinVecNumElts)
312 unsigned AS =
Load->getPointerAddressSpace();
331 unsigned OutputNumElts = Ty->getNumElements();
333 assert(OffsetEltIndex < MinVecNumElts &&
"Address offset too big");
334 Mask[0] = OffsetEltIndex;
341 if (OldCost < NewCost || !NewCost.
isValid())
352 replaceValue(
I, *VecLd);
360bool VectorCombine::widenSubvectorLoad(Instruction &
I) {
363 if (!Shuf->isIdentityWithPadding())
369 unsigned OpIndex =
any_of(Shuf->getShuffleMask(), [&NumOpElts](
int M) {
370 return M >= (int)(NumOpElts);
381 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
390 unsigned AS =
Load->getPointerAddressSpace();
405 if (OldCost < NewCost || !NewCost.
isValid())
412 replaceValue(
I, *VecLd);
419ExtractElementInst *VectorCombine::getShuffleExtract(
420 ExtractElementInst *Ext0, ExtractElementInst *Ext1,
424 assert(Index0C && Index1C &&
"Expected constant extract indexes");
426 unsigned Index0 = Index0C->getZExtValue();
427 unsigned Index1 = Index1C->getZExtValue();
430 if (Index0 == Index1)
454 if (PreferredExtractIndex == Index0)
456 if (PreferredExtractIndex == Index1)
460 return Index0 > Index1 ? Ext0 : Ext1;
468bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
469 ExtractElementInst *Ext1,
470 const Instruction &
I,
471 ExtractElementInst *&ConvertToShuffle,
472 unsigned PreferredExtractIndex) {
475 assert(Ext0IndexC && Ext1IndexC &&
"Expected constant extract indexes");
477 unsigned Opcode =
I.getOpcode();
490 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
491 "Expected a compare");
501 unsigned Ext0Index = Ext0IndexC->getZExtValue();
502 unsigned Ext1Index = Ext1IndexC->getZExtValue();
516 unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
517 unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
518 InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
523 if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
528 bool HasUseTax = Ext0 == Ext1 ? !Ext0->
hasNUses(2)
530 OldCost = CheapExtractCost + ScalarOpCost;
531 NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
535 OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
536 NewCost = VectorOpCost + CheapExtractCost +
541 ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
542 if (ConvertToShuffle) {
554 SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(),
556 ShuffleMask[BestInsIndex] = BestExtIndex;
558 VecTy, VecTy, ShuffleMask,
CostKind, 0,
559 nullptr, {ConvertToShuffle});
562 VecTy, VecTy, {},
CostKind, 0,
nullptr,
570 return OldCost < NewCost;
582 ShufMask[NewIndex] = OldIndex;
583 return Builder.CreateShuffleVector(Vec, ShufMask,
"shift");
635 V1,
"foldExtExtBinop");
640 VecBOInst->copyIRFlags(&
I);
646bool VectorCombine::foldExtractExtract(Instruction &
I) {
667 unsigned NumElts = FixedVecTy->getNumElements();
668 if (C0 >= NumElts || C1 >= NumElts)
684 ExtractElementInst *ExtractToChange;
685 if (isExtractExtractCheap(Ext0, Ext1,
I, ExtractToChange, InsertIndex))
691 if (ExtractToChange) {
692 unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
697 if (ExtractToChange == Ext0)
706 ? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex,
I)
707 : foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex,
I);
710 replaceValue(
I, *NewExt);
716bool VectorCombine::foldInsExtFNeg(Instruction &
I) {
719 uint64_t ExtIdx, InsIdx;
734 auto *DstVecScalarTy = DstVecTy->getScalarType();
736 if (!SrcVecTy || DstVecScalarTy != SrcVecTy->getScalarType())
741 unsigned NumDstElts = DstVecTy->getNumElements();
742 unsigned NumSrcElts = SrcVecTy->getNumElements();
743 if (ExtIdx > NumSrcElts || InsIdx >= NumDstElts || NumDstElts == 1)
749 SmallVector<int>
Mask(NumDstElts);
750 std::iota(
Mask.begin(),
Mask.end(), 0);
751 Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
767 bool NeedLenChg = SrcVecTy->getNumElements() != NumDstElts;
770 SmallVector<int> SrcMask;
773 SrcMask[ExtIdx % NumDstElts] = ExtIdx;
775 DstVecTy, SrcVecTy, SrcMask,
CostKind);
779 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
781 if (NewCost > OldCost)
784 Value *NewShuf, *LenChgShuf =
nullptr;
798 replaceValue(
I, *NewShuf);
804bool VectorCombine::foldInsExtBinop(Instruction &
I) {
805 BinaryOperator *VecBinOp, *SclBinOp;
837 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
839 if (NewCost > OldCost)
850 NewInst->copyIRFlags(VecBinOp);
851 NewInst->andIRFlags(SclBinOp);
856 replaceValue(
I, *NewBO);
862bool VectorCombine::foldBitOpOfCastops(Instruction &
I) {
865 if (!BinOp || !BinOp->isBitwiseLogicOp())
871 if (!LHSCast || !RHSCast) {
872 LLVM_DEBUG(
dbgs() <<
" One or both operands are not cast instructions\n");
878 if (CastOpcode != RHSCast->getOpcode())
882 switch (CastOpcode) {
883 case Instruction::BitCast:
884 case Instruction::Trunc:
885 case Instruction::SExt:
886 case Instruction::ZExt:
892 Value *LHSSrc = LHSCast->getOperand(0);
893 Value *RHSSrc = RHSCast->getOperand(0);
899 auto *SrcTy = LHSSrc->
getType();
900 auto *DstTy =
I.getType();
903 if (CastOpcode != Instruction::BitCast &&
908 if (!SrcTy->getScalarType()->isIntegerTy() ||
909 !DstTy->getScalarType()->isIntegerTy())
924 LHSCastCost + RHSCastCost;
935 if (!LHSCast->hasOneUse())
936 NewCost += LHSCastCost;
937 if (!RHSCast->hasOneUse())
938 NewCost += RHSCastCost;
941 <<
" NewCost=" << NewCost <<
"\n");
943 if (NewCost > OldCost)
948 BinOp->getName() +
".inner");
950 NewBinOp->copyIRFlags(BinOp);
964 replaceValue(
I, *Result);
973bool VectorCombine::foldBitOpOfCastConstant(Instruction &
I) {
989 switch (CastOpcode) {
990 case Instruction::BitCast:
991 case Instruction::ZExt:
992 case Instruction::SExt:
993 case Instruction::Trunc:
999 Value *LHSSrc = LHSCast->getOperand(0);
1001 auto *SrcTy = LHSSrc->
getType();
1002 auto *DstTy =
I.getType();
1005 if (CastOpcode != Instruction::BitCast &&
1010 if (!SrcTy->getScalarType()->isIntegerTy() ||
1011 !DstTy->getScalarType()->isIntegerTy())
1015 PreservedCastFlags RHSFlags;
1040 if (!LHSCast->hasOneUse())
1041 NewCost += LHSCastCost;
1043 LLVM_DEBUG(
dbgs() <<
"foldBitOpOfCastConstant: OldCost=" << OldCost
1044 <<
" NewCost=" << NewCost <<
"\n");
1046 if (NewCost > OldCost)
1051 LHSSrc, InvC,
I.getName() +
".inner");
1053 NewBinOp->copyIRFlags(&
I);
1073 replaceValue(
I, *Result);
1080bool VectorCombine::foldBitcastShuffle(Instruction &
I) {
1094 if (!DestTy || !SrcTy)
1097 unsigned DestEltSize = DestTy->getScalarSizeInBits();
1098 unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
1099 if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
1109 if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
1110 !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
1114 SmallVector<int, 16> NewMask;
1115 if (DestEltSize <= SrcEltSize) {
1118 if (SrcEltSize % DestEltSize != 0)
1120 unsigned ScaleFactor = SrcEltSize / DestEltSize;
1125 if (DestEltSize % SrcEltSize != 0)
1127 unsigned ScaleFactor = DestEltSize / SrcEltSize;
1134 unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
1135 auto *NewShuffleTy =
1137 auto *OldShuffleTy =
1139 unsigned NumOps = IsUnary ? 1 : 2;
1149 TargetTransformInfo::CastContextHint::None,
1154 TargetTransformInfo::CastContextHint::None,
1157 LLVM_DEBUG(
dbgs() <<
"Found a bitcasted shuffle: " <<
I <<
"\n OldCost: "
1158 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
1160 if (NewCost > OldCost || !NewCost.
isValid())
1168 replaceValue(
I, *Shuf);
1175bool VectorCombine::scalarizeVPIntrinsic(Instruction &
I) {
1189 if (!ScalarOp0 || !ScalarOp1)
1197 auto IsAllTrueMask = [](
Value *MaskVal) {
1200 return ConstValue->isAllOnesValue();
1214 SmallVector<int>
Mask;
1216 Mask.resize(FVTy->getNumElements(), 0);
1225 Args.push_back(
V->getType());
1226 IntrinsicCostAttributes
Attrs(IntrID, VecTy, Args);
1231 std::optional<unsigned> FunctionalOpcode =
1233 std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
1234 if (!FunctionalOpcode) {
1243 IntrinsicCostAttributes
Attrs(*ScalarIntrID, VecTy->getScalarType(), Args);
1253 InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;
1255 LLVM_DEBUG(
dbgs() <<
"Found a VP Intrinsic to scalarize: " << VPI
1258 <<
", Cost of scalarizing:" << NewCost <<
"\n");
1261 if (OldCost < NewCost || !NewCost.
isValid())
1272 bool SafeToSpeculate;
1278 *FunctionalOpcode, &VPI,
nullptr, SQ.
AC, SQ.
DT);
1279 if (!SafeToSpeculate &&
1286 {ScalarOp0, ScalarOp1})
1288 ScalarOp0, ScalarOp1);
1297bool VectorCombine::scalarizeOpOrCmp(Instruction &
I) {
1302 if (!UO && !BO && !CI && !
II)
1310 if (Arg->getType() !=
II->getType() &&
1320 for (User *U :
I.users())
1327 std::optional<uint64_t>
Index;
1329 auto Ops =
II ?
II->args() :
I.operands();
1333 uint64_t InsIdx = 0;
1338 if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
1344 else if (InsIdx != *Index)
1361 if (!
Index.has_value())
1365 Type *ScalarTy = VecTy->getScalarType();
1366 assert(VecTy->isVectorTy() &&
1369 "Unexpected types for insert element into binop or cmp");
1371 unsigned Opcode =
I.getOpcode();
1379 }
else if (UO || BO) {
1383 IntrinsicCostAttributes ScalarICA(
1384 II->getIntrinsicID(), ScalarTy,
1387 IntrinsicCostAttributes VectorICA(
1388 II->getIntrinsicID(), VecTy,
1395 Value *NewVecC =
nullptr;
1397 NewVecC =
simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
1400 simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
1402 NewVecC =
simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
1416 for (
auto [Idx,
Op, VecC, Scalar] :
enumerate(
Ops, VecCs, ScalarOps)) {
1418 II->getIntrinsicID(), Idx, &
TTI)))
1421 Instruction::InsertElement, VecTy,
CostKind, *Index, VecC, Scalar);
1422 OldCost += InsertCost;
1423 NewCost += !
Op->hasOneUse() * InsertCost;
1427 if (OldCost < NewCost || !NewCost.
isValid())
1437 ++NumScalarIntrinsic;
1447 Scalar = Builder.
CreateCmp(CI->getPredicate(), ScalarOps[0], ScalarOps[1]);
1453 Scalar->setName(
I.getName() +
".scalar");
1458 ScalarInst->copyIRFlags(&
I);
1461 replaceValue(
I, *Insert);
1468bool VectorCombine::foldExtractedCmps(Instruction &
I) {
1473 if (!BI || !
I.getType()->isIntegerTy(1))
1478 Value *B0 =
I.getOperand(0), *B1 =
I.getOperand(1);
1481 CmpPredicate
P0,
P1;
1493 uint64_t Index0, Index1;
1500 ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1,
CostKind);
1503 assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
1504 "Unknown ExtractElementInst");
1509 unsigned CmpOpcode =
1524 Ext0Cost + Ext1Cost + CmpCost * 2 +
1530 int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
1531 int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
1536 ShufMask[CheapIndex] = ExpensiveIndex;
1541 NewCost += Ext0->
hasOneUse() ? 0 : Ext0Cost;
1542 NewCost += Ext1->
hasOneUse() ? 0 : Ext1Cost;
1547 if (OldCost < NewCost || !NewCost.
isValid())
1557 Value *
LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
1558 Value *
RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
1561 replaceValue(
I, *NewExt);
1588bool VectorCombine::foldSelectsFromBitcast(Instruction &
I) {
1595 if (!SrcVecTy || !DstVecTy)
1605 if (SrcEltBits != 32 && SrcEltBits != 64)
1608 if (!DstEltTy->
isIntegerTy() || DstEltBits >= SrcEltBits)
1625 if (!ScalarSelCost.
isValid() || ScalarSelCost == 0)
1628 unsigned MinSelects = (VecSelCost.
getValue() / ScalarSelCost.
getValue()) + 1;
1631 if (!BC->hasNUsesOrMore(MinSelects))
1636 DenseMap<Value *, SmallVector<SelectInst *, 8>> CondToSelects;
1638 for (User *U : BC->users()) {
1643 for (User *ExtUser : Ext->users()) {
1647 Cond->getType()->isIntegerTy(1))
1652 if (CondToSelects.
empty())
1655 bool MadeChange =
false;
1656 Value *SrcVec = BC->getOperand(0);
1659 for (
auto [
Cond, Selects] : CondToSelects) {
1661 if (Selects.size() < MinSelects) {
1662 LLVM_DEBUG(
dbgs() <<
"VectorCombine: foldSelectsFromBitcast not "
1663 <<
"profitable (VecCost=" << VecSelCost
1664 <<
", ScalarCost=" << ScalarSelCost
1665 <<
", NumSelects=" << Selects.size() <<
")\n");
1670 auto InsertPt = std::next(BC->getIterator());
1674 InsertPt = std::next(CondInst->getIterator());
1682 for (SelectInst *Sel : Selects) {
1684 Value *Idx = Ext->getIndexOperand();
1688 replaceValue(*Sel, *NewExt);
1693 <<
" selects into vector select\n");
1707 unsigned ReductionOpc =
1713 CostBeforeReduction =
1714 TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, ExtType,
1716 CostAfterReduction =
1717 TTI.getExtendedReductionCost(ReductionOpc, IsUnsigned,
II.getType(),
1721 if (RedOp &&
II.getIntrinsicID() == Intrinsic::vector_reduce_add &&
1727 (Op0->
getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
1734 TTI.getCastInstrCost(Op0->
getOpcode(), MulType, ExtType,
1737 TTI.getArithmeticInstrCost(Instruction::Mul, MulType,
CostKind);
1739 TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, MulType,
1742 CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;
1743 CostAfterReduction =
TTI.getMulAccReductionCost(
1744 IsUnsigned, ReductionOpc,
II.getType(), ExtType,
CostKind);
1747 CostAfterReduction =
TTI.getArithmeticReductionCost(ReductionOpc, VecRedTy,
1751bool VectorCombine::foldBinopOfReductions(Instruction &
I) {
1754 if (BinOpOpc == Instruction::Sub)
1755 ReductionIID = Intrinsic::vector_reduce_add;
1759 auto checkIntrinsicAndGetItsArgument = [](
Value *
V,
1764 if (
II->getIntrinsicID() == IID &&
II->hasOneUse())
1765 return II->getArgOperand(0);
1769 Value *V0 = checkIntrinsicAndGetItsArgument(
I.getOperand(0), ReductionIID);
1772 Value *V1 = checkIntrinsicAndGetItsArgument(
I.getOperand(1), ReductionIID);
1781 unsigned ReductionOpc =
1794 CostOfRedOperand0 + CostOfRedOperand1 +
1797 if (NewCost >= OldCost || !NewCost.
isValid())
1801 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1804 if (BinOpOpc == Instruction::Or)
1805 VectorBO = Builder.
CreateOr(V0, V1,
"",
1811 replaceValue(
I, *Rdx);
1819 unsigned NumScanned = 0;
1820 return std::any_of(Begin, End, [&](
const Instruction &Instr) {
1829class ScalarizationResult {
1830 enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
1835 ScalarizationResult(StatusTy Status,
Value *ToFreeze =
nullptr)
1836 : Status(Status), ToFreeze(ToFreeze) {}
1839 ScalarizationResult(
const ScalarizationResult &
Other) =
default;
1840 ~ScalarizationResult() {
1841 assert(!ToFreeze &&
"freeze() not called with ToFreeze being set");
1844 static ScalarizationResult unsafe() {
return {StatusTy::Unsafe}; }
1845 static ScalarizationResult safe() {
return {StatusTy::Safe}; }
1846 static ScalarizationResult safeWithFreeze(
Value *ToFreeze) {
1847 return {StatusTy::SafeWithFreeze, ToFreeze};
1851 bool isSafe()
const {
return Status == StatusTy::Safe; }
1853 bool isUnsafe()
const {
return Status == StatusTy::Unsafe; }
1856 bool isSafeWithFreeze()
const {
return Status == StatusTy::SafeWithFreeze; }
1861 Status = StatusTy::Unsafe;
1865 void freeze(IRBuilderBase &Builder, Instruction &UserI) {
1866 assert(isSafeWithFreeze() &&
1867 "should only be used when freezing is required");
1869 "UserI must be a user of ToFreeze");
1870 IRBuilder<>::InsertPointGuard Guard(Builder);
1875 if (
U.get() == ToFreeze)
1890 uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
1894 if (
C->getValue().ult(NumElements))
1895 return ScalarizationResult::safe();
1896 return ScalarizationResult::unsafe();
1901 return ScalarizationResult::unsafe();
1903 APInt Zero(IntWidth, 0);
1904 APInt MaxElts(IntWidth, NumElements);
1911 return ScalarizationResult::safe();
1912 return ScalarizationResult::unsafe();
1925 if (ValidIndices.
contains(IdxRange))
1926 return ScalarizationResult::safeWithFreeze(IdxBase);
1927 return ScalarizationResult::unsafe();
1939 C->getZExtValue() *
DL.getTypeStoreSize(ScalarType));
1951bool VectorCombine::foldSingleElementStore(Instruction &
I) {
1963 if (!
match(
SI->getValueOperand(),
1970 Value *SrcAddr =
Load->getPointerOperand()->stripPointerCasts();
1973 if (!
Load->isSimple() ||
Load->getParent() !=
SI->getParent() ||
1974 !
DL->typeSizeEqualsStoreSize(
Load->getType()->getScalarType()) ||
1975 SrcAddr !=
SI->getPointerOperand()->stripPointerCasts())
1978 auto ScalarizableIdx =
1980 if (ScalarizableIdx.isUnsafe() ||
1987 Worklist.
push(Load);
1989 if (ScalarizableIdx.isSafeWithFreeze())
1992 SI->getValueOperand()->getType(),
SI->getPointerOperand(),
1993 {ConstantInt::get(Idx->getType(), 0), Idx});
1997 std::max(
SI->getAlign(),
Load->getAlign()), NewElement->
getType(), Idx,
2000 replaceValue(
I, *NSI);
2010bool VectorCombine::scalarizeLoad(Instruction &
I) {
2017 if (LI->isVolatile() || !
DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
2020 bool AllExtracts =
true;
2021 bool AllBitcasts =
true;
2023 unsigned NumInstChecked = 0;
2028 for (User *U : LI->users()) {
2030 if (!UI || UI->getParent() != LI->getParent())
2035 if (UI->use_empty())
2039 AllExtracts =
false;
2041 AllBitcasts =
false;
2045 for (Instruction &
I :
2046 make_range(std::next(LI->getIterator()), UI->getIterator())) {
2053 LastCheckedInst = UI;
2058 return scalarizeLoadExtract(LI, VecTy, Ptr);
2060 return scalarizeLoadBitcast(LI, VecTy, Ptr);
2065bool VectorCombine::scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
2070 DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
2073 for (
auto &Pair : NeedFreeze)
2074 Pair.second.discard();
2082 for (User *U : LI->
users()) {
2087 if (ScalarIdx.isUnsafe())
2089 if (ScalarIdx.isSafeWithFreeze()) {
2090 NeedFreeze.try_emplace(UI, ScalarIdx);
2091 ScalarIdx.discard();
2097 Index ?
Index->getZExtValue() : -1);
2105 LLVM_DEBUG(
dbgs() <<
"Found all extractions of a vector load: " << *LI
2106 <<
"\n LoadExtractCost: " << OriginalCost
2107 <<
" vs ScalarizedCost: " << ScalarizedCost <<
"\n");
2109 if (ScalarizedCost >= OriginalCost)
2116 Type *ElemType = VecTy->getElementType();
2119 for (User *U : LI->
users()) {
2121 Value *Idx = EI->getIndexOperand();
2124 auto It = NeedFreeze.find(EI);
2125 if (It != NeedFreeze.end())
2132 Builder.
CreateLoad(ElemType,
GEP, EI->getName() +
".scalar"));
2134 Align ScalarOpAlignment =
2136 NewLoad->setAlignment(ScalarOpAlignment);
2139 size_t Offset = ConstIdx->getZExtValue() *
DL->getTypeStoreSize(ElemType);
2144 replaceValue(*EI, *NewLoad,
false);
2147 FailureGuard.release();
2152bool VectorCombine::scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
2158 Type *TargetScalarType =
nullptr;
2159 unsigned VecBitWidth =
DL->getTypeSizeInBits(VecTy);
2161 for (User *U : LI->
users()) {
2164 Type *DestTy = BC->getDestTy();
2168 unsigned DestBitWidth =
DL->getTypeSizeInBits(DestTy);
2169 if (DestBitWidth != VecBitWidth)
2173 if (!TargetScalarType)
2174 TargetScalarType = DestTy;
2175 else if (TargetScalarType != DestTy)
2183 if (!TargetScalarType)
2191 LLVM_DEBUG(
dbgs() <<
"Found vector load feeding only bitcasts: " << *LI
2192 <<
"\n OriginalCost: " << OriginalCost
2193 <<
" vs ScalarizedCost: " << ScalarizedCost <<
"\n");
2195 if (ScalarizedCost >= OriginalCost)
2206 ScalarLoad->copyMetadata(*LI);
2209 for (User *U : LI->
users()) {
2211 replaceValue(*BC, *ScalarLoad,
false);
2217bool VectorCombine::scalarizeExtExtract(Instruction &
I) {
2232 Type *ScalarDstTy = DstTy->getElementType();
2233 if (
DL->getTypeSizeInBits(SrcTy) !=
DL->getTypeSizeInBits(ScalarDstTy))
2239 unsigned ExtCnt = 0;
2240 bool ExtLane0 =
false;
2241 for (User *U : Ext->users()) {
2255 Instruction::And, ScalarDstTy,
CostKind,
2258 (ExtCnt - ExtLane0) *
2260 Instruction::LShr, ScalarDstTy,
CostKind,
2263 if (ScalarCost > VectorCost)
2266 Value *ScalarV = Ext->getOperand(0);
2273 SmallDenseSet<ConstantInt *, 8> ExtractedLanes;
2274 bool AllExtractsTriggerUB =
true;
2275 ExtractElementInst *LastExtract =
nullptr;
2277 for (User *U : Ext->users()) {
2280 AllExtractsTriggerUB =
false;
2284 if (!LastExtract || LastExtract->
comesBefore(Extract))
2285 LastExtract = Extract;
2287 if (ExtractedLanes.
size() != DstTy->getNumElements() ||
2288 !AllExtractsTriggerUB ||
2296 uint64_t SrcEltSizeInBits =
DL->getTypeSizeInBits(SrcTy->getElementType());
2297 uint64_t TotalBits =
DL->getTypeSizeInBits(SrcTy);
2300 Value *
Mask = ConstantInt::get(PackedTy, EltBitMask);
2301 for (User *U : Ext->users()) {
2307 ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)
2308 : (Idx * SrcEltSizeInBits);
2311 U->replaceAllUsesWith(
And);
2319bool VectorCombine::foldConcatOfBoolMasks(Instruction &
I) {
2320 Type *Ty =
I.getType();
2325 if (
DL->isBigEndian())
2336 uint64_t ShAmtX = 0;
2344 uint64_t ShAmtY = 0;
2352 if (ShAmtX > ShAmtY) {
2360 uint64_t ShAmtDiff = ShAmtY - ShAmtX;
2361 unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
2366 MaskTy->getNumElements() != ShAmtDiff ||
2367 MaskTy->getNumElements() > (
BitWidth / 2))
2372 Type::getIntNTy(Ty->
getContext(), ConcatTy->getNumElements());
2373 auto *MaskIntTy = Type::getIntNTy(Ty->
getContext(), ShAmtDiff);
2376 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
2393 if (Ty != ConcatIntTy)
2399 LLVM_DEBUG(
dbgs() <<
"Found a concatenation of bitcasted bool masks: " <<
I
2400 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2403 if (NewCost > OldCost)
2413 if (Ty != ConcatIntTy) {
2423 replaceValue(
I, *Result);
2429bool VectorCombine::foldPermuteOfBinops(Instruction &
I) {
2430 BinaryOperator *BinOp;
2431 ArrayRef<int> OuterMask;
2439 Value *Op00, *Op01, *Op10, *Op11;
2440 ArrayRef<int> Mask0, Mask1;
2445 if (!Match0 && !Match1)
2458 if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
2461 unsigned NumSrcElts = BinOpTy->getNumElements();
2466 any_of(OuterMask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
2470 SmallVector<int> NewMask0, NewMask1;
2471 for (
int M : OuterMask) {
2472 if (M < 0 || M >= (
int)NumSrcElts) {
2476 NewMask0.
push_back(Match0 ? Mask0[M] : M);
2477 NewMask1.
push_back(Match1 ? Mask1[M] : M);
2481 unsigned NumOpElts = Op0Ty->getNumElements();
2482 bool IsIdentity0 = ShuffleDstTy == Op0Ty &&
2483 all_of(NewMask0, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
2485 bool IsIdentity1 = ShuffleDstTy == Op1Ty &&
2486 all_of(NewMask1, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
2495 ShuffleDstTy, BinOpTy, OuterMask,
CostKind,
2496 0,
nullptr, {BinOp}, &
I);
2498 NewCost += BinOpCost;
2504 OldCost += Shuf0Cost;
2506 NewCost += Shuf0Cost;
2512 OldCost += Shuf1Cost;
2514 NewCost += Shuf1Cost;
2522 Op0Ty, NewMask0,
CostKind, 0,
nullptr, {Op00, Op01});
2526 Op1Ty, NewMask1,
CostKind, 0,
nullptr, {Op10, Op11});
2528 LLVM_DEBUG(
dbgs() <<
"Found a shuffle feeding a shuffled binop: " <<
I
2529 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2533 if (NewCost > OldCost)
2544 NewInst->copyIRFlags(BinOp);
2548 replaceValue(
I, *NewBO);
2554bool VectorCombine::foldShuffleOfBinops(Instruction &
I) {
2555 ArrayRef<int> OldMask;
2562 if (
LHS->getOpcode() !=
RHS->getOpcode())
2566 bool IsCommutative =
false;
2575 IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
2586 if (!ShuffleDstTy || !BinResTy || !BinOpTy ||
X->getType() !=
Z->getType())
2589 bool SameBinOp =
LHS ==
RHS;
2590 unsigned NumSrcElts = BinOpTy->getNumElements();
2593 if (IsCommutative &&
X != Z &&
Y != W && (
X == W ||
Y == Z))
2596 auto ConvertToUnary = [NumSrcElts](
int &
M) {
2597 if (M >= (
int)NumSrcElts)
2601 SmallVector<int> NewMask0(OldMask);
2610 SmallVector<int> NewMask1(OldMask);
2629 ShuffleDstTy, BinResTy, OldMask,
CostKind, 0,
2639 ArrayRef<int> InnerMask;
2641 m_Mask(InnerMask)))) &&
2644 [NumSrcElts](
int M) {
return M < (int)NumSrcElts; })) {
2656 bool ReducedInstCount =
false;
2657 ReducedInstCount |= MergeInner(
X, 0, NewMask0,
CostKind);
2658 ReducedInstCount |= MergeInner(
Y, 0, NewMask1,
CostKind);
2659 ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0,
CostKind);
2660 ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1,
CostKind);
2661 bool SingleSrcBinOp = (
X ==
Y) && (Z == W) && (NewMask0 == NewMask1);
2666 auto *ShuffleCmpTy =
2669 SK0, ShuffleCmpTy, BinOpTy, NewMask0,
CostKind, 0,
nullptr, {
X,
Z});
2670 if (!SingleSrcBinOp)
2680 PredLHS,
CostKind, Op0Info, Op1Info);
2690 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2697 if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
2706 : Builder.
CreateCmp(PredLHS, Shuf0, Shuf1);
2710 NewInst->copyIRFlags(
LHS);
2711 NewInst->andIRFlags(
RHS);
2716 replaceValue(
I, *NewBO);
2723bool VectorCombine::foldShuffleOfSelects(Instruction &
I) {
2725 Value *C1, *
T1, *F1, *C2, *T2, *F2;
2736 if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
2742 if (((SI0FOp ==
nullptr) != (SI1FOp ==
nullptr)) ||
2743 ((SI0FOp !=
nullptr) &&
2744 (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))
2750 auto SelOp = Instruction::Select;
2758 CostSel1 + CostSel2 +
2760 {
I.getOperand(0),
I.getOperand(1)}, &
I);
2764 Mask,
CostKind, 0,
nullptr, {C1, C2});
2774 if (!Sel1->hasOneUse())
2775 NewCost += CostSel1;
2776 if (!Sel2->hasOneUse())
2777 NewCost += CostSel2;
2780 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2782 if (NewCost > OldCost)
2791 NewSel = Builder.
CreateSelectFMF(ShuffleCmp, ShuffleTrue, ShuffleFalse,
2792 SI0FOp->getFastMathFlags());
2794 NewSel = Builder.
CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
2799 replaceValue(
I, *NewSel);
2805bool VectorCombine::foldShuffleOfCastops(Instruction &
I) {
2807 ArrayRef<int> OldMask;
2816 if (!C0 || (IsBinaryShuffle && !C1))
2823 if (!IsBinaryShuffle && Opcode == Instruction::BitCast)
2826 if (IsBinaryShuffle) {
2827 if (C0->getSrcTy() != C1->getSrcTy())
2830 if (Opcode != C1->getOpcode()) {
2832 Opcode = Instruction::SExt;
2841 if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
2844 unsigned NumSrcElts = CastSrcTy->getNumElements();
2845 unsigned NumDstElts = CastDstTy->getNumElements();
2846 assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
2847 "Only bitcasts expected to alter src/dst element counts");
2851 if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
2852 (NumDstElts % NumSrcElts) != 0)
2855 SmallVector<int, 16> NewMask;
2856 if (NumSrcElts >= NumDstElts) {
2859 assert(NumSrcElts % NumDstElts == 0 &&
"Unexpected shuffle mask");
2860 unsigned ScaleFactor = NumSrcElts / NumDstElts;
2865 assert(NumDstElts % NumSrcElts == 0 &&
"Unexpected shuffle mask");
2866 unsigned ScaleFactor = NumDstElts / NumSrcElts;
2871 auto *NewShuffleDstTy =
2880 if (IsBinaryShuffle)
2895 if (IsBinaryShuffle) {
2905 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2907 if (NewCost > OldCost)
2911 if (IsBinaryShuffle)
2921 NewInst->copyIRFlags(C0);
2922 if (IsBinaryShuffle)
2923 NewInst->andIRFlags(C1);
2927 replaceValue(
I, *Cast);
2937bool VectorCombine::foldShuffleOfShuffles(Instruction &
I) {
2938 ArrayRef<int> OuterMask;
2939 Value *OuterV0, *OuterV1;
2944 ArrayRef<int> InnerMask0, InnerMask1;
2945 Value *X0, *X1, *Y0, *Y1;
2950 if (!Match0 && !Match1)
2955 SmallVector<int, 16> PoisonMask1;
2960 InnerMask1 = PoisonMask1;
2964 X0 = Match0 ? X0 : OuterV0;
2965 Y0 = Match0 ? Y0 : OuterV0;
2966 X1 = Match1 ? X1 : OuterV1;
2967 Y1 = Match1 ? Y1 : OuterV1;
2971 if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
2975 unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
2976 unsigned NumImmElts = ShuffleImmTy->getNumElements();
2981 SmallVector<int, 16> NewMask(OuterMask);
2982 Value *NewX =
nullptr, *NewY =
nullptr;
2983 for (
int &M : NewMask) {
2984 Value *Src =
nullptr;
2985 if (0 <= M && M < (
int)NumImmElts) {
2989 Src =
M >= (int)NumSrcElts ? Y0 : X0;
2990 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
2992 }
else if (M >= (
int)NumImmElts) {
2997 Src =
M >= (int)NumSrcElts ? Y1 : X1;
2998 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
3002 assert(0 <= M && M < (
int)NumSrcElts &&
"Unexpected shuffle mask index");
3011 if (!NewX || NewX == Src) {
3015 if (!NewY || NewY == Src) {
3031 replaceValue(
I, *NewX);
3048 bool IsUnary =
all_of(NewMask, [&](
int M) {
return M < (int)NumSrcElts; });
3054 nullptr, {NewX, NewY});
3056 NewCost += InnerCost0;
3058 NewCost += InnerCost1;
3061 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3063 if (NewCost > OldCost)
3067 replaceValue(
I, *Shuf);
3083bool VectorCombine::foldShufflesOfLengthChangingShuffles(Instruction &
I) {
3088 unsigned ChainLength = 0;
3089 SmallVector<int>
Mask;
3090 SmallVector<int> YMask;
3100 ArrayRef<int> OuterMask;
3101 Value *OuterV0, *OuterV1;
3102 if (ChainLength != 0 && !Trunk->
hasOneUse())
3105 m_Mask(OuterMask))))
3107 if (OuterV0->
getType() != TrunkType) {
3113 ArrayRef<int> InnerMask0, InnerMask1;
3114 Value *A0, *A1, *B0, *B1;
3119 bool Match0Leaf = Match0 && A0->
getType() !=
I.getType();
3120 bool Match1Leaf = Match1 && A1->
getType() !=
I.getType();
3121 if (Match0Leaf == Match1Leaf) {
3127 SmallVector<int> CommutedOuterMask;
3134 for (
int &M : CommutedOuterMask) {
3137 if (M < (
int)NumTrunkElts)
3142 OuterMask = CommutedOuterMask;
3161 int NumLeafElts = YType->getNumElements();
3162 SmallVector<int> LocalYMask(InnerMask1);
3163 for (
int &M : LocalYMask) {
3164 if (M >= NumLeafElts)
3174 Mask.assign(OuterMask);
3175 YMask.
assign(LocalYMask);
3176 OldCost = NewCost = LocalOldCost;
3183 SmallVector<int> NewYMask(YMask);
3185 for (
auto [CombinedM, LeafM] :
llvm::zip(NewYMask, LocalYMask)) {
3186 if (LeafM == -1 || CombinedM == LeafM)
3188 if (CombinedM == -1) {
3198 SmallVector<int> NewMask;
3199 NewMask.
reserve(NumTrunkElts);
3200 for (
int M : Mask) {
3201 if (M < 0 || M >=
static_cast<int>(NumTrunkElts))
3216 if (LocalNewCost >= NewCost && LocalOldCost < LocalNewCost - NewCost)
3220 if (ChainLength == 1) {
3221 dbgs() <<
"Found chain of shuffles fed by length-changing shuffles: "
3224 dbgs() <<
" next chain link: " << *Trunk <<
'\n'
3225 <<
" old cost: " << (OldCost + LocalOldCost)
3226 <<
" new cost: " << LocalNewCost <<
'\n';
3231 OldCost += LocalOldCost;
3232 NewCost = LocalNewCost;
3236 if (ChainLength <= 1)
3240 return M < 0 || M >=
static_cast<int>(NumTrunkElts);
3243 for (
int &M : Mask) {
3244 if (M >=
static_cast<int>(NumTrunkElts))
3245 M = YMask[
M - NumTrunkElts];
3249 replaceValue(
I, *Root);
3256 replaceValue(
I, *Root);
3262bool VectorCombine::foldShuffleOfIntrinsics(Instruction &
I) {
3264 ArrayRef<int> OldMask;
3274 if (IID != II1->getIntrinsicID())
3283 if (!ShuffleDstTy || !II0Ty)
3289 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I)
3291 II0->getArgOperand(
I) != II1->getArgOperand(
I))
3297 II0Ty, OldMask,
CostKind, 0,
nullptr, {II0, II1}, &
I);
3301 SmallDenseSet<std::pair<Value *, Value *>> SeenOperandPairs;
3302 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3304 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
3308 ShuffleDstTy->getNumElements());
3310 std::pair<Value *, Value *> OperandPair =
3311 std::make_pair(II0->getArgOperand(
I), II1->getArgOperand(
I));
3312 if (!SeenOperandPairs.
insert(OperandPair).second) {
3318 CostKind, 0,
nullptr, {II0->getArgOperand(
I), II1->getArgOperand(
I)});
3321 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
3324 if (!II0->hasOneUse())
3326 if (II1 != II0 && !II1->hasOneUse())
3330 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3333 if (NewCost > OldCost)
3337 SmallDenseMap<std::pair<Value *, Value *>,
Value *> ShuffleCache;
3338 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I)
3342 std::pair<Value *, Value *> OperandPair =
3343 std::make_pair(II0->getArgOperand(
I), II1->getArgOperand(
I));
3344 auto It = ShuffleCache.
find(OperandPair);
3345 if (It != ShuffleCache.
end()) {
3351 II1->getArgOperand(
I), OldMask);
3352 ShuffleCache[OperandPair] = Shuf;
3360 NewInst->copyIRFlags(II0);
3361 NewInst->andIRFlags(II1);
3364 replaceValue(
I, *NewIntrinsic);
3370bool VectorCombine::foldPermuteOfIntrinsic(Instruction &
I) {
3382 if (!ShuffleDstTy || !IntrinsicSrcTy)
3386 unsigned NumSrcElts = IntrinsicSrcTy->getNumElements();
3387 if (
any_of(Mask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
3400 IntrinsicSrcTy, Mask,
CostKind, 0,
nullptr, {V0}, &
I);
3404 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3406 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
3410 ShuffleDstTy->getNumElements());
3413 ArgTy, VecTy, Mask,
CostKind, 0,
nullptr,
3414 {II0->getArgOperand(
I)});
3417 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
3422 if (!II0->hasOneUse())
3425 LLVM_DEBUG(
dbgs() <<
"Found a permute of intrinsic: " <<
I <<
"\n OldCost: "
3426 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
3428 if (NewCost > OldCost)
3433 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3448 replaceValue(
I, *NewIntrinsic);
3458 int M = SV->getMaskValue(Lane);
3461 if (
static_cast<unsigned>(M) < NumElts) {
3462 V = SV->getOperand(0);
3465 V = SV->getOperand(1);
3476 auto [U, Lane] = IL;
3489 unsigned NumElts = Ty->getNumElements();
3490 if (Item.
size() == NumElts || NumElts == 1 || Item.
size() % NumElts != 0)
3496 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
3502 unsigned NumSlices = Item.
size() / NumElts;
3507 for (
unsigned Slice = 0; Slice < NumSlices; ++Slice) {
3508 Value *SliceV = Item[Slice * NumElts].first;
3509 if (!SliceV || SliceV->
getType() != Ty)
3511 for (
unsigned Elt = 0; Elt < NumElts; ++Elt) {
3512 auto [V, Lane] = Item[Slice * NumElts + Elt];
3513 if (Lane !=
static_cast<int>(Elt) || SliceV != V)
3522 const DenseSet<std::pair<Value *, Use *>> &IdentityLeafs,
3523 const DenseSet<std::pair<Value *, Use *>> &SplatLeafs,
3524 const DenseSet<std::pair<Value *, Use *>> &ConcatLeafs,
3526 auto [FrontV, FrontLane] = Item.
front();
3528 if (IdentityLeafs.contains(std::make_pair(FrontV, From))) {
3531 if (SplatLeafs.contains(std::make_pair(FrontV, From))) {
3533 return Builder.CreateShuffleVector(FrontV, Mask);
3535 if (ConcatLeafs.contains(std::make_pair(FrontV, From))) {
3539 for (
unsigned S = 0; S < Values.
size(); ++S)
3540 Values[S] = Item[S * NumElts].first;
3542 while (Values.
size() > 1) {
3545 std::iota(Mask.begin(), Mask.end(), 0);
3547 for (
unsigned S = 0; S < NewValues.
size(); ++S)
3549 Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask);
3557 unsigned NumOps =
I->getNumOperands() - (
II ? 1 : 0);
3559 for (
unsigned Idx = 0; Idx <
NumOps; Idx++) {
3562 Ops[Idx] =
II->getOperand(Idx);
3566 &
I->getOperandUse(Idx), Ty, IdentityLeafs,
3567 SplatLeafs, ConcatLeafs, Builder,
TTI);
3571 for (
const auto &Lane : Item)
3584 auto *
Value = Builder.CreateCmp(CI->getPredicate(),
Ops[0],
Ops[1]);
3594 auto *
Value = Builder.CreateCast(CI->getOpcode(),
Ops[0], DstTy);
3599 auto *
Value = Builder.CreateIntrinsic(DstTy,
II->getIntrinsicID(),
Ops);
3613bool VectorCombine::foldShuffleToIdentity(Instruction &
I) {
3615 if (!Ty ||
I.use_empty())
3619 for (
unsigned M = 0,
E = Ty->getNumElements(); M <
E; ++M)
3623 Worklist.
push_back(std::make_pair(Start, &*
I.use_begin()));
3624 DenseSet<std::pair<Value *, Use *>> IdentityLeafs, SplatLeafs, ConcatLeafs;
3625 unsigned NumVisited = 0;
3627 while (!Worklist.
empty()) {
3632 auto Item = ItemFrom.first;
3633 auto From = ItemFrom.second;
3634 auto [FrontV, FrontLane] = Item.front();
3642 return X->getType() ==
Y->getType() &&
3647 if (FrontLane == 0 &&
3649 Ty->getNumElements() &&
3651 Value *FrontV = Item.front().first;
3652 return !
E.value().first || (IsEquiv(
E.value().first, FrontV) &&
3653 E.value().second == (int)
E.index());
3655 IdentityLeafs.
insert(std::make_pair(FrontV, From));
3660 C &&
C->getSplatValue() &&
3662 Value *FrontV = Item.front().first;
3668 SplatLeafs.
insert(std::make_pair(FrontV, From));
3673 auto [FrontV, FrontLane] = Item.front();
3674 auto [
V, Lane] = IL;
3675 return !
V || (
V == FrontV && Lane == FrontLane);
3677 SplatLeafs.
insert(std::make_pair(FrontV, From));
3683 auto CheckLaneIsEquivalentToFirst = [Item](
InstLane IL) {
3684 Value *FrontV = Item.front().first;
3693 if (CI->getPredicate() !=
cast<CmpInst>(FrontV)->getPredicate())
3696 if (CI->getSrcTy()->getScalarType() !=
3701 SI->getOperand(0)->getType() !=
3708 II->getIntrinsicID() ==
3710 !
II->hasOperandBundles());
3717 BO && BO->isIntDivRem())
3724 }
else if (
isa<UnaryOperator, TruncInst, ZExtInst, SExtInst, FPToSIInst,
3725 FPToUIInst, SIToFPInst, UIToFPInst>(FrontV)) {
3733 if (DstTy && SrcTy &&
3734 SrcTy->getNumElements() == DstTy->getNumElements()) {
3736 &BitCast->getOperandUse(0));
3741 &Sel->getOperandUse(0));
3743 &Sel->getOperandUse(1));
3745 &Sel->getOperandUse(2));
3749 !
II->hasOperandBundles()) {
3750 for (
unsigned Op = 0,
E =
II->getNumOperands() - 1;
Op <
E;
Op++) {
3754 Value *FrontV = Item.front().first;
3770 ConcatLeafs.
insert(std::make_pair(FrontV, From));
3777 if (NumVisited <= 1)
3780 LLVM_DEBUG(
dbgs() <<
"Found a superfluous identity shuffle: " <<
I <<
"\n");
3786 SplatLeafs, ConcatLeafs, Builder, &
TTI);
3787 replaceValue(
I, *V);
3794bool VectorCombine::foldShuffleFromReductions(Instruction &
I) {
3798 switch (
II->getIntrinsicID()) {
3799 case Intrinsic::vector_reduce_add:
3800 case Intrinsic::vector_reduce_mul:
3801 case Intrinsic::vector_reduce_and:
3802 case Intrinsic::vector_reduce_or:
3803 case Intrinsic::vector_reduce_xor:
3804 case Intrinsic::vector_reduce_smin:
3805 case Intrinsic::vector_reduce_smax:
3806 case Intrinsic::vector_reduce_umin:
3807 case Intrinsic::vector_reduce_umax:
3816 std::queue<Value *> Worklist;
3817 SmallPtrSet<Value *, 4> Visited;
3818 ShuffleVectorInst *Shuffle =
nullptr;
3822 while (!Worklist.empty()) {
3823 Value *CV = Worklist.front();
3835 if (CI->isBinaryOp()) {
3836 for (
auto *
Op : CI->operand_values())
3840 if (Shuffle && Shuffle != SV)
3857 for (
auto *V : Visited)
3858 for (
auto *U :
V->users())
3859 if (!Visited.contains(U) && U != &
I)
3862 FixedVectorType *VecType =
3866 FixedVectorType *ShuffleInputType =
3868 if (!ShuffleInputType)
3874 SmallVector<int> ConcatMask;
3876 sort(ConcatMask, [](
int X,
int Y) {
return (
unsigned)
X < (unsigned)
Y; });
3877 bool UsesSecondVec =
3878 any_of(ConcatMask, [&](
int M) {
return M >= (int)NumInputElts; });
3885 ShuffleInputType, ConcatMask,
CostKind);
3887 LLVM_DEBUG(
dbgs() <<
"Found a reduction feeding from a shuffle: " << *Shuffle
3889 LLVM_DEBUG(
dbgs() <<
" OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3891 bool MadeChanges =
false;
3892 if (NewCost < OldCost) {
3896 LLVM_DEBUG(
dbgs() <<
"Created new shuffle: " << *NewShuffle <<
"\n");
3897 replaceValue(*Shuffle, *NewShuffle);
3903 MadeChanges |= foldSelectShuffle(*Shuffle,
true);
3949bool VectorCombine::foldShuffleChainsToReduce(Instruction &
I) {
3951 std::queue<Value *> InstWorklist;
3955 std::optional<unsigned int> CommonCallOp = std::nullopt;
3956 std::optional<Instruction::BinaryOps> CommonBinOp = std::nullopt;
3958 bool IsFirstCallOrBinInst =
true;
3959 bool ShouldBeCallOrBinInst =
true;
3965 SmallVector<Value *, 2> PrevVecV(2,
nullptr);
3975 int64_t
VecSize = FVT->getNumElements();
3981 unsigned int NumLevels =
Log2_64_Ceil(VecSize), VisitedCnt = 0;
3982 int64_t ShuffleMaskHalf = 1, ExpectedParityMask = 0;
3992 for (
int Cur = VecSize, Mask = NumLevels - 1; Cur > 1;
3993 Cur = (Cur + 1) / 2, --
Mask) {
3995 ExpectedParityMask |= (1ll <<
Mask);
3998 InstWorklist.push(VecOpEE);
4000 bool IsPartialReduction =
false;
4002 while (!InstWorklist.empty()) {
4003 Value *CI = InstWorklist.front();
4007 if (!ShouldBeCallOrBinInst)
4010 if (!IsFirstCallOrBinInst &&
any_of(PrevVecV,
equal_to(
nullptr)))
4015 if (
II != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
4017 IsFirstCallOrBinInst =
false;
4020 CommonCallOp =
II->getIntrinsicID();
4021 if (
II->getIntrinsicID() != *CommonCallOp)
4024 switch (
II->getIntrinsicID()) {
4025 case Intrinsic::umin:
4026 case Intrinsic::umax:
4027 case Intrinsic::smin:
4028 case Intrinsic::smax: {
4029 auto *Op0 =
II->getOperand(0);
4030 auto *Op1 =
II->getOperand(1);
4038 ShouldBeCallOrBinInst ^= 1;
4040 IntrinsicCostAttributes ICA(
4041 *CommonCallOp,
II->getType(),
4042 {PrevVecV[0]->getType(), PrevVecV[1]->getType()});
4049 InstWorklist.push(PrevVecV[1]);
4050 InstWorklist.push(PrevVecV[0]);
4054 if (!ShouldBeCallOrBinInst)
4057 if (!IsFirstCallOrBinInst &&
any_of(PrevVecV,
equal_to(
nullptr)))
4060 if (BinOp != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
4062 IsFirstCallOrBinInst =
false;
4070 switch (*CommonBinOp) {
4071 case BinaryOperator::Add:
4072 case BinaryOperator::Mul:
4073 case BinaryOperator::Or:
4074 case BinaryOperator::And:
4075 case BinaryOperator::Xor: {
4085 ShouldBeCallOrBinInst ^= 1;
4092 InstWorklist.push(PrevVecV[1]);
4093 InstWorklist.push(PrevVecV[0]);
4097 if (ShouldBeCallOrBinInst ||
any_of(PrevVecV,
equal_to(
nullptr)))
4100 if (SVInst != PrevVecV[1])
4103 ArrayRef<int> CurMask;
4109 for (
int Mask = 0, MaskSize = CurMask.
size(); Mask != MaskSize; ++Mask) {
4110 if (Mask < ShuffleMaskHalf &&
4111 CurMask[Mask] != ShuffleMaskHalf + Mask - (ExpectedParityMask & 1))
4113 if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1)
4118 ShuffleMaskHalf *= 2;
4119 ShuffleMaskHalf -= (ExpectedParityMask & 1);
4120 ExpectedParityMask >>= 1;
4123 SVInst->getType(), SVInst->getType(),
4127 if (!ExpectedParityMask && VisitedCnt == NumLevels)
4130 ShouldBeCallOrBinInst ^= 1;
4137 if (ShouldBeCallOrBinInst && VisitedCnt >= 1 && CI == PrevVecV[0] &&
4139 IsPartialReduction =
true;
4148 if (ShouldBeCallOrBinInst && !IsPartialReduction)
4151 assert(VecSize != -1 &&
"Expected Match for Vector Size");
4153 Value *FinalVecV = PrevVecV[0];
4166 FixedVectorType *ReduceVecTy = FinalVecVTy;
4167 SmallVector<int> ExtractMask;
4169 if (IsPartialReduction) {
4170 unsigned SubVecSize = ShuffleMaskHalf;
4172 ExtractMask.
resize(SubVecSize);
4173 std::iota(ExtractMask.
begin(), ExtractMask.
end(), 0);
4175 ReduceVecTy, FinalVecVTy, ExtractMask,
4179 IntrinsicCostAttributes ICA(ReducedOp, ReduceVecTy, {ReduceVecTy});
4182 LLVM_DEBUG(
dbgs() <<
"Found reduction shuffle chain: " <<
I <<
"\n OldCost : "
4183 << OrigCost <<
" vs NewCost: " << NewCost <<
"\n");
4185 if (VecOpEE->
hasOneUse() ? (NewCost > OrigCost) : (NewCost >= OrigCost))
4188 Value *ReduceInput = FinalVecV;
4189 if (IsPartialReduction)
4193 ReducedOp, {ReduceInput->
getType()}, {ReduceInput});
4194 replaceValue(
I, *ReducedResult);
4203bool VectorCombine::foldCastFromReductions(Instruction &
I) {
4208 bool TruncOnly =
false;
4211 case Intrinsic::vector_reduce_add:
4212 case Intrinsic::vector_reduce_mul:
4215 case Intrinsic::vector_reduce_and:
4216 case Intrinsic::vector_reduce_or:
4217 case Intrinsic::vector_reduce_xor:
4224 Value *ReductionSrc =
I.getOperand(0);
4236 Type *ResultTy =
I.getType();
4239 ReductionOpc, ReductionSrcTy, std::nullopt,
CostKind);
4249 if (OldCost <= NewCost || !NewCost.
isValid())
4253 II->getIntrinsicID(), {Src});
4255 replaceValue(
I, *NewCast);
4283bool VectorCombine::foldSignBitReductionCmp(Instruction &
I) {
4285 IntrinsicInst *ReduceOp;
4286 const APInt *CmpVal;
4293 case Intrinsic::vector_reduce_or:
4294 case Intrinsic::vector_reduce_umax:
4295 case Intrinsic::vector_reduce_and:
4296 case Intrinsic::vector_reduce_umin:
4297 case Intrinsic::vector_reduce_add:
4308 unsigned BitWidth = VecTy->getScalarSizeInBits();
4312 unsigned NumElts = VecTy->getNumElements();
4321 case Intrinsic::vector_reduce_or:
4322 case Intrinsic::vector_reduce_umax:
4323 TreeOpcode = Instruction::Or;
4325 case Intrinsic::vector_reduce_and:
4326 case Intrinsic::vector_reduce_umin:
4327 TreeOpcode = Instruction::And;
4329 case Intrinsic::vector_reduce_add:
4330 TreeOpcode = Instruction::Add;
4338 SmallVector<Value *, 8> Worklist;
4339 SmallVector<Value *, 8> Sources;
4341 std::optional<bool> IsAShr;
4342 constexpr unsigned MaxSources = 8;
4347 while (!Worklist.
empty() && Worklist.
size() <= MaxSources &&
4348 Sources.
size() <= MaxSources) {
4357 bool ThisIsAShr = Shr->getOpcode() == Instruction::AShr;
4359 IsAShr = ThisIsAShr;
4360 else if (*IsAShr != ThisIsAShr)
4386 if (Sources.
empty() || Sources.
size() > MaxSources ||
4387 Worklist.
size() > MaxSources || !IsAShr)
4390 unsigned NumSources = Sources.
size();
4394 if (OrigIID == Intrinsic::vector_reduce_add &&
4402 (OrigIID == Intrinsic::vector_reduce_add) ? NumSources * NumElts : 1;
4405 NegativeVal.negate();
4437 TestsNegative =
false;
4438 }
else if (*CmpVal == NegativeVal) {
4439 TestsNegative =
true;
4443 IsEq = Pred == ICmpInst::ICMP_EQ;
4444 }
else if (Pred == ICmpInst::ICMP_SLT && *CmpVal == RangeHigh) {
4446 TestsNegative = (RangeHigh == NegativeVal);
4447 }
else if (Pred == ICmpInst::ICMP_SGT && *CmpVal == RangeHigh - 1) {
4449 TestsNegative = (RangeHigh == NegativeVal);
4450 }
else if (Pred == ICmpInst::ICMP_SGT && *CmpVal == RangeLow) {
4452 TestsNegative = (RangeLow == NegativeVal);
4453 }
else if (Pred == ICmpInst::ICMP_SLT && *CmpVal == RangeLow + 1) {
4455 TestsNegative = (RangeLow == NegativeVal);
4498 enum CheckKind :
unsigned {
4505 auto RequiresOr = [](CheckKind
C) ->
bool {
return C & 0b100; };
4507 auto IsNegativeCheck = [](CheckKind
C) ->
bool {
return C & 0b010; };
4509 auto Invert = [](CheckKind
C) {
return CheckKind(
C ^ 0b011); };
4513 case Intrinsic::vector_reduce_or:
4514 case Intrinsic::vector_reduce_umax:
4515 Base = TestsNegative ? AnyNeg : AllNonNeg;
4517 case Intrinsic::vector_reduce_and:
4518 case Intrinsic::vector_reduce_umin:
4519 Base = TestsNegative ? AllNeg : AnyNonNeg;
4521 case Intrinsic::vector_reduce_add:
4522 Base = TestsNegative ? AllNeg : AllNonNeg;
4537 return ArithCost <= MinMaxCost ? std::make_pair(Arith, ArithCost)
4538 : std::make_pair(MinMax, MinMaxCost);
4542 auto [NewIID, NewCost] = RequiresOr(
Check)
4543 ? PickCheaper(Intrinsic::vector_reduce_or,
4544 Intrinsic::vector_reduce_umax)
4545 : PickCheaper(
Intrinsic::vector_reduce_and,
4549 if (NumSources > 1) {
4550 unsigned CombineOpc =
4551 RequiresOr(
Check) ? Instruction::Or : Instruction::And;
4556 LLVM_DEBUG(
dbgs() <<
"Found sign-bit reduction cmp: " <<
I <<
"\n OldCost: "
4557 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
4559 if (NewCost > OldCost)
4564 Type *ScalarTy = VecTy->getScalarType();
4567 if (NumSources == 1) {
4578 replaceValue(
I, *NewCmp);
4603bool VectorCombine::foldICmpEqZeroVectorReduce(Instruction &
I) {
4614 switch (
II->getIntrinsicID()) {
4615 case Intrinsic::vector_reduce_add:
4616 case Intrinsic::vector_reduce_or:
4617 case Intrinsic::vector_reduce_umin:
4618 case Intrinsic::vector_reduce_umax:
4619 case Intrinsic::vector_reduce_smin:
4620 case Intrinsic::vector_reduce_smax:
4626 Value *InnerOp =
II->getArgOperand(0);
4669 switch (
II->getIntrinsicID()) {
4670 case Intrinsic::vector_reduce_add: {
4675 unsigned NumElems = XTy->getNumElements();
4681 if (LeadingZerosX <= LostBits || LeadingZerosFX <= LostBits)
4689 case Intrinsic::vector_reduce_smin:
4690 case Intrinsic::vector_reduce_smax:
4700 LLVM_DEBUG(
dbgs() <<
"Found a reduction to 0 comparison with removable op: "
4716 case Intrinsic::vector_reduce_add:
4717 case Intrinsic::vector_reduce_or:
4723 case Intrinsic::vector_reduce_umin:
4724 case Intrinsic::vector_reduce_umax:
4725 case Intrinsic::vector_reduce_smin:
4726 case Intrinsic::vector_reduce_smax:
4738 NewReduceCost + (InnerOp->
hasOneUse() ? 0 : ExtCost);
4740 LLVM_DEBUG(
dbgs() <<
"Found a removable extension before reduction: "
4741 << *InnerOp <<
"\n OldCost: " << OldCost
4742 <<
" vs NewCost: " << NewCost <<
"\n");
4748 if (NewCost > OldCost)
4757 Builder.
CreateICmp(Pred, NewReduce, ConstantInt::getNullValue(Ty));
4758 replaceValue(
I, *NewCmp);
4789bool VectorCombine::foldEquivalentReductionCmp(Instruction &
I) {
4792 const APInt *CmpVal;
4797 if (!
II || !
II->hasOneUse())
4800 const auto IsValidOrUmaxCmp = [&]() {
4809 bool IsPositive = CmpVal->
isAllOnes() && Pred == ICmpInst::ICMP_SGT;
4811 bool IsNegative = (CmpVal->
isZero() || CmpVal->
isOne() || *CmpVal == 2) &&
4812 Pred == ICmpInst::ICMP_SLT;
4813 return IsEquality || IsPositive || IsNegative;
4816 const auto IsValidAndUminCmp = [&]() {
4821 const auto LeadingOnes = CmpVal->
countl_one();
4828 bool IsNegative = CmpVal->
isZero() && Pred == ICmpInst::ICMP_SLT;
4837 ((*CmpVal)[0] || (*CmpVal)[1]) && Pred == ICmpInst::ICMP_SGT;
4838 return IsEquality || IsNegative || IsPositive;
4846 switch (OriginalIID) {
4847 case Intrinsic::vector_reduce_or:
4848 if (!IsValidOrUmaxCmp())
4850 AlternativeIID = Intrinsic::vector_reduce_umax;
4852 case Intrinsic::vector_reduce_umax:
4853 if (!IsValidOrUmaxCmp())
4855 AlternativeIID = Intrinsic::vector_reduce_or;
4857 case Intrinsic::vector_reduce_and:
4858 if (!IsValidAndUminCmp())
4860 AlternativeIID = Intrinsic::vector_reduce_umin;
4862 case Intrinsic::vector_reduce_umin:
4863 if (!IsValidAndUminCmp())
4865 AlternativeIID = Intrinsic::vector_reduce_and;
4878 if (ReductionOpc != Instruction::ICmp)
4889 <<
"\n OrigCost: " << OrigCost
4890 <<
" vs AltCost: " << AltCost <<
"\n");
4892 if (AltCost >= OrigCost)
4896 Type *ScalarTy = VecTy->getScalarType();
4899 Builder.
CreateICmp(Pred, NewReduce, ConstantInt::get(ScalarTy, *CmpVal));
4901 replaceValue(
I, *NewCmp);
4915 unsigned Depth = 0) {
4916 constexpr unsigned MaxLocalDepth = 2;
4917 if (
Depth > MaxLocalDepth)
4920 auto NumSignBits = [&](
const Value *
X) {
4923 if (NumSignBits(V) == V->getType()->getScalarSizeInBits())
4928 return NumSignBits(
A) >= 2 && NumSignBits(
B) >= 2 &&
4939bool VectorCombine::foldReduceAddCmpZero(Instruction &
I) {
4949 if (!VecTy || VecTy->getNumElements() < 2)
4955 if (!IsNonNegative && !IsNonPositive)
4960 unsigned NumElts = VecTy->getNumElements();
4962 if (
Log2_32(NumElts) >= NumSignBits)
4965 ICmpInst::Predicate NewPred;
4967 case ICmpInst::ICMP_EQ:
4968 case ICmpInst::ICMP_ULE:
4969 case ICmpInst::ICMP_SLE:
4970 case ICmpInst::ICMP_SGE:
4971 NewPred = ICmpInst::ICMP_EQ;
4973 case ICmpInst::ICMP_NE:
4974 case ICmpInst::ICMP_UGT:
4975 case ICmpInst::ICMP_SGT:
4976 case ICmpInst::ICMP_SLT:
4977 NewPred = ICmpInst::ICMP_NE;
4987 if (!IsNonNegative &&
4988 (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE))
4990 if (!IsNonPositive &&
4991 (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE))
4993 if ((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE ||
4994 Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) &&
4995 Log2_32(NumElts) >= NumSignBits - 1)
4999 Instruction::Add, VecTy, std::nullopt,
CostKind);
5001 Instruction::Or, VecTy, std::nullopt,
CostKind);
5003 Intrinsic::umax, VecTy, FastMathFlags(),
CostKind);
5006 bool UseOr = OrCost.
isValid() && (!UmaxCost.
isValid() || OrCost <= UmaxCost);
5008 if (AltCost > OrigCost)
5014 Intrinsic::vector_reduce_umax, {VecTy}, {Vec});
5015 Worklist.pushValue(NewReduce);
5017 NewPred, NewReduce, ConstantInt::getNullValue(VecTy->getScalarType()));
5018 replaceValue(
I, *NewCmp);
5027 constexpr unsigned MaxVisited = 32;
5030 bool FoundReduction =
false;
5033 while (!WorkList.
empty()) {
5035 for (
User *U :
I->users()) {
5037 if (!UI || !Visited.
insert(UI).second)
5039 if (Visited.
size() > MaxVisited)
5045 switch (
II->getIntrinsicID()) {
5046 case Intrinsic::vector_reduce_add:
5047 case Intrinsic::vector_reduce_mul:
5048 case Intrinsic::vector_reduce_and:
5049 case Intrinsic::vector_reduce_or:
5050 case Intrinsic::vector_reduce_xor:
5051 case Intrinsic::vector_reduce_smin:
5052 case Intrinsic::vector_reduce_smax:
5053 case Intrinsic::vector_reduce_umin:
5054 case Intrinsic::vector_reduce_umax:
5055 FoundReduction =
true;
5068 return FoundReduction;
5081bool VectorCombine::foldSelectShuffle(Instruction &
I,
bool FromReduction) {
5086 if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
5094 SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
5096 if (!
I ||
I->getOperand(0)->getType() != VT)
5098 return any_of(
I->users(), [&](User *U) {
5099 return U != Op0 && U != Op1 &&
5100 !(isa<ShuffleVectorInst>(U) &&
5101 (InputShuffles.contains(cast<Instruction>(U)) ||
5102 isInstructionTriviallyDead(cast<Instruction>(U))));
5105 if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
5106 checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
5114 for (
auto *U :
I->users()) {
5116 if (!SV || SV->getType() != VT)
5118 if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
5119 (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
5126 if (!collectShuffles(Op0) || !collectShuffles(Op1))
5130 if (FromReduction && Shuffles.
size() > 1)
5135 if (!FromReduction) {
5136 for (
size_t Idx = 0,
E = Shuffles.
size(); Idx !=
E; ++Idx) {
5137 for (
auto *U : Shuffles[Idx]->
users()) {
5152 int MaxV1Elt = 0, MaxV2Elt = 0;
5153 unsigned NumElts = VT->getNumElements();
5154 for (ShuffleVectorInst *SVN : Shuffles) {
5155 SmallVector<int>
Mask;
5156 SVN->getShuffleMask(Mask);
5160 Value *SVOp0 = SVN->getOperand(0);
5161 Value *SVOp1 = SVN->getOperand(1);
5166 for (
int &Elem : Mask) {
5172 if (SVOp0 == Op1 && SVOp1 == Op0) {
5176 if (SVOp0 != Op0 || SVOp1 != Op1)
5182 SmallVector<int> ReconstructMask;
5183 for (
unsigned I = 0;
I <
Mask.size();
I++) {
5186 }
else if (Mask[
I] <
static_cast<int>(NumElts)) {
5187 MaxV1Elt = std::max(MaxV1Elt, Mask[
I]);
5188 auto It =
find_if(V1, [&](
const std::pair<int, int> &
A) {
5189 return Mask[
I] ==
A.first;
5198 MaxV2Elt = std::max<int>(MaxV2Elt, Mask[
I] - NumElts);
5199 auto It =
find_if(V2, [&](
const std::pair<int, int> &
A) {
5200 return Mask[
I] -
static_cast<int>(NumElts) ==
A.first;
5214 sort(ReconstructMask);
5215 OrigReconstructMasks.
push_back(std::move(ReconstructMask));
5223 (MaxV1Elt ==
static_cast<int>(V1.
size()) - 1 &&
5224 MaxV2Elt ==
static_cast<int>(V2.
size()) - 1))
5236 if (InputShuffles.contains(SSV))
5238 return SV->getMaskValue(M);
5246 std::pair<int, int>
Y) {
5247 int MXA = GetBaseMaskValue(
A,
X.first);
5248 int MYA = GetBaseMaskValue(
A,
Y.first);
5251 stable_sort(V1, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
5252 return SortBase(SVI0A,
A,
B);
5254 stable_sort(V2, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
5255 return SortBase(SVI1A,
A,
B);
5260 for (
const auto &Mask : OrigReconstructMasks) {
5261 SmallVector<int> ReconstructMask;
5262 for (
int M : Mask) {
5264 auto It =
find_if(V, [M](
auto A) {
return A.second ==
M; });
5265 assert(It !=
V.end() &&
"Expected all entries in Mask");
5266 return std::distance(
V.begin(), It);
5270 else if (M <
static_cast<int>(NumElts)) {
5271 ReconstructMask.
push_back(FindIndex(V1, M));
5273 ReconstructMask.
push_back(NumElts + FindIndex(V2, M));
5276 ReconstructMasks.
push_back(std::move(ReconstructMask));
5281 SmallVector<int> V1A, V1B, V2A, V2B;
5282 for (
unsigned I = 0;
I < V1.
size();
I++) {
5283 V1A.
push_back(GetBaseMaskValue(SVI0A, V1[
I].first));
5284 V1B.
push_back(GetBaseMaskValue(SVI0B, V1[
I].first));
5286 for (
unsigned I = 0;
I < V2.
size();
I++) {
5287 V2A.
push_back(GetBaseMaskValue(SVI1A, V2[
I].first));
5288 V2B.
push_back(GetBaseMaskValue(SVI1B, V2[
I].first));
5290 while (V1A.
size() < NumElts) {
5294 while (V2A.
size() < NumElts) {
5306 VT, VT, SV->getShuffleMask(),
CostKind);
5313 unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
5314 unsigned MaxVectorSize =
5316 unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
5317 if (MaxElementsInVector == 0)
5326 std::set<SmallVector<int, 4>> UniqueShuffles;
5331 unsigned NumFullVectors =
Mask.size() / MaxElementsInVector;
5332 if (NumFullVectors < 2)
5333 return C + ShuffleCost;
5334 SmallVector<int, 4> SubShuffle(MaxElementsInVector);
5335 unsigned NumUniqueGroups = 0;
5336 unsigned NumGroups =
Mask.size() / MaxElementsInVector;
5339 for (
unsigned I = 0;
I < NumFullVectors; ++
I) {
5340 for (
unsigned J = 0; J < MaxElementsInVector; ++J)
5341 SubShuffle[J] = Mask[MaxElementsInVector *
I + J];
5342 if (UniqueShuffles.insert(SubShuffle).second)
5343 NumUniqueGroups += 1;
5345 return C + ShuffleCost * NumUniqueGroups / NumGroups;
5351 SmallVector<int, 16>
Mask;
5352 SV->getShuffleMask(Mask);
5353 return AddShuffleMaskAdjustedCost(
C, Mask);
5356 auto AllShufflesHaveSameOperands =
5357 [](SmallPtrSetImpl<Instruction *> &InputShuffles) {
5358 if (InputShuffles.size() < 2)
5360 ShuffleVectorInst *FirstSV =
5367 std::next(InputShuffles.begin()), InputShuffles.end(),
5368 [&](Instruction *
I) {
5369 ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(I);
5370 return SV && SV->getOperand(0) == In0 && SV->getOperand(1) == In1;
5379 CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
5381 if (AllShufflesHaveSameOperands(InputShuffles)) {
5382 UniqueShuffles.clear();
5383 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
5386 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
5392 FixedVectorType *Op0SmallVT =
5394 FixedVectorType *Op1SmallVT =
5399 UniqueShuffles.clear();
5400 CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
5402 std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
5404 std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
5407 LLVM_DEBUG(
dbgs() <<
"Found a binop select shuffle pattern: " <<
I <<
"\n");
5409 <<
" vs CostAfter: " << CostAfter <<
"\n");
5410 if (CostBefore < CostAfter ||
5421 if (InputShuffles.contains(SSV))
5423 return SV->getOperand(
Op);
5427 GetShuffleOperand(SVI0A, 1), V1A);
5430 GetShuffleOperand(SVI0B, 1), V1B);
5433 GetShuffleOperand(SVI1A, 1), V2A);
5436 GetShuffleOperand(SVI1B, 1), V2B);
5441 I->copyIRFlags(Op0,
true);
5446 I->copyIRFlags(Op1,
true);
5448 for (
int S = 0,
E = ReconstructMasks.size(); S !=
E; S++) {
5451 replaceValue(*Shuffles[S], *NSV,
false);
5454 Worklist.pushValue(NSV0A);
5455 Worklist.pushValue(NSV0B);
5456 Worklist.pushValue(NSV1A);
5457 Worklist.pushValue(NSV1B);
5467bool VectorCombine::shrinkType(Instruction &
I) {
5468 Value *ZExted, *OtherOperand;
5474 Value *ZExtOperand =
I.getOperand(
I.getOperand(0) == OtherOperand ? 1 : 0);
5478 unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();
5480 if (
I.getOpcode() == Instruction::LShr) {
5497 Instruction::ZExt, BigTy, SmallTy,
5498 TargetTransformInfo::CastContextHint::None,
CostKind);
5503 for (User *U : ZExtOperand->
users()) {
5510 ShrinkCost += ZExtCost;
5525 ShrinkCost += ZExtCost;
5532 Instruction::Trunc, SmallTy, BigTy,
5533 TargetTransformInfo::CastContextHint::None,
CostKind);
5538 if (ShrinkCost > CurrentCost)
5542 Value *Op0 = ZExted;
5545 if (
I.getOperand(0) == OtherOperand)
5552 replaceValue(
I, *NewZExtr);
5558bool VectorCombine::foldInsExtVectorToShuffle(Instruction &
I) {
5559 Value *DstVec, *SrcVec;
5560 uint64_t ExtIdx, InsIdx;
5570 if (!DstVecTy || !SrcVecTy ||
5576 if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)
5583 bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
5585 if (NeedDstSrcSwap) {
5587 Mask[InsIdx] = ExtIdx % NumDstElts;
5591 std::iota(
Mask.begin(),
Mask.end(), 0);
5592 Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
5605 SmallVector<int> ExtToVecMask;
5606 if (!NeedExpOrNarrow) {
5611 nullptr, {DstVec, SrcVec});
5617 ExtToVecMask[ExtIdx % NumDstElts] = ExtIdx;
5620 DstVecTy, SrcVecTy, ExtToVecMask,
CostKind);
5624 if (!Ext->hasOneUse())
5627 LLVM_DEBUG(
dbgs() <<
"Found a insert/extract shuffle-like pair: " <<
I
5628 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
5631 if (OldCost < NewCost)
5634 if (NeedExpOrNarrow) {
5635 if (!NeedDstSrcSwap)
5648 replaceValue(
I, *Shuf);
5657bool VectorCombine::foldInterleaveIntrinsics(Instruction &
I) {
5658 const APInt *SplatVal0, *SplatVal1;
5668 auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
5669 unsigned Width = VTy->getElementType()->getIntegerBitWidth();
5678 LLVM_DEBUG(
dbgs() <<
"VC: The cost to cast from " << *ExtVTy <<
" to "
5679 << *
I.getType() <<
" is too high.\n");
5683 APInt NewSplatVal = SplatVal1->
zext(Width * 2);
5684 NewSplatVal <<= Width;
5685 NewSplatVal |= SplatVal0->
zext(Width * 2);
5687 ExtVTy->getElementCount(), ConstantInt::get(
F.getContext(), NewSplatVal));
5695bool VectorCombine::shrinkLoadForShuffles(Instruction &
I) {
5697 if (!OldLoad || !OldLoad->isSimple())
5704 unsigned const OldNumElements = OldLoadTy->getNumElements();
5710 using IndexRange = std::pair<int, int>;
5711 auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
5712 IndexRange OutputRange = IndexRange(OldNumElements, -1);
5713 for (llvm::Use &Use :
I.uses()) {
5715 User *Shuffle =
Use.getUser();
5720 return std::nullopt;
5727 for (
int Index : Mask) {
5728 if (Index >= 0 && Index <
static_cast<int>(OldNumElements)) {
5729 OutputRange.first = std::min(Index, OutputRange.first);
5730 OutputRange.second = std::max(Index, OutputRange.second);
5735 if (OutputRange.second < OutputRange.first)
5736 return std::nullopt;
5742 if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
5743 unsigned const NewNumElements = Indices->second + 1u;
5747 if (NewNumElements < OldNumElements) {
5752 Type *ElemTy = OldLoadTy->getElementType();
5754 Value *PtrOp = OldLoad->getPointerOperand();
5757 Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
5758 OldLoad->getPointerAddressSpace(),
CostKind);
5761 OldLoad->getPointerAddressSpace(),
CostKind);
5763 using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
5765 unsigned const MaxIndex = NewNumElements * 2u;
5767 for (llvm::Use &Use :
I.uses()) {
5774 ArrayRef<int> OldMask = Shuffle->getShuffleMask();
5780 for (
int Index : OldMask) {
5781 if (Index >=
static_cast<int>(MaxIndex))
5795 dbgs() <<
"Found a load used only by shufflevector instructions: "
5796 <<
I <<
"\n OldCost: " << OldCost
5797 <<
" vs NewCost: " << NewCost <<
"\n");
5799 if (OldCost < NewCost || !NewCost.
isValid())
5805 NewLoad->copyMetadata(
I);
5808 for (UseEntry &Use : NewUses) {
5809 ShuffleVectorInst *Shuffle =
Use.first;
5810 std::vector<int> &NewMask =
Use.second;
5817 replaceValue(*Shuffle, *NewShuffle,
false);
5830bool VectorCombine::shrinkPhiOfShuffles(Instruction &
I) {
5832 if (!Phi ||
Phi->getNumIncomingValues() != 2u)
5836 ArrayRef<int> Mask0;
5837 ArrayRef<int> Mask1;
5850 auto const InputNumElements = InputVT->getNumElements();
5852 if (InputNumElements >= ResultVT->getNumElements())
5857 SmallVector<int, 16> NewMask;
5860 for (
auto [
M0,
M1] :
zip(Mask0, Mask1)) {
5861 if (
M0 >= 0 &&
M1 >= 0)
5863 else if (
M0 == -1 &&
M1 == -1)
5876 int MaskOffset = NewMask[0
u];
5877 unsigned Index = (InputNumElements + MaskOffset) % InputNumElements;
5880 for (
unsigned I = 0u;
I < InputNumElements; ++
I) {
5894 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
5897 if (NewCost > OldCost)
5909 auto *NewPhi = Builder.
CreatePHI(NewShuf0->getType(), 2u);
5911 NewPhi->addIncoming(
Op,
Phi->getIncomingBlock(1u));
5917 replaceValue(*Phi, *NewShuf1);
5923bool VectorCombine::run() {
5937 auto Opcode =
I.getOpcode();
5945 if (IsFixedVectorType) {
5947 case Instruction::InsertElement:
5948 if (vectorizeLoadInsert(
I))
5951 case Instruction::ShuffleVector:
5952 if (widenSubvectorLoad(
I))
5963 if (scalarizeOpOrCmp(
I))
5965 if (scalarizeLoad(
I))
5967 if (scalarizeExtExtract(
I))
5969 if (scalarizeVPIntrinsic(
I))
5971 if (foldInterleaveIntrinsics(
I))
5975 if (Opcode == Instruction::Store)
5976 if (foldSingleElementStore(
I))
5980 if (TryEarlyFoldsOnly)
5987 if (IsFixedVectorType) {
5989 case Instruction::InsertElement:
5990 if (foldInsExtFNeg(
I))
5992 if (foldInsExtBinop(
I))
5994 if (foldInsExtVectorToShuffle(
I))
5997 case Instruction::ShuffleVector:
5998 if (foldPermuteOfBinops(
I))
6000 if (foldShuffleOfBinops(
I))
6002 if (foldShuffleOfSelects(
I))
6004 if (foldShuffleOfCastops(
I))
6006 if (foldShuffleOfShuffles(
I))
6008 if (foldPermuteOfIntrinsic(
I))
6010 if (foldShufflesOfLengthChangingShuffles(
I))
6012 if (foldShuffleOfIntrinsics(
I))
6014 if (foldSelectShuffle(
I))
6016 if (foldShuffleToIdentity(
I))
6019 case Instruction::Load:
6020 if (shrinkLoadForShuffles(
I))
6023 case Instruction::BitCast:
6024 if (foldBitcastShuffle(
I))
6026 if (foldSelectsFromBitcast(
I))
6029 case Instruction::And:
6030 case Instruction::Or:
6031 case Instruction::Xor:
6032 if (foldBitOpOfCastops(
I))
6034 if (foldBitOpOfCastConstant(
I))
6037 case Instruction::PHI:
6038 if (shrinkPhiOfShuffles(
I))
6048 case Instruction::Call:
6049 if (foldShuffleFromReductions(
I))
6051 if (foldCastFromReductions(
I))
6054 case Instruction::ExtractElement:
6055 if (foldShuffleChainsToReduce(
I))
6058 case Instruction::ICmp:
6059 if (foldSignBitReductionCmp(
I))
6061 if (foldICmpEqZeroVectorReduce(
I))
6063 if (foldEquivalentReductionCmp(
I))
6065 if (foldReduceAddCmpZero(
I))
6068 case Instruction::FCmp:
6069 if (foldExtractExtract(
I))
6072 case Instruction::Or:
6073 if (foldConcatOfBoolMasks(
I))
6078 if (foldExtractExtract(
I))
6080 if (foldExtractedCmps(
I))
6082 if (foldBinopOfReductions(
I))
6091 bool MadeChange =
false;
6092 for (BasicBlock &BB :
F) {
6104 if (!
I->isDebugOrPseudoInst())
6105 MadeChange |= FoldInst(*
I);
6112 while (!Worklist.isEmpty()) {
6122 MadeChange |= FoldInst(*
I);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static cl::opt< IntrinsicCostStrategy > IntrinsicCost("intrinsic-cost-strategy", cl::desc("Costing strategy for intrinsic instructions"), cl::init(IntrinsicCostStrategy::InstructionCost), cl::values(clEnumValN(IntrinsicCostStrategy::InstructionCost, "instruction-cost", "Use TargetTransformInfo::getInstructionCost"), clEnumValN(IntrinsicCostStrategy::IntrinsicCost, "intrinsic-cost", "Use TargetTransformInfo::getIntrinsicInstrCost"), clEnumValN(IntrinsicCostStrategy::TypeBasedIntrinsicCost, "type-based-intrinsic-cost", "Calculate the intrinsic cost based only on argument types")))
This file defines the DenseMap class.
This is the interface for a simple mod/ref and alias analysis over globals.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
static bool isFreeConcat(ArrayRef< InstLane > Item, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI)
Detect concat of multiple values into a vector.
static void analyzeCostOfVecReduction(const IntrinsicInst &II, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI, InstructionCost &CostBeforeReduction, InstructionCost &CostAfterReduction)
static SmallVector< InstLane > generateInstLaneVectorFromOperand(ArrayRef< InstLane > Item, int Op)
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilderBase &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
static Value * generateNewInstTree(ArrayRef< InstLane > Item, Use *From, FixedVectorType *Ty, const DenseSet< std::pair< Value *, Use * > > &IdentityLeafs, const DenseSet< std::pair< Value *, Use * > > &SplatLeafs, const DenseSet< std::pair< Value *, Use * > > &ConcatLeafs, IRBuilderBase &Builder, const TargetTransformInfo *TTI)
std::pair< Value *, int > InstLane
static bool isKnownNonPositive(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Used by foldReduceAddCmpZero to check if we can prove that a value is non-positive.
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI)
Returns true if this ShuffleVectorInst eventually feeds into a vector reduction intrinsic (e....
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
static const unsigned InvalidIndex
static Value * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilderBase &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift th...
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, const SimplifyQuery &SQ)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static InstLane lookThroughShuffles(Value *V, int Lane)
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
static constexpr int Concat[]
A manager for alias analyses.
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
bool isNegative() const
Determine sign of this APInt.
unsigned countl_one() const
Count the number of leading one bits.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
bool isOne() const
Determine if this is a value of 1.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
Get the first element.
size_t size() const
Get the array size.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
static LLVM_ABI std::optional< CmpPredicate > getMatching(CmpPredicate A, CmpPredicate B)
Compares two CmpPredicates taking samesign into account and returns the canonicalized CmpPredicate if...
static LLVM_ABI Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
LLVM_ABI ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of...
LLVM_ABI ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this ra...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
Implements a dense probed hash-table based set.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
bool isEquality() const
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={})
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
void SetCurrentDebugLocation(const DebugLoc &L)
Set location information used by debugging information.
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Value * CreateIsNotNeg(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg > -1.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Value * CreateIsNeg(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg < 0.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
LLVM_ABI Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFNegFMF(Value *V, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
void push(Instruction *I)
Push the instruction onto the worklist stack.
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAlignment(Align Align)
Type * getPointerOperandType() const
Align getAlign() const
Return the alignment of the access that is being performed.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
const SDValue & getOperand(unsigned Num) const
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static LLVM_ABI bool isVPBinOp(Intrinsic::ID ID)
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
std::pair< iterator, bool > insert(const ValueT &V)
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_and< Ty... > m_CombineAnd(const Ty &...Ps)
Combine pattern matchers matching all of Ps patterns.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
auto m_Constant()
Match an arbitrary Constant and ignore it.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
cst_pred_ty< is_non_zero_int > m_NonZeroInt()
Match a non-zero integer or a vector with all non-zero elements.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
auto m_AnyIntrinsic()
Matches any intrinsic call and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Mul, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWMul(const LHS &L, const RHS &R)
BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations in either order.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
@ Valid
The data is already valid.
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
LLVM_ABI Value * simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q)
Given operand for a UnaryOperator, fold the result or return null.
scope_exit(Callable) -> scope_exit< Callable >
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
unsigned M1(unsigned Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool programUndefinedIfPoison(const Instruction *Inst)
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr int PoisonMaskElem
LLVM_ABI bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI Intrinsic::ID getReductionForBinop(Instruction::BinaryOps Opc)
Returns the reduction intrinsic id corresponding to the binary operation.
@ And
Bitwise or logical AND of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
LLVM_ABI unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return the number of times the sign bit of the register is replicated into the other bits.
constexpr unsigned BitWidth
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
LLVM_ABI Value * simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicID(Intrinsic::ID IID)
Returns the llvm.vector.reduce min/max intrinsic that corresponds to the intrinsic op.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, const SimplifyQuery &SQ, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
SimplifyQuery getWithInstruction(const Instruction *I) const