#define DEBUG_TYPE "vector-combine"

STATISTIC(NumVecLoad, "Number of vector loads formed");
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
STATISTIC(NumScalarOps, "Number of scalar unary + binary ops formed");
STATISTIC(NumScalarCmp, "Number of scalar compares formed");
STATISTIC(NumScalarIntrinsic, "Number of scalar intrinsic calls formed");
static cl::opt<bool> DisableVectorCombine(
    "disable-vector-combine", cl::init(false), cl::Hidden,
    cl::desc("Disable all vector combine transforms"));

static cl::opt<bool> DisableBinopExtractShuffle(
    "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
    cl::desc("Disable binop extract to shuffle transforms"));

static cl::opt<unsigned> MaxInstrsToScan(
    "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,
    cl::desc("Max number of instructions to scan for vector combining."));

static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
                bool TryEarlyFoldsOnly)
      : /* ... */ TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}

  const TargetTransformInfo &TTI;
  const DominatorTree &DT;
  const SimplifyQuery SQ;
  bool TryEarlyFoldsOnly;

  InstructionWorklist Worklist;
  bool vectorizeLoadInsert(Instruction &I);
  bool widenSubvectorLoad(Instruction &I);
  ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
                                        ExtractElementInst *Ext1,
                                        unsigned PreferredExtractIndex) const;
  bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                             const Instruction &I,
                             ExtractElementInst *&ConvertToShuffle,
                             unsigned PreferredExtractIndex);
  bool foldExtractExtract(Instruction &I);
  bool foldInsExtFNeg(Instruction &I);
  bool foldInsExtBinop(Instruction &I);
  bool foldInsExtVectorToShuffle(Instruction &I);
  bool foldBitOpOfCastops(Instruction &I);
  bool foldBitOpOfCastConstant(Instruction &I);
  bool foldBitcastShuffle(Instruction &I);
  bool scalarizeOpOrCmp(Instruction &I);
  bool scalarizeVPIntrinsic(Instruction &I);
  bool foldExtractedCmps(Instruction &I);
  bool foldSelectsFromBitcast(Instruction &I);
  bool foldBinopOfReductions(Instruction &I);
  bool foldSingleElementStore(Instruction &I);
  bool scalarizeLoad(Instruction &I);
  bool scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy, Value *Ptr);
  bool scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy, Value *Ptr);
  bool scalarizeExtExtract(Instruction &I);
  bool foldConcatOfBoolMasks(Instruction &I);
  bool foldPermuteOfBinops(Instruction &I);
  bool foldShuffleOfBinops(Instruction &I);
  bool foldShuffleOfSelects(Instruction &I);
  bool foldShuffleOfCastops(Instruction &I);
  bool foldShuffleOfShuffles(Instruction &I);
  bool foldPermuteOfIntrinsic(Instruction &I);
  bool foldShufflesOfLengthChangingShuffles(Instruction &I);
  bool foldShuffleOfIntrinsics(Instruction &I);
  bool foldShuffleToIdentity(Instruction &I);
  bool foldShuffleFromReductions(Instruction &I);
  bool foldShuffleChainsToReduce(Instruction &I);
  bool foldCastFromReductions(Instruction &I);
  bool foldSignBitReductionCmp(Instruction &I);
  bool foldICmpEqZeroVectorReduce(Instruction &I);
  bool foldEquivalentReductionCmp(Instruction &I);
  bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
  bool foldInterleaveIntrinsics(Instruction &I);
  bool shrinkType(Instruction &I);
  bool shrinkLoadForShuffles(Instruction &I);
  bool shrinkPhiOfShuffles(Instruction &I);
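  // Replace all uses of Old with New, push the new value and its users onto
  // the worklist, and erase Old when requested.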
  void replaceValue(Instruction &Old, Value &New, bool Erase = true) {
    // ...
    Worklist.pushUsersToWorkList(*NewI);
    Worklist.pushValue(NewI);
    // ...
    SmallPtrSet<Value *, 4> Visited;
    // ...
        OpI, nullptr, nullptr, [&](Value *V) {
    // ...
      NextInst = NextInst->getNextNode();
    // ...
    Worklist.pushUsersToWorkList(*OpI);
    Worklist.pushValue(OpI);
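// Shared legality checks for the load-widening transforms below: the load must
// be simple, have one use, not be in a memtag-sanitized function, and its
// scalar size must divide the minimum vector register width.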
  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
      Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
      // ...
  Type *ScalarTy = Load->getType()->getScalarType();
  // ...
  unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
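// Match an insert of a scalar load and widen it into a small vector load plus
// a shuffle when the cost model says the vector form is cheaper, adjusting for
// any constant pointer offset into the source.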
bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
  // ...
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  // ...
  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
  unsigned OffsetEltIndex = 0;
  // ...
  unsigned OffsetBitWidth = DL->getIndexTypeSizeInBits(SrcPtr->getType());
  APInt Offset(OffsetBitWidth, 0);
  // ...
  uint64_t ScalarSizeInBytes = ScalarSize / 8;
  if (Offset.urem(ScalarSizeInBytes) != 0)
    return false;
  // ...
  OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
  if (OffsetEltIndex >= MinVecNumElts)
    return false;
  // ...
  unsigned AS = Load->getPointerAddressSpace();
  // ...
  unsigned OutputNumElts = Ty->getNumElements();
  // ...
  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
  Mask[0] = OffsetEltIndex;
  // ...
  if (OldCost < NewCost || !NewCost.isValid())
    return false;
  // ...
  replaceValue(I, *VecLd);
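// If a load is only used by an identity-with-padding shuffle, try to load the
// wider destination vector directly and drop the shuffle.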
bool VectorCombine::widenSubvectorLoad(Instruction &I) {
  // ...
  if (!Shuf->isIdentityWithPadding())
    return false;
  // ...
  unsigned OpIndex = any_of(Shuf->getShuffleMask(), [&NumOpElts](int M) {
    return M >= (int)(NumOpElts);
  });
  // ...
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  // ...
  unsigned AS = Load->getPointerAddressSpace();
  // ...
  if (OldCost < NewCost || !NewCost.isValid())
    return false;
  // ...
  replaceValue(I, *VecLd);
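// Given two constant-index extracts, decide which one should be replaced by a
// shuffle that lines its lane up with the other extract's index.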
ExtractElementInst *VectorCombine::getShuffleExtract(
    ExtractElementInst *Ext0, ExtractElementInst *Ext1,
    // ...
  assert(Index0C && Index1C && "Expected constant extract indexes");
  unsigned Index0 = Index0C->getZExtValue();
  unsigned Index1 = Index1C->getZExtValue();
  // ...
  if (Index0 == Index1)
    // ...
  if (PreferredExtractIndex == Index0)
    return Ext1;
  if (PreferredExtractIndex == Index1)
    return Ext0;
  // ...
  return Index0 > Index1 ? Ext0 : Ext1;
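// Compare the cost of two scalar extracts plus a scalar op against one vector
// op plus a single extract; sets ConvertToShuffle when a lane must be moved.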
bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
                                          ExtractElementInst *Ext1,
                                          const Instruction &I,
                                          ExtractElementInst *&ConvertToShuffle,
                                          unsigned PreferredExtractIndex) {
  // ...
  assert(Ext0IndexC && Ext1IndexC && "Expected constant extract indexes");

  unsigned Opcode = I.getOpcode();
  // ...
  assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
         "Expected a compare");
  // ...
  unsigned Ext0Index = Ext0IndexC->getZExtValue();
  unsigned Ext1Index = Ext1IndexC->getZExtValue();
  // ...
  unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
  unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
  // ...
  if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
    // ...
    bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
    // ...
    OldCost = CheapExtractCost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
  } else {
    OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost +
    // ...
  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
  if (ConvertToShuffle) {
    // ...
    SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(),
    // ...
    ShuffleMask[BestInsIndex] = BestExtIndex;
    // ...
        VecTy, VecTy, ShuffleMask, CostKind, 0,
        nullptr, {ConvertToShuffle});
    // ...
        VecTy, VecTy, {}, CostKind, 0, nullptr,
  // ...
  return OldCost < NewCost;
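// Helpers: build a shuffle that shifts an element to a new lane ("shift"), and
// materialize the vector cmp/binop used by foldExtractExtract.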
  ShufMask[NewIndex] = OldIndex;
  return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
  // ...
                                     V1, "foldExtExtBinop");
  // ...
  VecBOInst->copyIRFlags(&I);
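// Fold a scalar binop or cmp of two extracts from the same vector into a
// vector op followed by one extract.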
bool VectorCombine::foldExtractExtract(Instruction &I) {
  // ...
  ExtractElementInst *ExtractToChange;
  if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
    return false;
  // ...
  if (ExtractToChange) {
    unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
    // ...
    if (ExtractToChange == Ext0)
  // ...
      ? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex, I)
      : foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex, I);
  // ...
  replaceValue(I, *NewExt);
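// Fold an insert of an extracted fneg lane into a vector fneg merged with the
// destination via a shuffle (plus a length-changing shuffle when needed).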
bool VectorCombine::foldInsExtFNeg(Instruction &I) {
  // ...
  uint64_t ExtIdx, InsIdx;
  // ...
  auto *DstVecScalarTy = DstVecTy->getScalarType();
  // ...
  if (!SrcVecTy || DstVecScalarTy != SrcVecTy->getScalarType())
    return false;
  // ...
  unsigned NumDstElts = DstVecTy->getNumElements();
  unsigned NumSrcElts = SrcVecTy->getNumElements();
  if (ExtIdx > NumSrcElts || InsIdx >= NumDstElts || NumDstElts == 1)
    return false;
  // ...
  SmallVector<int> Mask(NumDstElts);
  std::iota(Mask.begin(), Mask.end(), 0);
  Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
  // ...
  bool NeedLenChg = SrcVecTy->getNumElements() != NumDstElts;
  // ...
  SmallVector<int> SrcMask;
  // ...
    SrcMask[ExtIdx % NumDstElts] = ExtIdx;
    // ...
        DstVecTy, SrcVecTy, SrcMask, CostKind);
  // ...
             << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  // ...
  if (NewCost > OldCost)
    return false;
  // ...
  Value *NewShuf, *LenChgShuf = nullptr;
  // ...
  replaceValue(I, *NewShuf);
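// Match an insert of a scalar binop into a vector binop and rewrite it as a
// single vector binop, intersecting the IR flags of both operations.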
bool VectorCombine::foldInsExtBinop(Instruction &I) {
  BinaryOperator *VecBinOp, *SclBinOp;
  // ...
             << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  // ...
  if (NewCost > OldCost)
    return false;
  // ...
  NewInst->copyIRFlags(VecBinOp);
  NewInst->andIRFlags(SclBinOp);
  // ...
  replaceValue(I, *NewBO);
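// Fold bitwise logic of two identical casts: bitop(cast(X), cast(Y)) ->
// cast(bitop(X, Y)), charging the old casts back when they have other uses.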
bool VectorCombine::foldBitOpOfCastops(Instruction &I) {
  // ...
  if (!BinOp || !BinOp->isBitwiseLogicOp())
    return false;
  // ...
  if (!LHSCast || !RHSCast) {
    LLVM_DEBUG(dbgs() << "  One or both operands are not cast instructions\n");
    return false;
  }
  // ...
  if (CastOpcode != RHSCast->getOpcode())
    return false;
  // ...
  switch (CastOpcode) {
  case Instruction::BitCast:
  case Instruction::Trunc:
  case Instruction::SExt:
  case Instruction::ZExt:
  // ...
  Value *LHSSrc = LHSCast->getOperand(0);
  Value *RHSSrc = RHSCast->getOperand(0);
  // ...
  auto *SrcTy = LHSSrc->getType();
  auto *DstTy = I.getType();
  // ...
  if (CastOpcode != Instruction::BitCast &&
  // ...
  if (!SrcTy->getScalarType()->isIntegerTy() ||
      !DstTy->getScalarType()->isIntegerTy())
    return false;
  // ...
            LHSCastCost + RHSCastCost;
  // ...
  if (!LHSCast->hasOneUse())
    NewCost += LHSCastCost;
  if (!RHSCast->hasOneUse())
    NewCost += RHSCastCost;
  // ...
             << " NewCost=" << NewCost << "\n");

  if (NewCost > OldCost)
    return false;
  // ...
                    BinOp->getName() + ".inner");
  // ...
  NewBinOp->copyIRFlags(BinOp);
  // ...
  replaceValue(I, *Result);
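// Constant variant of the above: bitop(cast(X), C) -> cast(bitop(X, InvC)),
// where InvC is the constant translated through the inverse cast.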
bool VectorCombine::foldBitOpOfCastConstant(Instruction &I) {
  // ...
  switch (CastOpcode) {
  case Instruction::BitCast:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::Trunc:
  // ...
  Value *LHSSrc = LHSCast->getOperand(0);
  // ...
  auto *SrcTy = LHSSrc->getType();
  auto *DstTy = I.getType();
  // ...
  if (CastOpcode != Instruction::BitCast &&
  // ...
  if (!SrcTy->getScalarType()->isIntegerTy() ||
      !DstTy->getScalarType()->isIntegerTy())
    return false;
  // ...
  PreservedCastFlags RHSFlags;
  // ...
  if (!LHSCast->hasOneUse())
    NewCost += LHSCastCost;

  LLVM_DEBUG(dbgs() << "foldBitOpOfCastConstant: OldCost=" << OldCost
                    << " NewCost=" << NewCost << "\n");

  if (NewCost > OldCost)
    return false;
  // ...
      LHSSrc, InvC, I.getName() + ".inner");
  // ...
  NewBinOp->copyIRFlags(&I);
  // ...
  replaceValue(I, *Result);
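// Move a bitcast after a shuffle, rescaling the mask by the ratio of source
// and destination element sizes.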
bool VectorCombine::foldBitcastShuffle(Instruction &I) {
  // ...
  if (!DestTy || !SrcTy)
    return false;

  unsigned DestEltSize = DestTy->getScalarSizeInBits();
  unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
  if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
    return false;
  // ...
  if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
      !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
    return false;
  // ...
  SmallVector<int, 16> NewMask;
  if (DestEltSize <= SrcEltSize) {
    // ...
    assert(SrcEltSize % DestEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = SrcEltSize / DestEltSize;
  // ...
    assert(DestEltSize % SrcEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = DestEltSize / SrcEltSize;
  // ...
  unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
  auto *NewShuffleTy =
  // ...
  auto *OldShuffleTy =
  // ...
  unsigned NumOps = IsUnary ? 1 : 2;
  // ...
                          TargetTransformInfo::CastContextHint::None,
  // ...
                          TargetTransformInfo::CastContextHint::None,
  // ...
  LLVM_DEBUG(dbgs() << "Found a bitcasted shuffle: " << I << "\n  OldCost: "
                    << OldCost << " vs NewCost: " << NewCost << "\n");

  if (NewCost > OldCost || !NewCost.isValid())
    return false;
  // ...
  replaceValue(I, *Shuf);
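// Scalarize a VP intrinsic whose vector operands are splats and whose mask is
// all-true: do the operation on the scalars and splat the result.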
bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
  // ...
  if (!ScalarOp0 || !ScalarOp1)
    return false;
  // ...
  auto IsAllTrueMask = [](Value *MaskVal) {
    // ...
      return ConstValue->isAllOnesValue();
  // ...
  SmallVector<int> Mask;
  // ...
    Mask.resize(FVTy->getNumElements(), 0);
  // ...
    Args.push_back(V->getType());
  IntrinsicCostAttributes Attrs(IntrID, VecTy, Args);
  // ...
  std::optional<unsigned> FunctionalOpcode =
  // ...
  std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
  if (!FunctionalOpcode) {
    // ...
    IntrinsicCostAttributes Attrs(*ScalarIntrID, VecTy->getScalarType(), Args);
  // ...
  InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;

  LLVM_DEBUG(dbgs() << "Found a VP Intrinsic to scalarize: " << VPI
  // ...
                    << ", Cost of scalarizing:" << NewCost << "\n");
  // ...
  if (OldCost < NewCost || !NewCost.isValid())
    return false;
  // ...
  bool SafeToSpeculate;
  // ...
          *FunctionalOpcode, &VPI, nullptr, &AC, &DT);
  if (!SafeToSpeculate &&
  // ...
                                      {ScalarOp0, ScalarOp1})
  // ...
                                   ScalarOp0, ScalarOp1);
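// Scalarize a unary/binary op, cmp, or intrinsic whose operands are constants
// or insertelements at one common index, then re-insert the scalar result.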
bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
  // ...
  if (!UO && !BO && !CI && !II)
    return false;
  // ...
    if (Arg->getType() != II->getType() &&
  // ...
  for (User *U : I.users())
  // ...
  std::optional<uint64_t> Index;

  auto Ops = II ? II->args() : I.operands();
  // ...
    uint64_t InsIdx = 0;
    // ...
    if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
      return false;
    // ...
    else if (InsIdx != *Index)
      return false;
  // ...
  if (!Index.has_value())
    return false;
  // ...
  Type *ScalarTy = VecTy->getScalarType();
  assert(VecTy->isVectorTy() &&
         // ...
         "Unexpected types for insert element into binop or cmp");

  unsigned Opcode = I.getOpcode();
  // ...
  } else if (UO || BO) {
  // ...
    IntrinsicCostAttributes ScalarICA(
        II->getIntrinsicID(), ScalarTy,
  // ...
    IntrinsicCostAttributes VectorICA(
        II->getIntrinsicID(), VecTy,
  // ...
  Value *NewVecC = nullptr;
  // ...
    NewVecC = simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
  // ...
        simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
  // ...
    NewVecC = simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
  // ...
  for (auto [Idx, Op, VecC, Scalar] : enumerate(Ops, VecCs, ScalarOps)) {
    // ...
            II->getIntrinsicID(), Idx, &TTI)))
    // ...
        Instruction::InsertElement, VecTy, CostKind, *Index, VecC, Scalar);
    OldCost += InsertCost;
    NewCost += !Op->hasOneUse() * InsertCost;
  // ...
  if (OldCost < NewCost || !NewCost.isValid())
    return false;
  // ...
    ++NumScalarIntrinsic;
  // ...
    Scalar = Builder.CreateCmp(CI->getPredicate(), ScalarOps[0], ScalarOps[1]);
  // ...
  Scalar->setName(I.getName() + ".scalar");
  // ...
    ScalarInst->copyIRFlags(&I);
  // ...
  replaceValue(I, *Insert);
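// Fold a logic op of two compares of constant-index extracts from the same
// vector into one vector compare plus a single extract.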
bool VectorCombine::foldExtractedCmps(Instruction &I) {
  // ...
  if (!BI || !I.getType()->isIntegerTy(1))
    return false;
  // ...
  Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
  // ...
  CmpPredicate P0, P1;
  // ...
  uint64_t Index0, Index1;
  // ...
  ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1, CostKind);
  // ...
  assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
         "Unknown ExtractElementInst");
  // ...
  unsigned CmpOpcode =
  // ...
      Ext0Cost + Ext1Cost + CmpCost * 2 +
  // ...
  int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
  int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
  // ...
  ShufMask[CheapIndex] = ExpensiveIndex;
  // ...
  NewCost += Ext0->hasOneUse() ? 0 : Ext0Cost;
  NewCost += Ext1->hasOneUse() ? 0 : Ext1Cost;
  // ...
  if (OldCost < NewCost || !NewCost.isValid())
    return false;
  // ...
  Value *LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
  Value *RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
  // ...
  replaceValue(I, *NewExt);
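// Group scalar selects that are fed through extracts of a bitcast vector by
// their shared i1 condition, and replace each large-enough group with one
// vector select plus per-lane extracts.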
bool VectorCombine::foldSelectsFromBitcast(Instruction &I) {
  // ...
  if (!SrcVecTy || !DstVecTy)
    return false;
  // ...
  if (SrcEltBits != 32 && SrcEltBits != 64)
    return false;
  // ...
  if (!DstEltTy->isIntegerTy() || DstEltBits >= SrcEltBits)
    return false;
  // ...
  if (!ScalarSelCost.isValid() || ScalarSelCost == 0)
    return false;
  // ...
  unsigned MinSelects = (VecSelCost.getValue() / ScalarSelCost.getValue()) + 1;
  // ...
  if (!BC->hasNUsesOrMore(MinSelects))
    return false;
  // ...
  DenseMap<Value *, SmallVector<SelectInst *, 8>> CondToSelects;
  // ...
  for (User *U : BC->users()) {
    // ...
    for (User *ExtUser : Ext->users()) {
      // ...
          Cond->getType()->isIntegerTy(1))
  // ...
  if (CondToSelects.empty())
    return false;

  bool MadeChange = false;
  Value *SrcVec = BC->getOperand(0);
  // ...
  for (auto [Cond, Selects] : CondToSelects) {
    // ...
    if (Selects.size() < MinSelects) {
      LLVM_DEBUG(dbgs() << "VectorCombine: foldSelectsFromBitcast not "
                        << "profitable (VecCost=" << VecSelCost
                        << ", ScalarCost=" << ScalarSelCost
                        << ", NumSelects=" << Selects.size() << ")\n");
    // ...
    auto InsertPt = std::next(BC->getIterator());
    // ...
      InsertPt = std::next(CondInst->getIterator());
    // ...
    for (SelectInst *Sel : Selects) {
      // ...
      Value *Idx = Ext->getIndexOperand();
      // ...
      replaceValue(*Sel, *NewExt);
    // ...
               << " selects into vector select\n");
  unsigned ReductionOpc =
  // ...
  CostBeforeReduction =
      TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, ExtType,
  // ...
  CostAfterReduction =
      TTI.getExtendedReductionCost(ReductionOpc, IsUnsigned, II.getType(),
  // ...
  if (RedOp && II.getIntrinsicID() == Intrinsic::vector_reduce_add &&
  // ...
      (Op0->getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
    // ...
        TTI.getCastInstrCost(Op0->getOpcode(), MulType, ExtType,
    // ...
        TTI.getArithmeticInstrCost(Instruction::Mul, MulType, CostKind);
    // ...
        TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, MulType,
    // ...
    CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;
    CostAfterReduction = TTI.getMulAccReductionCost(
        IsUnsigned, ReductionOpc, II.getType(), ExtType, CostKind);
  // ...
  CostAfterReduction = TTI.getArithmeticReductionCost(ReductionOpc, VecRedTy,
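// Fold binop(reduce(A), reduce(B)) -> reduce(binop(A, B)) for single-use
// reductions; sub is modelled through vector_reduce_add.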
bool VectorCombine::foldBinopOfReductions(Instruction &I) {
  // ...
  if (BinOpOpc == Instruction::Sub)
    ReductionIID = Intrinsic::vector_reduce_add;
  // ...
  auto checkIntrinsicAndGetItsArgument = [](Value *V,
  // ...
    if (II->getIntrinsicID() == IID && II->hasOneUse())
      return II->getArgOperand(0);
  // ...
  Value *V0 = checkIntrinsicAndGetItsArgument(I.getOperand(0), ReductionIID);
  // ...
  Value *V1 = checkIntrinsicAndGetItsArgument(I.getOperand(1), ReductionIID);
  // ...
  unsigned ReductionOpc =
  // ...
      CostOfRedOperand0 + CostOfRedOperand1 +
  // ...
  if (NewCost >= OldCost || !NewCost.isValid())
    return false;
  // ...
             << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  // ...
  if (BinOpOpc == Instruction::Or)
    VectorBO = Builder.CreateOr(V0, V1, "",
  // ...
  replaceValue(I, *Rdx);
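// Helper: scan the instruction range for anything that may modify memory,
// giving up after a bounded number of instructions.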
  unsigned NumScanned = 0;
  return std::any_of(Begin, End, [&](const Instruction &Instr) {
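// Describes whether an index is safe to use for a scalarized memory access:
// Safe, Unsafe, or safe only once the index value has been frozen.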
class ScalarizationResult {
  enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
  // ...
  ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)
      : Status(Status), ToFreeze(ToFreeze) {}
  // ...
  ScalarizationResult(const ScalarizationResult &Other) = default;
  ~ScalarizationResult() {
    assert(!ToFreeze && "freeze() not called with ToFreeze being set");
  }

  static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }
  static ScalarizationResult safe() { return {StatusTy::Safe}; }
  static ScalarizationResult safeWithFreeze(Value *ToFreeze) {
    return {StatusTy::SafeWithFreeze, ToFreeze};
  }

  bool isSafe() const { return Status == StatusTy::Safe; }
  bool isUnsafe() const { return Status == StatusTy::Unsafe; }
  bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }
  // ...
    Status = StatusTy::Unsafe;
  // ...
  void freeze(IRBuilderBase &Builder, Instruction &UserI) {
    assert(isSafeWithFreeze() &&
           "should only be used when freezing is required");
    // ...
           "UserI must be a user of ToFreeze");
    IRBuilder<>::InsertPointGuard Guard(Builder);
    // ...
      if (U.get() == ToFreeze)
  // ...
  uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
  // ...
  if (C->getValue().ult(NumElements))
    return ScalarizationResult::safe();
  return ScalarizationResult::unsafe();
  // ...
    return ScalarizationResult::unsafe();
  // ...
  APInt Zero(IntWidth, 0);
  APInt MaxElts(IntWidth, NumElements);
  // ...
                 true, &AC, CtxI, &DT)))
    return ScalarizationResult::safe();
  return ScalarizationResult::unsafe();
  // ...
  if (ValidIndices.contains(IdxRange))
    return ScalarizationResult::safeWithFreeze(IdxBase);
  return ScalarizationResult::unsafe();
  // ...
      C->getZExtValue() * DL.getTypeStoreSize(ScalarType));
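// Rewrite "store (insertelement (load %p), %v, %idx), %p" as a scalar store
// of %v to lane %idx of %p when the index is provably in bounds.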
bool VectorCombine::foldSingleElementStore(Instruction &I) {
  // ...
  if (!match(SI->getValueOperand(),
  // ...
  Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
  // ...
  if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
      !DL->typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||
      SrcAddr != SI->getPointerOperand()->stripPointerCasts())
    return false;
  // ...
  if (ScalarizableIdx.isUnsafe() ||
  // ...
  Worklist.push(Load);
  // ...
  if (ScalarizableIdx.isSafeWithFreeze())
  // ...
      SI->getValueOperand()->getType(), SI->getPointerOperand(),
      {ConstantInt::get(Idx->getType(), 0), Idx});
  // ...
      std::max(SI->getAlign(), Load->getAlign()), NewElement->getType(), Idx,
  // ...
  replaceValue(I, *NSI);
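// Dispatcher: if every user of a vector load is an extractelement, try
// scalarizeLoadExtract; if every user is a bitcast, try scalarizeLoadBitcast.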
bool VectorCombine::scalarizeLoad(Instruction &I) {
  // ...
  if (LI->isVolatile() || !DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
    return false;

  bool AllExtracts = true;
  bool AllBitcasts = true;
  // ...
  unsigned NumInstChecked = 0;
  // ...
  for (User *U : LI->users()) {
    // ...
    if (!UI || UI->getParent() != LI->getParent())
      return false;
    // ...
    if (UI->use_empty())
    // ...
      AllExtracts = false;
    // ...
      AllBitcasts = false;
    // ...
    for (Instruction &I :
         make_range(std::next(LI->getIterator()), UI->getIterator())) {
    // ...
    LastCheckedInst = UI;
  // ...
    return scalarizeLoadExtract(LI, VecTy, Ptr);
  // ...
    return scalarizeLoadBitcast(LI, VecTy, Ptr);
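// Replace extracts of a vector load with narrow scalar loads of the
// individual lanes when the cost model favors it, freezing indices as needed.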
bool VectorCombine::scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
  // ...
  DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
  // ...
    for (auto &Pair : NeedFreeze)
      Pair.second.discard();
  // ...
  for (User *U : LI->users()) {
    // ...
    if (ScalarIdx.isUnsafe())
      return false;
    if (ScalarIdx.isSafeWithFreeze()) {
      NeedFreeze.try_emplace(UI, ScalarIdx);
      ScalarIdx.discard();
    // ...
        Index ? Index->getZExtValue() : -1);
  // ...
  LLVM_DEBUG(dbgs() << "Found all extractions of a vector load: " << *LI
                    << "\n  LoadExtractCost: " << OriginalCost
                    << " vs ScalarizedCost: " << ScalarizedCost << "\n");

  if (ScalarizedCost >= OriginalCost)
    return false;
  // ...
  Type *ElemType = VecTy->getElementType();
  // ...
  for (User *U : LI->users()) {
    // ...
    Value *Idx = EI->getIndexOperand();
    // ...
    auto It = NeedFreeze.find(EI);
    if (It != NeedFreeze.end())
    // ...
        Builder.CreateLoad(ElemType, GEP, EI->getName() + ".scalar"));

    Align ScalarOpAlignment =
    // ...
    NewLoad->setAlignment(ScalarOpAlignment);
    // ...
      size_t Offset = ConstIdx->getZExtValue() * DL->getTypeStoreSize(ElemType);
    // ...
    replaceValue(*EI, *NewLoad, false);
  // ...
  FailureGuard.release();
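// If a vector load feeds only bitcasts to a single scalar type of the same
// width, load that scalar type directly.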
bool VectorCombine::scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
  // ...
  Type *TargetScalarType = nullptr;
  unsigned VecBitWidth = DL->getTypeSizeInBits(VecTy);
  // ...
  for (User *U : LI->users()) {
    // ...
    Type *DestTy = BC->getDestTy();
    // ...
    unsigned DestBitWidth = DL->getTypeSizeInBits(DestTy);
    if (DestBitWidth != VecBitWidth)
      return false;
    // ...
    if (!TargetScalarType)
      TargetScalarType = DestTy;
    else if (TargetScalarType != DestTy)
      return false;
  // ...
  if (!TargetScalarType)
    return false;
  // ...
  LLVM_DEBUG(dbgs() << "Found vector load feeding only bitcasts: " << *LI
                    << "\n  OriginalCost: " << OriginalCost
                    << " vs ScalarizedCost: " << ScalarizedCost << "\n");

  if (ScalarizedCost >= OriginalCost)
    return false;
  // ...
  ScalarLoad->copyMetadata(*LI);
  // ...
  for (User *U : LI->users()) {
    // ...
    replaceValue(*BC, *ScalarLoad, false);
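// Replace extracts of an extended vector with shifts and masks of the source
// viewed as one packed integer, provided every lane is extracted.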
bool VectorCombine::scalarizeExtExtract(Instruction &I) {
  // ...
  Type *ScalarDstTy = DstTy->getElementType();
  if (DL->getTypeSizeInBits(SrcTy) != DL->getTypeSizeInBits(ScalarDstTy))
    return false;
  // ...
  unsigned ExtCnt = 0;
  bool ExtLane0 = false;
  for (User *U : Ext->users()) {
  // ...
          Instruction::And, ScalarDstTy, CostKind,
  // ...
      (ExtCnt - ExtLane0) *
  // ...
          Instruction::LShr, ScalarDstTy, CostKind,
  // ...
  if (ScalarCost > VectorCost)
    return false;

  Value *ScalarV = Ext->getOperand(0);
  // ...
  SmallDenseSet<ConstantInt *, 8> ExtractedLanes;
  bool AllExtractsTriggerUB = true;
  ExtractElementInst *LastExtract = nullptr;
  // ...
  for (User *U : Ext->users()) {
    // ...
      AllExtractsTriggerUB = false;
    // ...
    if (!LastExtract || LastExtract->comesBefore(Extract))
      LastExtract = Extract;
  // ...
  if (ExtractedLanes.size() != DstTy->getNumElements() ||
      !AllExtractsTriggerUB ||
  // ...
  uint64_t SrcEltSizeInBits = DL->getTypeSizeInBits(SrcTy->getElementType());
  uint64_t TotalBits = DL->getTypeSizeInBits(SrcTy);
  // ...
  Value *Mask = ConstantInt::get(PackedTy, EltBitMask);
  for (User *U : Ext->users()) {
    // ...
            ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)
            : (Idx * SrcEltSizeInBits);
    // ...
    U->replaceAllUsesWith(And);
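// Match a concatenation of two bitcast bool masks (zext/shl/or on
// little-endian targets) and rebuild it as one bitcast of a concatenating
// shuffle.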
bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) {
  Type *Ty = I.getType();
  // ...
  if (DL->isBigEndian())
    return false;
  // ...
  uint64_t ShAmtX = 0;
  // ...
  uint64_t ShAmtY = 0;
  // ...
  if (ShAmtX > ShAmtY) {
  // ...
  uint64_t ShAmtDiff = ShAmtY - ShAmtX;
  unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
  // ...
      MaskTy->getNumElements() != ShAmtDiff ||
      MaskTy->getNumElements() > (BitWidth / 2))
    return false;
  // ...
      Type::getIntNTy(Ty->getContext(), ConcatTy->getNumElements());
  auto *MaskIntTy = Type::getIntNTy(Ty->getContext(), ShAmtDiff);
  // ...
  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
  // ...
  if (Ty != ConcatIntTy)
  // ...
  LLVM_DEBUG(dbgs() << "Found a concatenation of bitcasted bool masks: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  // ...
  if (NewCost > OldCost)
    return false;
  // ...
  if (Ty != ConcatIntTy) {
  // ...
  replaceValue(I, *Result);
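// Sink a unary permute below a binop whose operands are themselves shuffles,
// folding the outer mask into the operand masks.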
bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
  BinaryOperator *BinOp;
  ArrayRef<int> OuterMask;
  // ...
  Value *Op00, *Op01, *Op10, *Op11;
  ArrayRef<int> Mask0, Mask1;
  // ...
  if (!Match0 && !Match1)
    return false;
  // ...
  if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
    return false;

  unsigned NumSrcElts = BinOpTy->getNumElements();
  // ...
      any_of(OuterMask, [NumSrcElts](int M) { return M >= (int)NumSrcElts; }))
  // ...
  SmallVector<int> NewMask0, NewMask1;
  for (int M : OuterMask) {
    if (M < 0 || M >= (int)NumSrcElts) {
    // ...
      NewMask0.push_back(Match0 ? Mask0[M] : M);
      NewMask1.push_back(Match1 ? Mask1[M] : M);
  // ...
  unsigned NumOpElts = Op0Ty->getNumElements();
  bool IsIdentity0 =
      ShuffleDstTy == Op0Ty &&
      all_of(NewMask0, [NumOpElts](int M) { return M < (int)NumOpElts; }) &&
  // ...
  bool IsIdentity1 =
      ShuffleDstTy == Op1Ty &&
      all_of(NewMask1, [NumOpElts](int M) { return M < (int)NumOpElts; }) &&
  // ...
      ShuffleDstTy, BinOpTy, OuterMask, CostKind,
      0, nullptr, {BinOp}, &I);
  // ...
  NewCost += BinOpCost;
  // ...
    OldCost += Shuf0Cost;
  // ...
    NewCost += Shuf0Cost;
  // ...
    OldCost += Shuf1Cost;
  // ...
    NewCost += Shuf1Cost;
  // ...
        Op0Ty, NewMask0, CostKind, 0, nullptr, {Op00, Op01});
  // ...
        Op1Ty, NewMask1, CostKind, 0, nullptr, {Op10, Op11});

  LLVM_DEBUG(dbgs() << "Found a shuffle feeding a shuffled binop: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  // ...
  if (NewCost > OldCost)
    return false;
  // ...
  NewInst->copyIRFlags(BinOp);
  // ...
  replaceValue(I, *NewBO);
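// Move a shuffle of two same-opcode binops (or compares) after the operation:
// shuffle(op(X, Y), op(Z, W)) -> op(shuffle(X, Z), shuffle(Y, W)).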
bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
  ArrayRef<int> OldMask;
  // ...
  if (LHS->getOpcode() != RHS->getOpcode())
    return false;

  bool IsCommutative = false;
  // ...
    IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
  // ...
  if (!ShuffleDstTy || !BinResTy || !BinOpTy || X->getType() != Z->getType())
    return false;

  unsigned NumSrcElts = BinOpTy->getNumElements();
  // ...
  if (IsCommutative && X != Z && Y != W && (X == W || Y == Z))
  // ...
  auto ConvertToUnary = [NumSrcElts](int &M) {
    if (M >= (int)NumSrcElts)
  // ...
  SmallVector<int> NewMask0(OldMask);
  // ...
  SmallVector<int> NewMask1(OldMask);
  // ...
    ArrayRef<int> InnerMask;
    // ...
                           m_Mask(InnerMask)))) &&
    // ...
          [NumSrcElts](int M) { return M < (int)NumSrcElts; })) {
  // ...
  bool ReducedInstCount = false;
  ReducedInstCount |= MergeInner(X, 0, NewMask0, CostKind);
  ReducedInstCount |= MergeInner(Y, 0, NewMask1, CostKind);
  ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0, CostKind);
  ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1, CostKind);

  bool SingleSrcBinOp = (X == Y) && (Z == W) && (NewMask0 == NewMask1);
  ReducedInstCount |= SingleSrcBinOp;
  // ...
  auto *ShuffleCmpTy =
  // ...
      SK0, ShuffleCmpTy, BinOpTy, NewMask0, CostKind, 0, nullptr, {X, Z});
  if (!SingleSrcBinOp)
  // ...
      PredLHS, CostKind, Op0Info, Op1Info);
  // ...
             << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  // ...
  if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
    return false;
  // ...
                 : Builder.CreateCmp(PredLHS, Shuf0, Shuf1);
  // ...
  NewInst->copyIRFlags(LHS);
  NewInst->andIRFlags(RHS);
  // ...
  replaceValue(I, *NewBO);
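// shuffle(select(C1, T1, F1), select(C2, T2, F2)) -> select of shuffled
// conditions/values, provided fast-math flags (if any) match.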
bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
  // ...
  Value *C1, *T1, *F1, *C2, *T2, *F2;
  // ...
  if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
    return false;
  // ...
  if (((SI0FOp == nullptr) != (SI1FOp == nullptr)) ||
      ((SI0FOp != nullptr) &&
       (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))
    return false;
  // ...
  auto SelOp = Instruction::Select;
  // ...
      CostSel1 + CostSel2 +
  // ...
      {I.getOperand(0), I.getOperand(1)}, &I);
  // ...
      Mask, CostKind, 0, nullptr, {C1, C2});
  // ...
  if (!Sel1->hasOneUse())
    NewCost += CostSel1;
  if (!Sel2->hasOneUse())
    NewCost += CostSel2;
  // ...
             << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  // ...
  if (NewCost > OldCost)
    return false;
  // ...
    NewSel = Builder.CreateSelectFMF(ShuffleCmp, ShuffleTrue, ShuffleFalse,
                                     SI0FOp->getFastMathFlags());
  // ...
    NewSel = Builder.CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
  // ...
  replaceValue(I, *NewSel);
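// Move a shuffle before one or two matching casts, rescaling the mask across
// any bitcast-induced element-count change.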
bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
  // ...
  ArrayRef<int> OldMask;
  // ...
  if (!C0 || (IsBinaryShuffle && !C1))
    return false;
  // ...
  if (!IsBinaryShuffle && Opcode == Instruction::BitCast)
    return false;

  if (IsBinaryShuffle) {
    if (C0->getSrcTy() != C1->getSrcTy())
      return false;
    // ...
    if (Opcode != C1->getOpcode()) {
      // ...
      Opcode = Instruction::SExt;
  // ...
  if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
    return false;

  unsigned NumSrcElts = CastSrcTy->getNumElements();
  unsigned NumDstElts = CastDstTy->getNumElements();
  assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
         "Only bitcasts expected to alter src/dst element counts");
  // ...
  if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
      (NumDstElts % NumSrcElts) != 0)
    return false;

  SmallVector<int, 16> NewMask;
  if (NumSrcElts >= NumDstElts) {
    // ...
    assert(NumSrcElts % NumDstElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumSrcElts / NumDstElts;
  // ...
    assert(NumDstElts % NumSrcElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumDstElts / NumSrcElts;
  // ...
  auto *NewShuffleDstTy =
  // ...
  if (IsBinaryShuffle)
  // ...
  if (IsBinaryShuffle) {
  // ...
             << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  // ...
  if (NewCost > OldCost)
    return false;
  // ...
  if (IsBinaryShuffle)
  // ...
  NewInst->copyIRFlags(C0);
  if (IsBinaryShuffle)
    NewInst->andIRFlags(C1);
  // ...
  replaceValue(I, *Cast);
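// Merge a shuffle of one or two inner shuffles into a single shuffle over at
// most two of the innermost source vectors.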
bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
  ArrayRef<int> OuterMask;
  Value *OuterV0, *OuterV1;
  // ...
  ArrayRef<int> InnerMask0, InnerMask1;
  Value *X0, *X1, *Y0, *Y1;
  // ...
  if (!Match0 && !Match1)
    return false;
  // ...
  SmallVector<int, 16> PoisonMask1;
  // ...
    InnerMask1 = PoisonMask1;
  // ...
  X0 = Match0 ? X0 : OuterV0;
  Y0 = Match0 ? Y0 : OuterV0;
  X1 = Match1 ? X1 : OuterV1;
  Y1 = Match1 ? Y1 : OuterV1;
  // ...
  if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
  // ...
  unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
  unsigned NumImmElts = ShuffleImmTy->getNumElements();
  // ...
  SmallVector<int, 16> NewMask(OuterMask);
  Value *NewX = nullptr, *NewY = nullptr;
  for (int &M : NewMask) {
    Value *Src = nullptr;
    if (0 <= M && M < (int)NumImmElts) {
      // ...
      Src = M >= (int)NumSrcElts ? Y0 : X0;
      M = M >= (int)NumSrcElts ? (M - NumSrcElts) : M;
    } else if (M >= (int)NumImmElts) {
      // ...
      Src = M >= (int)NumSrcElts ? Y1 : X1;
      M = M >= (int)NumSrcElts ? (M - NumSrcElts) : M;
    // ...
    assert(0 <= M && M < (int)NumSrcElts && "Unexpected shuffle mask index");
    // ...
    if (!NewX || NewX == Src) {
    // ...
    if (!NewY || NewY == Src) {
  // ...
    replaceValue(I, *NewX);
  // ...
  bool IsUnary = all_of(NewMask, [&](int M) { return M < (int)NumSrcElts; });
  // ...
      nullptr, {NewX, NewY});
  // ...
    NewCost += InnerCost0;
  // ...
    NewCost += InnerCost1;
  // ...
             << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  // ...
  if (NewCost > OldCost)
    return false;
  // ...
  replaceValue(I, *Shuf);
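// Walk a chain of same-length shuffles that is fed by length-changing
// shuffles and fold the whole chain when the accumulated cost drops.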
bool VectorCombine::foldShufflesOfLengthChangingShuffles(Instruction &I) {
  // ...
  unsigned ChainLength = 0;
  SmallVector<int> Mask;
  SmallVector<int> YMask;
  // ...
    ArrayRef<int> OuterMask;
    Value *OuterV0, *OuterV1;
    if (ChainLength != 0 && !Trunk->hasOneUse())
    // ...
                        m_Mask(OuterMask))))
    // ...
    if (OuterV0->getType() != TrunkType) {
    // ...
    ArrayRef<int> InnerMask0, InnerMask1;
    Value *A0, *A1, *B0, *B1;
    // ...
    bool Match0Leaf = Match0 && A0->getType() != I.getType();
    bool Match1Leaf = Match1 && A1->getType() != I.getType();
    if (Match0Leaf == Match1Leaf) {
    // ...
      SmallVector<int> CommutedOuterMask;
      // ...
      for (int &M : CommutedOuterMask) {
        // ...
        if (M < (int)NumTrunkElts)
      // ...
      OuterMask = CommutedOuterMask;
    // ...
    int NumLeafElts = YType->getNumElements();
    SmallVector<int> LocalYMask(InnerMask1);
    for (int &M : LocalYMask) {
      if (M >= NumLeafElts)
    // ...
      Mask.assign(OuterMask);
      YMask.assign(LocalYMask);
      OldCost = NewCost = LocalOldCost;
    // ...
      SmallVector<int> NewYMask(YMask);
      // ...
      for (auto [CombinedM, LeafM] : llvm::zip(NewYMask, LocalYMask)) {
        if (LeafM == -1 || CombinedM == LeafM)
        // ...
        if (CombinedM == -1) {
      // ...
      SmallVector<int> NewMask;
      NewMask.reserve(NumTrunkElts);
      for (int M : Mask) {
        if (M < 0 || M >= static_cast<int>(NumTrunkElts))
      // ...
      if (LocalNewCost >= NewCost && LocalOldCost < LocalNewCost - NewCost)
    // ...
      if (ChainLength == 1) {
        dbgs() << "Found chain of shuffles fed by length-changing shuffles: "
        // ...
        dbgs() << "  next chain link: " << *Trunk << '\n'
               << "  old cost: " << (OldCost + LocalOldCost)
               << "  new cost: " << LocalNewCost << '\n';
    // ...
    OldCost += LocalOldCost;
    NewCost = LocalNewCost;
  // ...
  if (ChainLength <= 1)
    return false;
  // ...
    return M < 0 || M >= static_cast<int>(NumTrunkElts);
  // ...
  for (int &M : Mask) {
    if (M >= static_cast<int>(NumTrunkElts))
      M = YMask[M - NumTrunkElts];
  // ...
    replaceValue(I, *Root);
  // ...
  replaceValue(I, *Root);
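// shuffle(intrinsic(A...), intrinsic(B...)) with matching IDs ->
// intrinsic(shuffle(A, B)...), caching shuffles of repeated operand pairs.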
bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
  // ...
  ArrayRef<int> OldMask;
  // ...
  if (IID != II1->getIntrinsicID())
    return false;
  // ...
  if (!ShuffleDstTy || !II0Ty)
    return false;
  // ...
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)
    // ...
        II0->getArgOperand(I) != II1->getArgOperand(I))
  // ...
      II0Ty, OldMask, CostKind, 0, nullptr, {II0, II1}, &I);
  // ...
  SmallDenseSet<std::pair<Value *, Value *>> SeenOperandPairs;
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) {
    // ...
      NewArgsTy.push_back(II0->getArgOperand(I)->getType());
    // ...
          ShuffleDstTy->getNumElements());
      // ...
      std::pair<Value *, Value *> OperandPair =
          std::make_pair(II0->getArgOperand(I), II1->getArgOperand(I));
      if (!SeenOperandPairs.insert(OperandPair).second) {
      // ...
          CostKind, 0, nullptr,
          {II0->getArgOperand(I), II1->getArgOperand(I)});
  // ...
  IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
  // ...
  if (!II0->hasOneUse())
  // ...
  if (II1 != II0 && !II1->hasOneUse())
  // ...
             << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  // ...
  if (NewCost > OldCost)
    return false;
  // ...
  SmallDenseMap<std::pair<Value *, Value *>, Value *> ShuffleCache;
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)
    // ...
      std::pair<Value *, Value *> OperandPair =
          std::make_pair(II0->getArgOperand(I), II1->getArgOperand(I));
      auto It = ShuffleCache.find(OperandPair);
      if (It != ShuffleCache.end()) {
      // ...
          II1->getArgOperand(I), OldMask);
      ShuffleCache[OperandPair] = Shuf;
  // ...
  NewInst->copyIRFlags(II0);
  NewInst->andIRFlags(II1);
  // ...
  replaceValue(I, *NewIntrinsic);
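// Sink a unary permute through a vector intrinsic by permuting each vector
// argument instead.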
bool VectorCombine::foldPermuteOfIntrinsic(Instruction &I) {
  // ...
  if (!ShuffleDstTy || !IntrinsicSrcTy)
    return false;
  // ...
  unsigned NumSrcElts = IntrinsicSrcTy->getNumElements();
  if (any_of(Mask, [NumSrcElts](int M) { return M >= (int)NumSrcElts; }))
    return false;
  // ...
      IntrinsicSrcTy, Mask, CostKind, 0, nullptr, {V0}, &I);
  // ...
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) {
    // ...
      NewArgsTy.push_back(II0->getArgOperand(I)->getType());
    // ...
          ShuffleDstTy->getNumElements());
      // ...
          ArgTy, VecTy, Mask, CostKind, 0, nullptr,
          {II0->getArgOperand(I)});
  // ...
  IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
  // ...
  if (!II0->hasOneUse())
  // ...
  LLVM_DEBUG(dbgs() << "Found a permute of intrinsic: " << I << "\n  OldCost: "
                    << OldCost << " vs NewCost: " << NewCost << "\n");

  if (NewCost > OldCost)
    return false;
  // ...
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) {
  // ...
  replaceValue(I, *NewIntrinsic);
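// Helpers for foldShuffleToIdentity: trace a (use, lane) pair through
// shuffles, and rebuild the instruction tree once the leaves are classified
// as identity, splat, or concat sources.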
  int M = SV->getMaskValue(Lane);
  // ...
  if (static_cast<unsigned>(M) < NumElts) {
    U = &SV->getOperandUse(0);
  // ...
    U = &SV->getOperandUse(1);
  // ...
  auto [U, Lane] = IL;
  // ...
  unsigned NumElts = Ty->getNumElements();
  if (Item.size() == NumElts || NumElts == 1 || Item.size() % NumElts != 0)
  // ...
  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
  // ...
  unsigned NumSlices = Item.size() / NumElts;
  // ...
  for (unsigned Slice = 0; Slice < NumSlices; ++Slice) {
    Use *SliceV = Item[Slice * NumElts].first;
    if (!SliceV || SliceV->get()->getType() != Ty)
    // ...
    for (unsigned Elt = 0; Elt < NumElts; ++Elt) {
      auto [V, Lane] = Item[Slice * NumElts + Elt];
      if (Lane != static_cast<int>(Elt) || SliceV->get() != V->get())
  // ...
  auto [FrontU, FrontLane] = Item.front();
  // ...
  if (IdentityLeafs.contains(FrontU)) {
    return FrontU->get();
  // ...
    return Builder.CreateShuffleVector(FrontU->get(), Mask);
  // ...
  if (ConcatLeafs.contains(FrontU)) {
    // ...
    for (unsigned S = 0; S < Values.size(); ++S)
      Values[S] = Item[S * NumElts].first->get();
    // ...
    while (Values.size() > 1) {
      // ...
      std::iota(Mask.begin(), Mask.end(), 0);
      // ...
      for (unsigned S = 0; S < NewValues.size(); ++S)
        // ...
            Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask);
  // ...
  unsigned NumOps = I->getNumOperands() - (II ? 1 : 0);
  // ...
  for (unsigned Idx = 0; Idx < NumOps; Idx++) {
    // ...
      Ops[Idx] = II->getOperand(Idx);
    // ...
        Ty, IdentityLeafs, SplatLeafs, ConcatLeafs,
  // ...
  for (const auto &Lane : Item)
  // ...
    auto *Value = Builder.CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
  // ...
    auto *Value = Builder.CreateCast(CI->getOpcode(), Ops[0], DstTy);
  // ...
    auto *Value = Builder.CreateIntrinsic(DstTy, II->getIntrinsicID(), Ops);
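// Detect lane-wise instruction trees whose shuffles cancel out to an identity
// (or splat/concat) and regenerate them without the superfluous shuffles.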
bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
  // ...
  if (!Ty || I.use_empty())
    return false;
  // ...
  for (unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)
  // ...
  SmallPtrSet<Use *, 4> IdentityLeafs, SplatLeafs, ConcatLeafs;
  unsigned NumVisited = 0;
  // ...
  while (!Worklist.empty()) {
    // ...
    auto [FrontU, FrontLane] = Item.front();
    // ...
      return X->getType() == Y->getType() &&
    // ...
    if (FrontLane == 0 &&
        // ...
            Ty->getNumElements() &&
        // ...
          return !E.value().first || (IsEquiv(E.value().first->get(), FrontV) &&
                                      E.value().second == (int)E.index());
      // ...
      IdentityLeafs.insert(FrontU);
    // ...
        C && C->getSplatValue() &&
    // ...
      SplatLeafs.insert(FrontU);
    // ...
      auto [FrontU, FrontLane] = Item.front();
      auto [U, Lane] = IL;
      return !U || (U->get() == FrontU->get() && Lane == FrontLane);
    // ...
      SplatLeafs.insert(FrontU);
    // ...
    auto CheckLaneIsEquivalentToFirst = [Item](InstLane IL) {
      // ...
      Value *V = IL.first->get();
      // ...
        if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
      // ...
        if (CI->getSrcTy()->getScalarType() !=
      // ...
          SI->getOperand(0)->getType() !=
      // ...
          II->getIntrinsicID() ==
      // ...
          !II->hasOperandBundles());
    // ...
        BO && BO->isIntDivRem())
    // ...
    } else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst, FPToSIInst,
                   FPToUIInst, SIToFPInst, UIToFPInst>(FrontU)) {
    // ...
      if (DstTy && SrcTy &&
          SrcTy->getNumElements() == DstTy->getNumElements()) {
    // ...
        !II->hasOperandBundles()) {
      for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
    // ...
      ConcatLeafs.insert(FrontU);
  // ...
  if (NumVisited <= 1)
    return false;

  LLVM_DEBUG(dbgs() << "Found a superfluous identity shuffle: " << I << "\n");
  // ...
                                 ConcatLeafs, Builder, &TTI);
  replaceValue(I, *V);
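// A shuffle feeding a commutative reduction only has to keep the used lanes
// together; try to replace it with a cheaper concatenating mask.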
bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
  // ...
  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax:
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax:
  // ...
  std::queue<Value *> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  ShuffleVectorInst *Shuffle = nullptr;
  // ...
  while (!Worklist.empty()) {
    Value *CV = Worklist.front();
    // ...
    if (CI->isBinaryOp()) {
      for (auto *Op : CI->operand_values())
    // ...
      if (Shuffle && Shuffle != SV)
  // ...
  for (auto *V : Visited)
    for (auto *U : V->users())
      if (!Visited.contains(U) && U != &I)
  // ...
  FixedVectorType *VecType =
  // ...
  FixedVectorType *ShuffleInputType =
  // ...
  if (!ShuffleInputType)
    return false;
  // ...
  SmallVector<int> ConcatMask;
  // ...
  sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
  bool UsesSecondVec =
      any_of(ConcatMask, [&](int M) { return M >= (int)NumInputElts; });
  // ...
      ShuffleInputType, ConcatMask, CostKind);

  LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
  // ...
  LLVM_DEBUG(dbgs() << "  OldCost: " << OldCost << " vs NewCost: " << NewCost
  // ...
  bool MadeChanges = false;
  if (NewCost < OldCost) {
    // ...
    LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n");
    replaceValue(*Shuffle, *NewShuffle);
  // ...
  MadeChanges |= foldSelectShuffle(*Shuffle, true);
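// Recognize a log2(N)-level halving chain of shuffles plus min/max calls or
// binops that ends in an extract of lane 0, and replace it with a single
// vector reduction intrinsic.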
bool VectorCombine::foldShuffleChainsToReduce(Instruction &I) {
  // ...
  std::queue<Value *> InstWorklist;
  // ...
  std::optional<unsigned int> CommonCallOp = std::nullopt;
  std::optional<Instruction::BinaryOps> CommonBinOp = std::nullopt;

  bool IsFirstCallOrBinInst = true;
  bool ShouldBeCallOrBinInst = true;
  // ...
  SmallVector<Value *, 2> PrevVecV(2, nullptr);
  // ...
  int64_t VecSize = FVT->getNumElements();
  // ...
  unsigned int NumLevels = Log2_64_Ceil(VecSize), VisitedCnt = 0;
  int64_t ShuffleMaskHalf = 1, ExpectedParityMask = 0;
  // ...
  for (int Cur = VecSize, Mask = NumLevels - 1; Cur > 1;
       Cur = (Cur + 1) / 2, --Mask) {
    // ...
      ExpectedParityMask |= (1ll << Mask);
  // ...
  InstWorklist.push(VecOpEE);

  while (!InstWorklist.empty()) {
    Value *CI = InstWorklist.front();
    // ...
      if (!ShouldBeCallOrBinInst)
        return false;
      // ...
      if (!IsFirstCallOrBinInst && any_of(PrevVecV, equal_to(nullptr)))
        return false;
      // ...
      if (II != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
        return false;
      IsFirstCallOrBinInst = false;
      // ...
        CommonCallOp = II->getIntrinsicID();
      if (II->getIntrinsicID() != *CommonCallOp)
        return false;
      // ...
      switch (II->getIntrinsicID()) {
      case Intrinsic::umin:
      case Intrinsic::umax:
      case Intrinsic::smin:
      case Intrinsic::smax: {
        auto *Op0 = II->getOperand(0);
        auto *Op1 = II->getOperand(1);
      // ...
      ShouldBeCallOrBinInst ^= 1;
      // ...
      IntrinsicCostAttributes ICA(
          *CommonCallOp, II->getType(),
          {PrevVecV[0]->getType(), PrevVecV[1]->getType()});
      // ...
      InstWorklist.push(PrevVecV[1]);
      InstWorklist.push(PrevVecV[0]);
    // ...
      if (!ShouldBeCallOrBinInst)
        return false;
      // ...
      if (!IsFirstCallOrBinInst && any_of(PrevVecV, equal_to(nullptr)))
        return false;
      // ...
      if (BinOp != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
        return false;
      IsFirstCallOrBinInst = false;
      // ...
      switch (*CommonBinOp) {
      case BinaryOperator::Add:
      case BinaryOperator::Mul:
      case BinaryOperator::Or:
      case BinaryOperator::And:
      case BinaryOperator::Xor: {
      // ...
      ShouldBeCallOrBinInst ^= 1;
      // ...
      InstWorklist.push(PrevVecV[1]);
      InstWorklist.push(PrevVecV[0]);
    // ...
      if (ShouldBeCallOrBinInst || any_of(PrevVecV, equal_to(nullptr)))
        return false;
      // ...
      if (SVInst != PrevVecV[1])
        return false;
      // ...
      ArrayRef<int> CurMask;
      // ...
      for (int Mask = 0, MaskSize = CurMask.size(); Mask != MaskSize; ++Mask) {
        if (Mask < ShuffleMaskHalf &&
            CurMask[Mask] != ShuffleMaskHalf + Mask - (ExpectedParityMask & 1))
          return false;
        if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1)
          return false;
      }
      // ...
      ShuffleMaskHalf *= 2;
      ShuffleMaskHalf -= (ExpectedParityMask & 1);
      ExpectedParityMask >>= 1;
      // ...
          SVInst->getType(), SVInst->getType(),
      // ...
      if (!ExpectedParityMask && VisitedCnt == NumLevels)
      // ...
      ShouldBeCallOrBinInst ^= 1;
  // ...
  if (ShouldBeCallOrBinInst)
    return false;

  assert(VecSize != -1 && "Expected Match for Vector Size");

  Value *FinalVecV = PrevVecV[0];
  // ...
  IntrinsicCostAttributes ICA(ReducedOp, FinalVecVTy, {FinalVecV});
  // ...
  if (NewCost >= OrigCost)
    return false;

  auto *ReducedResult =
  // ...
  replaceValue(I, *ReducedResult);
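// Move a cast out of a reduction: reduce(cast(X)) -> cast(reduce(X)) where
// the reduction kind tolerates it (bitwise kinds allow trunc only).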
bool VectorCombine::foldCastFromReductions(Instruction &I) {
  // ...
  bool TruncOnly = false;
  // ...
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  // ...
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  // ...
  Value *ReductionSrc = I.getOperand(0);
  // ...
  Type *ResultTy = I.getType();
  // ...
      ReductionOpc, ReductionSrcTy, std::nullopt, CostKind);
  // ...
  if (OldCost <= NewCost || !NewCost.isValid())
    return false;
  // ...
      II->getIntrinsicID(), {Src});
  // ...
  replaceValue(I, *NewCast);
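// Turn an equality compare of a reduction against 0 or its maximum value into
// a sign-bit test, re-expressed via whichever of or/umax (or and/umin) the
// target costs cheaper.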
bool VectorCombine::foldSignBitReductionCmp(Instruction &I) {
  // ...
  const APInt *CmpVal;
  // ...
  if (!II || !II->hasOneUse())
    return false;
  // ...
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_umax:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_add:
  // ...
  Value *ReductionSrc = II->getArgOperand(0);
  // ...
  unsigned BitWidth = VecTy->getScalarSizeInBits();
  unsigned NumElts = VecTy->getNumElements();
  // ...
  if (OrigIID == Intrinsic::vector_reduce_add && !isUIntN(BitWidth, NumElts))
    return false;
  // ...
      OrigIID == Intrinsic::vector_reduce_add ? NumElts : 1);
  // ...
  if (!CmpVal->isZero() && *CmpVal != MaxVal)
    return false;
  IsEq = Pred == ICmpInst::ICMP_EQ;
  TestsHigh = *CmpVal == MaxVal;
  // ...
  enum CheckKind : unsigned {
  // ...
  auto RequiresOr = [](CheckKind C) -> bool { return C & 0b100; };
  // ...
  auto IsNegativeCheck = [](CheckKind C) -> bool { return C & 0b010; };
  // ...
  auto Invert = [](CheckKind C) { return CheckKind(C ^ 0b011); };
  // ...
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_umax:
    Base = TestsHigh ? AnyNeg : AllNonNeg;
    break;
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_umin:
    Base = TestsHigh ? AllNeg : AnyNonNeg;
    break;
  case Intrinsic::vector_reduce_add:
    Base = TestsHigh ? AllNeg : AllNonNeg;
    break;
  // ...
    return ArithCost <= MinMaxCost ? std::make_pair(Arith, ArithCost)
                                   : std::make_pair(MinMax, MinMaxCost);
  // ...
  auto [NewIID, NewCost] = RequiresOr(Check)
                               ? PickCheaper(Intrinsic::vector_reduce_or,
                                             Intrinsic::vector_reduce_umax)
                               : PickCheaper(Intrinsic::vector_reduce_and,
  // ...
  LLVM_DEBUG(dbgs() << "Found sign-bit reduction cmp: " << I << "\n  OldCost: "
                    << OldCost << " vs NewCost: " << NewCost << "\n");

  if (NewCost > OldCost)
    return false;
  // ...
  Type *ScalarTy = VecTy->getScalarType();
  // ...
  replaceValue(I, *NewCmp);
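// For "icmp eq/ne (reduce(...)), 0", strip an inner operation (such as an
// extension) that cannot change whether the reduction result is zero.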
bool VectorCombine::foldICmpEqZeroVectorReduce(Instruction &I) {
  // ...
  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax:
  // ...
  Value *InnerOp = II->getArgOperand(0);
  // ...
  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_add: {
    // ...
    unsigned NumElems = XTy->getNumElements();
    // ...
    if (LeadingZerosX <= LostBits || LeadingZerosFX <= LostBits)
      return false;
  // ...
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax:
  // ...
  LLVM_DEBUG(dbgs() << "Found a reduction to 0 comparison with removable op: "
  // ...
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_or:
  // ...
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax:
  // ...
      NewReduceCost + (InnerOp->hasOneUse() ? 0 : ExtCost);

  LLVM_DEBUG(dbgs() << "Found a removable extension before reduction: "
                    << *InnerOp << "\n  OldCost: " << OldCost
                    << " vs NewCost: " << NewCost << "\n");
  // ...
  if (NewCost > OldCost)
    return false;
  // ...
      Builder.CreateICmp(Pred, NewReduce, ConstantInt::getNullValue(Ty));
  replaceValue(I, *NewCmp);
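// For compares where or/umax (resp. and/umin) reductions give equivalent
// results, switch to whichever reduction is cheaper on the target.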
bool VectorCombine::foldEquivalentReductionCmp(Instruction &I) {
  // ...
  const APInt *CmpVal;
  // ...
  if (!II || !II->hasOneUse())
    return false;

  const auto IsValidOrUmaxCmp = [&]() {
    // ...
    bool IsPositive = CmpVal->isAllOnes() && Pred == ICmpInst::ICMP_SGT;
    // ...
    bool IsNegative = (CmpVal->isZero() || CmpVal->isOne() || *CmpVal == 2) &&
                      Pred == ICmpInst::ICMP_SLT;
    return IsEquality || IsPositive || IsNegative;
  // ...
  const auto IsValidAndUminCmp = [&]() {
    const auto LeadingOnes = CmpVal->countl_one();
    // ...
    bool IsNegative = CmpVal->isZero() && Pred == ICmpInst::ICMP_SLT;
    // ...
        ((*CmpVal)[0] || (*CmpVal)[1]) && Pred == ICmpInst::ICMP_SGT;
    return IsEquality || IsNegative || IsPositive;
  // ...
  switch (OriginalIID) {
  case Intrinsic::vector_reduce_or:
    if (!IsValidOrUmaxCmp())
      return false;
    AlternativeIID = Intrinsic::vector_reduce_umax;
    break;
  case Intrinsic::vector_reduce_umax:
    if (!IsValidOrUmaxCmp())
      return false;
    AlternativeIID = Intrinsic::vector_reduce_or;
    break;
  case Intrinsic::vector_reduce_and:
    if (!IsValidAndUminCmp())
      return false;
    AlternativeIID = Intrinsic::vector_reduce_umin;
    break;
  case Intrinsic::vector_reduce_umin:
    if (!IsValidAndUminCmp())
      return false;
    AlternativeIID = Intrinsic::vector_reduce_and;
    break;
  // ...
  if (ReductionOpc != Instruction::ICmp)
    return false;
  // ...
             << "\n  OrigCost: " << OrigCost
             << " vs AltCost: " << AltCost << "\n");

  if (AltCost >= OrigCost)
    return false;
  // ...
  Type *ScalarTy = VecTy->getScalarType();
  // ...
      Builder.CreateICmp(Pred, NewReduce, ConstantInt::get(ScalarTy, *CmpVal));
  // ...
  replaceValue(I, *NewCmp);
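// Helper: walk the users of an instruction (up to a small visit limit) to see
// whether its value ultimately feeds a vector reduction intrinsic.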
  constexpr unsigned MaxVisited = 32;
  // ...
  bool FoundReduction = false;
  // ...
  while (!WorkList.empty()) {
    // ...
    for (User *U : I->users()) {
      // ...
      if (!UI || !Visited.insert(UI).second)
        continue;
      if (Visited.size() > MaxVisited)
        return false;
      // ...
      switch (II->getIntrinsicID()) {
      case Intrinsic::vector_reduce_add:
      case Intrinsic::vector_reduce_mul:
      case Intrinsic::vector_reduce_and:
      case Intrinsic::vector_reduce_or:
      case Intrinsic::vector_reduce_xor:
      case Intrinsic::vector_reduce_smin:
      case Intrinsic::vector_reduce_smax:
      case Intrinsic::vector_reduce_umin:
      case Intrinsic::vector_reduce_umax:
        FoundReduction = true;
  // ...
  return FoundReduction;
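// Given two binops whose inputs and outputs are "select"-style shuffles of
// one another, re-sort the lanes so the surrounding shuffles get cheaper or
// vanish entirely.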
4761bool VectorCombine::foldSelectShuffle(Instruction &
I,
bool FromReduction) {
4766 if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
4774 SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
4776 if (!
I ||
I->getOperand(0)->getType() != VT)
4778 return any_of(
I->users(), [&](User *U) {
4779 return U != Op0 && U != Op1 &&
4780 !(isa<ShuffleVectorInst>(U) &&
4781 (InputShuffles.contains(cast<Instruction>(U)) ||
4782 isInstructionTriviallyDead(cast<Instruction>(U))));
4785 if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
4786 checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
4794 for (
auto *U :
I->users()) {
4796 if (!SV || SV->getType() != VT)
4798 if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
4799 (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
4806 if (!collectShuffles(Op0) || !collectShuffles(Op1))
4810 if (FromReduction && Shuffles.
size() > 1)
4815 if (!FromReduction) {
4816 for (ShuffleVectorInst *SV : Shuffles) {
4817 for (
auto *U : SV->users()) {
4820 Shuffles.push_back(SSV);
4832 int MaxV1Elt = 0, MaxV2Elt = 0;
4833 unsigned NumElts = VT->getNumElements();
4834 for (ShuffleVectorInst *SVN : Shuffles) {
4835 SmallVector<int>
Mask;
4836 SVN->getShuffleMask(Mask);
4840 Value *SVOp0 = SVN->getOperand(0);
4841 Value *SVOp1 = SVN->getOperand(1);
4846 for (
int &Elem : Mask) {
4852 if (SVOp0 == Op1 && SVOp1 == Op0) {
4856 if (SVOp0 != Op0 || SVOp1 != Op1)
4862 SmallVector<int> ReconstructMask;
4863 for (
unsigned I = 0;
I <
Mask.size();
I++) {
4866 }
else if (Mask[
I] <
static_cast<int>(NumElts)) {
4867 MaxV1Elt = std::max(MaxV1Elt, Mask[
I]);
4868 auto It =
find_if(V1, [&](
const std::pair<int, int> &
A) {
4869 return Mask[
I] ==
A.first;
4878 MaxV2Elt = std::max<int>(MaxV2Elt, Mask[
I] - NumElts);
4879 auto It =
find_if(V2, [&](
const std::pair<int, int> &
A) {
4880 return Mask[
I] -
static_cast<int>(NumElts) ==
A.first;
4894 sort(ReconstructMask);
4895 OrigReconstructMasks.
push_back(std::move(ReconstructMask));
4903 (MaxV1Elt ==
static_cast<int>(V1.
size()) - 1 &&
4904 MaxV2Elt ==
static_cast<int>(V2.
size()) - 1))
4916 if (InputShuffles.contains(SSV))
4918 return SV->getMaskValue(M);
4926 std::pair<int, int>
Y) {
4927 int MXA = GetBaseMaskValue(
A,
X.first);
4928 int MYA = GetBaseMaskValue(
A,
Y.first);
4931 stable_sort(V1, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
4932 return SortBase(SVI0A,
A,
B);
4934 stable_sort(V2, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
4935 return SortBase(SVI1A,
A,
B);
4940 for (
const auto &Mask : OrigReconstructMasks) {
4941 SmallVector<int> ReconstructMask;
4942 for (
int M : Mask) {
4944 auto It =
find_if(V, [M](
auto A) {
return A.second ==
M; });
4945 assert(It !=
V.end() &&
"Expected all entries in Mask");
4946 return std::distance(
V.begin(), It);
4950 else if (M <
static_cast<int>(NumElts)) {
4951 ReconstructMask.
push_back(FindIndex(V1, M));
4953 ReconstructMask.
push_back(NumElts + FindIndex(V2, M));
4956 ReconstructMasks.
push_back(std::move(ReconstructMask));
4961 SmallVector<int> V1A, V1B, V2A, V2B;
4962 for (
unsigned I = 0;
I < V1.
size();
I++) {
4963 V1A.
push_back(GetBaseMaskValue(SVI0A, V1[
I].first));
4964 V1B.
push_back(GetBaseMaskValue(SVI0B, V1[
I].first));
4966 for (
unsigned I = 0;
I < V2.
size();
I++) {
4967 V2A.
push_back(GetBaseMaskValue(SVI1A, V2[
I].first));
4968 V2B.
push_back(GetBaseMaskValue(SVI1B, V2[
I].first));
4970 while (V1A.
size() < NumElts) {
4974 while (V2A.
size() < NumElts) {
4986 VT, VT, SV->getShuffleMask(),
CostKind);
4993 unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
4994 unsigned MaxVectorSize =
4996 unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
4997 if (MaxElementsInVector == 0)
5006 std::set<SmallVector<int, 4>> UniqueShuffles;
5011 unsigned NumFullVectors =
Mask.size() / MaxElementsInVector;
5012 if (NumFullVectors < 2)
5013 return C + ShuffleCost;
5014 SmallVector<int, 4> SubShuffle(MaxElementsInVector);
5015 unsigned NumUniqueGroups = 0;
5016 unsigned NumGroups =
Mask.size() / MaxElementsInVector;
5019 for (
unsigned I = 0;
I < NumFullVectors; ++
I) {
5020 for (
unsigned J = 0; J < MaxElementsInVector; ++J)
5021 SubShuffle[J] = Mask[MaxElementsInVector *
I + J];
5022 if (UniqueShuffles.insert(SubShuffle).second)
5023 NumUniqueGroups += 1;
5025 return C + ShuffleCost * NumUniqueGroups / NumGroups;
5031 SmallVector<int, 16>
Mask;
5032 SV->getShuffleMask(Mask);
5033 return AddShuffleMaskAdjustedCost(
C, Mask);
5036 auto AllShufflesHaveSameOperands =
5037 [](SmallPtrSetImpl<Instruction *> &InputShuffles) {
5038 if (InputShuffles.size() < 2)
5040 ShuffleVectorInst *FirstSV =
5047 std::next(InputShuffles.begin()), InputShuffles.end(),
5048 [&](Instruction *
I) {
5049 ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(I);
5050 return SV && SV->getOperand(0) == In0 && SV->getOperand(1) == In1;
5059 CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
5061 if (AllShufflesHaveSameOperands(InputShuffles)) {
5062 UniqueShuffles.clear();
5063 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
5066 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
5072 FixedVectorType *Op0SmallVT =
5074 FixedVectorType *Op1SmallVT =
5079 UniqueShuffles.clear();
5080 CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
5082 std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
5084 std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
5087 LLVM_DEBUG(
dbgs() <<
"Found a binop select shuffle pattern: " <<
I <<
"\n");
5089 <<
" vs CostAfter: " << CostAfter <<
"\n");
5090  if (CostBefore < CostAfter ||
          (CostBefore == CostAfter && !feedsIntoVectorReduction(SVI)))
        return false;

5101        if (InputShuffles.contains(SSV))
              return SSV->getOperand(Op);
5103    return SV->getOperand(Op);

      Value *NSV0A = Builder.CreateShuffleVector(GetShuffleOperand(SVI0A, 0),
5107                                             GetShuffleOperand(SVI0A, 1), V1A);
      Value *NSV0B = Builder.CreateShuffleVector(GetShuffleOperand(SVI0B, 0),
5110                                             GetShuffleOperand(SVI0B, 1), V1B);
      Value *NSV1A = Builder.CreateShuffleVector(GetShuffleOperand(SVI1A, 0),
5113                                             GetShuffleOperand(SVI1A, 1), V2A);
      Value *NSV1B = Builder.CreateShuffleVector(GetShuffleOperand(SVI1B, 0),
5116                                             GetShuffleOperand(SVI1B, 1), V2B);

      if (auto *I = dyn_cast<Instruction>(NOp0))
5121    I->copyIRFlags(Op0, true);

      if (auto *I = dyn_cast<Instruction>(NOp1))
5126    I->copyIRFlags(Op1, true);

5128  for (int S = 0, E = ReconstructMasks.size(); S != E; S++) {
        Value *NSV = Builder.CreateShuffleVector(NOp0, NOp1, ReconstructMasks[S]);
5131    replaceValue(*Shuffles[S], *NSV, false);
      }

5134  Worklist.pushValue(NSV0A);
5135  Worklist.pushValue(NSV0B);
5136  Worklist.pushValue(NSV1A);
5137  Worklist.pushValue(NSV1B);
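The grouped-cost computation in the AddShuffleMaskAdjustedCost lambda above is easiest to see in isolation. Below is a minimal standalone sketch of the same idea, outside of LLVM: split a wide shuffle mask into register-sized groups and count only the distinct group patterns, so a pattern repeated across groups is not billed more than once. The names (countUniqueGroupFraction, the sample mask) are illustrative, not part of the source.

#include <cassert>
#include <cstdio>
#include <set>
#include <vector>

// Count the fraction of distinct register-sized sub-masks in a wide shuffle
// mask. Mirrors the NumUniqueGroups / NumGroups ratio used above.
static double countUniqueGroupFraction(const std::vector<int> &Mask,
                                       unsigned GroupSize) {
  assert(GroupSize && Mask.size() % GroupSize == 0);
  unsigned NumGroups = Mask.size() / GroupSize;
  std::set<std::vector<int>> Unique;
  for (unsigned I = 0; I != NumGroups; ++I)
    Unique.insert(std::vector<int>(Mask.begin() + I * GroupSize,
                                   Mask.begin() + (I + 1) * GroupSize));
  return double(Unique.size()) / NumGroups;
}

int main() {
  // A 16-element mask made of four identical 4-element groups: the adjusted
  // cost scales by 1/4 because only one unique group pattern exists.
  std::vector<int> Mask = {0, 4, 1, 5, 0, 4, 1, 5, 0, 4, 1, 5, 0, 4, 1, 5};
  std::printf("unique fraction: %g\n", countUniqueGroupFraction(Mask, 4));
}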
5147  bool VectorCombine::shrinkType(Instruction &I) {
5148    Value *ZExted, *OtherOperand;

5154    Value *ZExtOperand = I.getOperand(I.getOperand(0) == OtherOperand ? 1 : 0);

5158    unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();

5160    if (I.getOpcode() == Instruction::LShr) {

5177        Instruction::ZExt, BigTy, SmallTy,
5178        TargetTransformInfo::CastContextHint::None, CostKind);

5183    for (User *U : ZExtOperand->users()) {

5190        ShrinkCost += ZExtCost;

5205    ShrinkCost += ZExtCost;

5212        Instruction::Trunc, SmallTy, BigTy,
5213        TargetTransformInfo::CastContextHint::None, CostKind);

5218    if (ShrinkCost > CurrentCost)
          return false;

5222    Value *Op0 = ZExted;

5225    if (I.getOperand(0) == OtherOperand)

5232    replaceValue(I, *NewZExtr);
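For readers following along, the rewrite this fold performs can be sketched with a plain IRBuilder helper. This is a minimal illustration under assumed names (buildNarrowBinop is hypothetical, not part of the pass): given a binop whose vector operand is a zext from a narrow element type, truncate the other operand, perform the operation at the narrow width, and zext the result back.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

// Hypothetical helper, not part of VectorCombine: rebuild
// `BO = binop (zext X), C` as `zext (binop X, trunc C)`. Assumes the caller
// has already proven the narrow operation computes the same bits, as the
// cost and legality checks above do.
static llvm::Value *buildNarrowBinop(llvm::IRBuilderBase &Builder,
                                     llvm::BinaryOperator *BO,
                                     llvm::Value *ZExted,
                                     llvm::Value *OtherOperand,
                                     llvm::VectorType *SmallTy) {
  // Truncate the wide operand down to the zext source type.
  llvm::Value *NarrowOther = Builder.CreateTrunc(OtherOperand, SmallTy);
  // Perform the original operation at the narrow width.
  llvm::Value *NarrowOp =
      Builder.CreateBinOp(BO->getOpcode(), ZExted, NarrowOther);
  // Widen the result back to the original type.
  return Builder.CreateZExt(NarrowOp, BO->getType());
}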
5238  bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
5239    Value *DstVec, *SrcVec;
5240    uint64_t ExtIdx, InsIdx;

5250    if (!DstVecTy || !SrcVecTy ||

5256    if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)
          return false;

5263    bool NeedExpOrNarrow = NumSrcElts != NumDstElts;

5265    if (NeedDstSrcSwap) {
5267      Mask[InsIdx] = ExtIdx % NumDstElts;
        } else {
5271      std::iota(Mask.begin(), Mask.end(), 0);
5272      Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
        }

5285    SmallVector<int> ExtToVecMask;
5286    if (!NeedExpOrNarrow) {

5291                                    nullptr, {DstVec, SrcVec});

5297      ExtToVecMask[ExtIdx % NumDstElts] = ExtIdx;

5300                                  DstVecTy, SrcVecTy, ExtToVecMask, CostKind);

5304    if (!Ext->hasOneUse())

5307    LLVM_DEBUG(dbgs() << "Found a insert/extract shuffle-like pair: " << I
5308                      << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost

5311    if (OldCost < NewCost)
          return false;

5314    if (NeedExpOrNarrow) {
5315      if (!NeedDstSrcSwap)

5328    replaceValue(I, *Shuf);
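The shuffle mask that replaces an insert/extract pair is simple to compute by hand. The sketch below (standalone and illustrative only) builds the mask for the common same-width, no-swap case: an identity mask over the destination vector, with the inserted lane redirected to the extracted lane of the second shuffle operand.

#include <cstdio>
#include <numeric>
#include <vector>

// For insertelement(%dst, extractelement(%src, ExtIdx), InsIdx) with equally
// sized vectors, the equivalent shufflevector(%dst, %src) mask is identity
// except that lane InsIdx selects source lane NumElts + ExtIdx.
static std::vector<int> insExtShuffleMask(unsigned NumElts, unsigned InsIdx,
                                          unsigned ExtIdx) {
  std::vector<int> Mask(NumElts);
  std::iota(Mask.begin(), Mask.end(), 0);                  // lanes from %dst
  Mask[InsIdx] = static_cast<int>(NumElts + ExtIdx);       // lane from %src
  return Mask;
}

int main() {
  for (int M : insExtShuffleMask(4, 0, 2))
    std::printf("%d ", M); // prints: 6 1 2 3
  std::printf("\n");
}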
5337  bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
5338    const APInt *SplatVal0, *SplatVal1;

5348    auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
5349    unsigned Width = VTy->getElementType()->getIntegerBitWidth();

5358      LLVM_DEBUG(dbgs() << "VC: The cost to cast from " << *ExtVTy << " to "
5359                        << *I.getType() << " is too high.\n");
          return false;

5363    APInt NewSplatVal = SplatVal1->zext(Width * 2);
5364    NewSplatVal <<= Width;
5365    NewSplatVal |= SplatVal0->zext(Width * 2);
        auto *NewSplat = ConstantVector::getSplat(
5367        ExtVTy->getElementCount(), ConstantInt::get(F.getContext(), NewSplatVal));
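The splat-combining arithmetic at lines 5363-5365 is worth a concrete example. Here is a minimal standalone sketch with plain integers instead of APInt and arbitrarily chosen values: interleaving a splat of 0x0A with a splat of 0xBB over i8 elements produces the same bits as a single i16 splat whose low byte is the first value and whose high byte is the second.

#include <cstdint>
#include <cstdio>

int main() {
  // Same computation as above: zext both splats to 2*Width, shift the second
  // splat up by Width, then OR them together. For Width = 8:
  unsigned Width = 8;
  uint64_t SplatVal0 = 0x0A; // elements of the first interleave operand
  uint64_t SplatVal1 = 0xBB; // elements of the second interleave operand
  uint64_t NewSplatVal = (SplatVal1 << Width) | SplatVal0;
  std::printf("0x%llX\n", (unsigned long long)NewSplatVal); // prints 0xBB0A
}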
5375  bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {

5377    if (!OldLoad || !OldLoad->isSimple())

5384    unsigned const OldNumElements = OldLoadTy->getNumElements();

5390    using IndexRange = std::pair<int, int>;
5391    auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
5392      IndexRange OutputRange = IndexRange(OldNumElements, -1);
5393      for (llvm::Use &Use : I.uses()) {

5395        User *Shuffle = Use.getUser();

5400          return std::nullopt;

5407        for (int Index : Mask) {
5408          if (Index >= 0 && Index < static_cast<int>(OldNumElements)) {
5409            OutputRange.first = std::min(Index, OutputRange.first);
5410            OutputRange.second = std::max(Index, OutputRange.second);
            }
          }
        }

5415      if (OutputRange.second < OutputRange.first)
5416        return std::nullopt;

5422    if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
5423      unsigned const NewNumElements = (Indices->second + 1) - Indices->first;

5427      if (NewNumElements < OldNumElements) {

5432        Type *ElemTy = OldLoadTy->getElementType();

5434        Value *PtrOp = OldLoad->getPointerOperand();

5437            Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
5438            OldLoad->getPointerAddressSpace(), CostKind);

5441            OldLoad->getPointerAddressSpace(), CostKind);

5443        using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;

5445        unsigned const LowOffset = Indices->first;
5446        unsigned const HighOffset = OldNumElements - (Indices->second + 1);

5448        for (llvm::Use &Use : I.uses()) {

5450          ArrayRef<int> OldMask = Shuffle->getShuffleMask();

5454          std::vector<int> &NewMask = NewUses.back().second;
5455          for (int Index : OldMask) {

5458              NewMask.push_back(Index);

5461              int NewIndex = Index >= static_cast<int>(OldNumElements)
5462                                 ? Index - LowOffset - HighOffset
5463                                 : Index - LowOffset;
5464              if (NewIndex >= static_cast<int>(NewNumElements * 2u))

5466              NewMask.push_back(NewIndex);

            LLVM_DEBUG(
5479            dbgs() << "Found a load used only by shufflevector instructions: "
5480                   << I << "\n  OldCost: " << OldCost
5481                   << " vs NewCost: " << NewCost << "\n");

5483        if (OldCost < NewCost || !NewCost.isValid())

5488        TypeSize ElemSize = DL->getTypeAllocSize(ElemTy);

5492            PtrOp, ConstantInt::get(IndexTy, LowOffset * ElemSize))

5497        NewLoad->copyMetadata(I);

5500        for (UseEntry &Use : NewUses) {
5501          ShuffleVectorInst *Shuffle = Use.first;
5502          std::vector<int> &NewMask = Use.second;

5509          replaceValue(*Shuffle, *NewShuffle, false);
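The mask-remapping rule in the loop at lines 5455-5466 can be checked in isolation. The following standalone sketch (illustrative names, not the pass's code) applies the same rule: indices into the old wide load shift down by LowOffset, and indices that referred to the second shuffle operand additionally skip the HighOffset lanes the shrunken load no longer provides.

#include <cstdio>
#include <vector>

// Remap a shufflevector mask after shrinking a load of OldNumElements lanes
// to the NewNumElements lanes [LowOffset, LowOffset + NewNumElements).
// Negative (poison) indices pass through unchanged; an out-of-range result
// returns an empty mask, mirroring the fold's bail-out.
static std::vector<int> remapMask(const std::vector<int> &OldMask,
                                  int OldNumElements, int NewNumElements,
                                  int LowOffset, int HighOffset) {
  std::vector<int> NewMask;
  for (int Index : OldMask) {
    if (Index < 0) {
      NewMask.push_back(Index);
      continue;
    }
    // Lanes >= OldNumElements select the second operand, so they also skip
    // the HighOffset lanes removed from the end of the first operand.
    int NewIndex = Index >= OldNumElements ? Index - LowOffset - HighOffset
                                           : Index - LowOffset;
    if (NewIndex >= NewNumElements * 2)
      return {};
    NewMask.push_back(NewIndex);
  }
  return NewMask;
}

int main() {
  // 8-element load where only lanes 2..5 are used: LowOffset = 2, HighOffset = 2.
  for (int M : remapMask({2, 3, 4, 5, -1}, 8, 4, 2, 2))
    std::printf("%d ", M); // prints: 0 1 2 3 -1
  std::printf("\n");
}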
5522  bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {

5524    if (!Phi || Phi->getNumIncomingValues() != 2u)
          return false;

5528    ArrayRef<int> Mask0;
5529    ArrayRef<int> Mask1;

5542    auto const InputNumElements = InputVT->getNumElements();

5544    if (InputNumElements >= ResultVT->getNumElements())
          return false;

5549    SmallVector<int, 16> NewMask;

5552    for (auto [M0, M1] : zip(Mask0, Mask1)) {
5553      if (M0 >= 0 && M1 >= 0)

5555      else if (M0 == -1 && M1 == -1)

5568    int MaskOffset = NewMask[0u];
5569    unsigned Index = (InputNumElements + MaskOffset) % InputNumElements;

5572    for (unsigned I = 0u; I < InputNumElements; ++I) {

5586                      << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost

5589    if (NewCost > OldCost)
          return false;

5601    auto *NewPhi = Builder.CreatePHI(NewShuf0->getType(), 2u);

5603    NewPhi->addIncoming(Op, Phi->getIncomingBlock(1u));

5609    replaceValue(*Phi, *NewShuf1);
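The modular normalization at line 5569 wraps a possibly negative mask offset into a valid lane index. A tiny standalone sketch of the same arithmetic (illustrative only, and assuming the offset is greater than -NumElts, as a mask offset derived from a single element always is):

#include <cstdio>

// Map an offset in (-NumElts, NumElts) into [0, NumElts), the same
// wrap-around used when rebuilding the shuffle mask above.
static unsigned normalizeLane(int NumElts, int Offset) {
  return (NumElts + Offset) % NumElts;
}

int main() {
  std::printf("%u %u\n", normalizeLane(4, -1), normalizeLane(4, 2)); // 3 2
}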
5615  bool VectorCombine::run() {

        auto FoldInst = [&](Instruction &I) {
5629      auto Opcode = I.getOpcode();

5637      if (IsFixedVectorType) {
            switch (Opcode) {
5639        case Instruction::InsertElement:
5640          if (vectorizeLoadInsert(I))
                return true;
              break;
5643        case Instruction::ShuffleVector:
5644          if (widenSubvectorLoad(I))
                return true;
              break;
            default:
              break;
            }
          }

5655      if (scalarizeOpOrCmp(I))
            return true;
5657      if (scalarizeLoad(I))
            return true;
5659      if (scalarizeExtExtract(I))
            return true;
5661      if (scalarizeVPIntrinsic(I))
            return true;
5663      if (foldInterleaveIntrinsics(I))
            return true;

5667      if (Opcode == Instruction::Store)
5668        if (foldSingleElementStore(I))
              return true;

5672      if (TryEarlyFoldsOnly)
            return false;

5679      if (IsFixedVectorType) {
            switch (Opcode) {
5681        case Instruction::InsertElement:
5682          if (foldInsExtFNeg(I))
                return true;
5684          if (foldInsExtBinop(I))
                return true;
5686          if (foldInsExtVectorToShuffle(I))
                return true;
              break;
5689        case Instruction::ShuffleVector:
5690          if (foldPermuteOfBinops(I))
                return true;
5692          if (foldShuffleOfBinops(I))
                return true;
5694          if (foldShuffleOfSelects(I))
                return true;
5696          if (foldShuffleOfCastops(I))
                return true;
5698          if (foldShuffleOfShuffles(I))
                return true;
5700          if (foldPermuteOfIntrinsic(I))
                return true;
5702          if (foldShufflesOfLengthChangingShuffles(I))
                return true;
5704          if (foldShuffleOfIntrinsics(I))
                return true;
5706          if (foldSelectShuffle(I))
                return true;
5708          if (foldShuffleToIdentity(I))
                return true;
              break;
5711        case Instruction::Load:
5712          if (shrinkLoadForShuffles(I))
                return true;
              break;
5715        case Instruction::BitCast:
5716          if (foldBitcastShuffle(I))
                return true;
5718          if (foldSelectsFromBitcast(I))
                return true;
              break;
5721        case Instruction::And:
5722        case Instruction::Or:
5723        case Instruction::Xor:
5724          if (foldBitOpOfCastops(I))
                return true;
5726          if (foldBitOpOfCastConstant(I))
                return true;
              break;
5729        case Instruction::PHI:
5730          if (shrinkPhiOfShuffles(I))
                return true;
              break;
            default:
              break;
            }
          }

          switch (Opcode) {
5740      case Instruction::Call:
5741        if (foldShuffleFromReductions(I))
              return true;
5743        if (foldCastFromReductions(I))
              return true;
            break;
5746      case Instruction::ExtractElement:
5747        if (foldShuffleChainsToReduce(I))
              return true;
            break;
5750      case Instruction::ICmp:
5751        if (foldSignBitReductionCmp(I))
              return true;
5753        if (foldICmpEqZeroVectorReduce(I))
              return true;
5755        if (foldEquivalentReductionCmp(I))
              return true;

5758      case Instruction::FCmp:
5759        if (foldExtractExtract(I))
              return true;
            break;
5762      case Instruction::Or:
5763        if (foldConcatOfBoolMasks(I))
              return true;

5768        if (foldExtractExtract(I))
              return true;
5770        if (foldExtractedCmps(I))
              return true;
5772        if (foldBinopOfReductions(I))
              return true;

5781    bool MadeChange = false;
5782    for (BasicBlock &BB : F) {

5794        if (!I->isDebugOrPseudoInst())
5795          MadeChange |= FoldInst(*I);

5802    while (!Worklist.isEmpty()) {

5812      MadeChange |= FoldInst(*I);
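The driver at the bottom of run() follows a common fixpoint shape: a linear sweep over the function seeds the folds, each successful fold pushes affected values onto a worklist, and the loop at line 5802 reprocesses them until nothing more changes. A minimal standalone sketch of that pattern, with toy types instead of instructions and a worklist standing in for InstructionWorklist:

#include <cstdio>
#include <deque>
#include <functional>

// Toy fixpoint driver: process every seed item, and keep reprocessing items
// that a successful fold re-enqueued, until the worklist drains.
static bool runToFixpoint(
    const std::deque<int> &Seeds,
    const std::function<bool(int, std::deque<int> &)> &Fold) {
  bool MadeChange = false;
  std::deque<int> Worklist(Seeds.begin(), Seeds.end());
  while (!Worklist.empty()) {
    int I = Worklist.front();
    Worklist.pop_front();
    MadeChange |= Fold(I, Worklist);
  }
  return MadeChange;
}

int main() {
  // Fold: halve even items and requeue the result, mirroring how a fold's
  // newly created values are pushed back for further combining.
  bool Changed = runToFixpoint({8, 3}, [](int I, std::deque<int> &WL) {
    if (I % 2)
      return false;
    WL.push_back(I / 2);
    return true;
  });
  std::printf("changed: %d\n", Changed); // changed: 1
}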