#define DEBUG_TYPE "vector-combine"

STATISTIC(NumVecLoad, "Number of vector loads formed");
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
STATISTIC(NumScalarOps, "Number of scalar unary + binary ops formed");
STATISTIC(NumScalarCmp, "Number of scalar compares formed");
STATISTIC(NumScalarIntrinsic, "Number of scalar intrinsic calls formed");

static cl::opt<bool> DisableVectorCombine(
    "disable-vector-combine", cl::init(false), cl::Hidden,
    cl::desc("Disable all vector combine transforms"));

static cl::opt<bool> DisableBinopExtractShuffle(
    "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
    cl::desc("Disable binop extract to shuffle transforms"));

static cl::opt<unsigned> MaxInstrsToScan(
    "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,
    cl::desc("Max number of instructions to scan for vector combining."));

static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
  VectorCombine(/* analysis arguments elided in this excerpt */ bool TryEarlyFoldsOnly)
      : /* ... */ TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}

  const TargetTransformInfo &TTI;
  const DominatorTree &DT;
  const SimplifyQuery SQ;
  bool TryEarlyFoldsOnly;
  InstructionWorklist Worklist;
  bool vectorizeLoadInsert(Instruction &I);
  bool widenSubvectorLoad(Instruction &I);
  ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
                                        ExtractElementInst *Ext1,
                                        unsigned PreferredExtractIndex) const;
  bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                             const Instruction &I,
                             ExtractElementInst *&ConvertToShuffle,
                             unsigned PreferredExtractIndex);
  bool foldExtractExtract(Instruction &I);
  bool foldInsExtFNeg(Instruction &I);
  bool foldInsExtBinop(Instruction &I);
  bool foldInsExtVectorToShuffle(Instruction &I);
  bool foldBitOpOfCastops(Instruction &I);
  bool foldBitOpOfCastConstant(Instruction &I);
  bool foldBitcastShuffle(Instruction &I);
  bool scalarizeOpOrCmp(Instruction &I);
  bool scalarizeVPIntrinsic(Instruction &I);
  bool foldExtractedCmps(Instruction &I);
  bool foldBinopOfReductions(Instruction &I);
  bool foldSingleElementStore(Instruction &I);
  bool scalarizeLoadExtract(Instruction &I);
  bool scalarizeExtExtract(Instruction &I);
  bool foldConcatOfBoolMasks(Instruction &I);
  bool foldPermuteOfBinops(Instruction &I);
  bool foldShuffleOfBinops(Instruction &I);
  bool foldShuffleOfSelects(Instruction &I);
  bool foldShuffleOfCastops(Instruction &I);
  bool foldShuffleOfShuffles(Instruction &I);
  bool foldShuffleOfIntrinsics(Instruction &I);
  bool foldShuffleToIdentity(Instruction &I);
  bool foldShuffleFromReductions(Instruction &I);
  bool foldShuffleChainsToReduce(Instruction &I);
  bool foldCastFromReductions(Instruction &I);
  bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
  bool foldInterleaveIntrinsics(Instruction &I);
  bool shrinkType(Instruction &I);
  bool shrinkLoadForShuffles(Instruction &I);
  bool shrinkPhiOfShuffles(Instruction &I);
  void replaceValue(Instruction &Old, Value &New, bool Erase = true) {
    Worklist.pushUsersToWorkList(*NewI);
    Worklist.pushValue(NewI);
    SmallPtrSet<Value *, 4> Visited;
        OpI, nullptr, nullptr, [&](Value *V) {
    NextInst = NextInst->getNextNode();
    Worklist.pushUsersToWorkList(*OpI);
    Worklist.pushValue(OpI);
  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
      Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
  Type *ScalarTy = Load->getType()->getScalarType();
  unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
  unsigned OffsetEltIndex = 0;
  unsigned OffsetBitWidth = DL->getIndexTypeSizeInBits(SrcPtr->getType());
  APInt Offset(OffsetBitWidth, 0);
  uint64_t ScalarSizeInBytes = ScalarSize / 8;
  if (Offset.urem(ScalarSizeInBytes) != 0)
  OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
  if (OffsetEltIndex >= MinVecNumElts)
  unsigned AS = Load->getPointerAddressSpace();
  unsigned OutputNumElts = Ty->getNumElements();
  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
  Mask[0] = OffsetEltIndex;
  if (OldCost < NewCost || !NewCost.isValid())
  replaceValue(I, *VecLd);
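// Illustrative sketch (not part of the original source): vectorizeLoadInsert
// rewrites a scalar load feeding an insertelement into a wider vector load
// plus a shuffle, when the cost model says the wide form is no more expensive.
// Roughly, in IR terms:
//
//   %s = load i32, ptr %p
//   %v = insertelement <4 x i32> poison, i32 %s, i32 0
//     ==>
//   %wide = load <4 x i32>, ptr %p          ; requires dereferenceability
//   %v = shufflevector <4 x i32> %wide, <4 x i32> poison,
//                      <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
//
// The chosen vector width and mask depend on TTI.getMinVectorRegisterBitWidth()
// and the pointer offset computed above (OffsetEltIndex).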
bool VectorCombine::widenSubvectorLoad(Instruction &I) {
  if (!Shuf->isIdentityWithPadding())
  unsigned OpIndex = any_of(Shuf->getShuffleMask(), [&NumOpElts](int M) {
    return M >= (int)(NumOpElts);
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  unsigned AS = Load->getPointerAddressSpace();
  if (OldCost < NewCost || !NewCost.isValid())
  replaceValue(I, *VecLd);
ExtractElementInst *VectorCombine::getShuffleExtract(
    ExtractElementInst *Ext0, ExtractElementInst *Ext1,
  assert(Index0C && Index1C && "Expected constant extract indexes");
  unsigned Index0 = Index0C->getZExtValue();
  unsigned Index1 = Index1C->getZExtValue();
  if (Index0 == Index1)
  if (PreferredExtractIndex == Index0)
  if (PreferredExtractIndex == Index1)
  return Index0 > Index1 ? Ext0 : Ext1;
bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
                                          ExtractElementInst *Ext1,
                                          const Instruction &I,
                                          ExtractElementInst *&ConvertToShuffle,
                                          unsigned PreferredExtractIndex) {
  assert(Ext0IndexC && Ext1IndexC && "Expected constant extract indexes");
  unsigned Opcode = I.getOpcode();
  assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
         "Expected a compare");
  unsigned Ext0Index = Ext0IndexC->getZExtValue();
  unsigned Ext1Index = Ext1IndexC->getZExtValue();
  unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
  unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
  if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
    bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
    OldCost = CheapExtractCost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
    OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost +
  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
  if (ConvertToShuffle) {
    SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(),
    ShuffleMask[BestInsIndex] = BestExtIndex;
        VecTy, VecTy, ShuffleMask, CostKind, 0, nullptr, {ConvertToShuffle});
        VecTy, VecTy, {}, CostKind, 0, nullptr,
  return OldCost < NewCost;
  ShufMask[NewIndex] = OldIndex;
  return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
      V1, "foldExtExtBinop");
  VecBOInst->copyIRFlags(&I);

bool VectorCombine::foldExtractExtract(Instruction &I) {
  ExtractElementInst *ExtractToChange;
  if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
  if (ExtractToChange) {
    unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
    if (ExtractToChange == Ext0)
      ? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex, I)
      : foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex, I);
  replaceValue(I, *NewExt);
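// Illustrative sketch (not from the original source): foldExtractExtract turns
// a scalar binop/cmp of two extracts into a vector op followed by one extract,
// when TTI reports that is cheaper:
//
//   %a = extractelement <4 x float> %x, i32 2
//   %b = extractelement <4 x float> %y, i32 2
//   %r = fadd float %a, %b
//     ==>
//   %v = fadd <4 x float> %x, %y
//   %r = extractelement <4 x float> %v, i32 2
//
// When the two extract indices differ, one source is first shifted into place
// with a shuffle (see the createShiftShuffle fragment above).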
bool VectorCombine::foldInsExtFNeg(Instruction &I) {
  if (!SrcVecTy || ScalarTy != SrcVecTy->getScalarType())
  unsigned NumElts = VecTy->getNumElements();
  if (Index >= NumElts)
  SmallVector<int> Mask(NumElts);
  std::iota(Mask.begin(), Mask.end(), 0);
  bool NeedLenChg = SrcVecTy->getNumElements() != NumElts;
  SmallVector<int> SrcMask;
      VecTy, SrcVecTy, SrcMask, CostKind);
  if (NewCost > OldCost)
  replaceValue(I, *NewShuf);
bool VectorCombine::foldInsExtBinop(Instruction &I) {
  BinaryOperator *VecBinOp, *SclBinOp;
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  NewInst->copyIRFlags(VecBinOp);
  NewInst->andIRFlags(SclBinOp);
  replaceValue(I, *NewBO);
bool VectorCombine::foldBitOpOfCastops(Instruction &I) {
  if (!BinOp || !BinOp->isBitwiseLogicOp())
  if (!LHSCast || !RHSCast) {
    LLVM_DEBUG(dbgs() << " One or both operands are not cast instructions\n");
  if (CastOpcode != RHSCast->getOpcode())
  switch (CastOpcode) {
  case Instruction::BitCast:
  case Instruction::Trunc:
  case Instruction::SExt:
  case Instruction::ZExt:
  Value *LHSSrc = LHSCast->getOperand(0);
  Value *RHSSrc = RHSCast->getOperand(0);
  auto *SrcTy = LHSSrc->getType();
  auto *DstTy = I.getType();
  if (CastOpcode != Instruction::BitCast &&
  if (!SrcTy->getScalarType()->isIntegerTy() ||
      !DstTy->getScalarType()->isIntegerTy())
      LHSCastCost + RHSCastCost;
  if (!LHSCast->hasOneUse())
    NewCost += LHSCastCost;
  if (!RHSCast->hasOneUse())
    NewCost += RHSCastCost;
                    << " NewCost=" << NewCost << "\n");
  if (NewCost > OldCost)
      BinOp->getName() + ".inner");
  NewBinOp->copyIRFlags(BinOp);
  replaceValue(I, *Result);
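// Illustrative sketch (not from the original source): foldBitOpOfCastops
// reassociates a bitwise logic op of two identically-casted values so the
// logic happens in the common source type with a single cast afterwards:
//
//   %a = zext <4 x i16> %x to <4 x i32>
//   %b = zext <4 x i16> %y to <4 x i32>
//   %r = and <4 x i32> %a, %b
//     ==>
//   %t = and <4 x i16> %x, %y
//   %r = zext <4 x i16> %t to <4 x i32>
//
// The fold only fires when the cast-cost accounting above shows no regression
// (extra cast costs are charged when a cast has other users).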
bool VectorCombine::foldBitOpOfCastConstant(Instruction &I) {
  switch (CastOpcode) {
  case Instruction::BitCast:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::Trunc:
  Value *LHSSrc = LHSCast->getOperand(0);
  auto *SrcTy = LHSSrc->getType();
  auto *DstTy = I.getType();
  if (CastOpcode != Instruction::BitCast &&
  if (!SrcTy->getScalarType()->isIntegerTy() ||
      !DstTy->getScalarType()->isIntegerTy())
  PreservedCastFlags RHSFlags;
  if (!LHSCast->hasOneUse())
    NewCost += LHSCastCost;
  LLVM_DEBUG(dbgs() << "foldBitOpOfCastConstant: OldCost=" << OldCost
                    << " NewCost=" << NewCost << "\n");
  if (NewCost > OldCost)
      LHSSrc, InvC, I.getName() + ".inner");
  NewBinOp->copyIRFlags(&I);
  replaceValue(I, *Result);
bool VectorCombine::foldBitcastShuffle(Instruction &I) {
  if (!DestTy || !SrcTy)
  unsigned DestEltSize = DestTy->getScalarSizeInBits();
  unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
  if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
  if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
      !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
  SmallVector<int, 16> NewMask;
  if (DestEltSize <= SrcEltSize) {
    assert(SrcEltSize % DestEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = SrcEltSize / DestEltSize;
    assert(DestEltSize % SrcEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = DestEltSize / SrcEltSize;
  unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
  auto *NewShuffleTy =
  auto *OldShuffleTy =
  unsigned NumOps = IsUnary ? 1 : 2;
      TargetTransformInfo::CastContextHint::None,
      TargetTransformInfo::CastContextHint::None,
  LLVM_DEBUG(dbgs() << "Found a bitcasted shuffle: " << I << "\n OldCost: "
                    << OldCost << " vs NewCost: " << NewCost << "\n");
  if (NewCost > OldCost || !NewCost.isValid())
  replaceValue(I, *Shuf);
bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
  if (!ScalarOp0 || !ScalarOp1)
  auto IsAllTrueMask = [](Value *MaskVal) {
      return ConstValue->isAllOnesValue();
  SmallVector<int> Mask;
    Mask.resize(FVTy->getNumElements(), 0);
    Args.push_back(V->getType());
  IntrinsicCostAttributes Attrs(IntrID, VecTy, Args);
  std::optional<unsigned> FunctionalOpcode =
  std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
  if (!FunctionalOpcode) {
    IntrinsicCostAttributes Attrs(*ScalarIntrID, VecTy->getScalarType(), Args);
  InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;
  LLVM_DEBUG(dbgs() << "Found a VP Intrinsic to scalarize: " << VPI
                    << ", Cost of scalarizing:" << NewCost << "\n");
  if (OldCost < NewCost || !NewCost.isValid())
  bool SafeToSpeculate;
        *FunctionalOpcode, &VPI, nullptr, &AC, &DT);
  if (!SafeToSpeculate &&
          {ScalarOp0, ScalarOp1})
          ScalarOp0, ScalarOp1);
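// Illustrative sketch (not from the original source): scalarizeVPIntrinsic
// handles a VP intrinsic whose vector operands are both splats of a scalar.
// Instead of operating on splatted vectors, it performs the scalar op once and
// re-splats the result, e.g. (EVL/mask permitting):
//
//   %xs = ... splat of i32 %x ...
//   %ys = ... splat of i32 %y ...
//   %r  = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %xs, <4 x i32> %ys,
//                                           <4 x i1> %m, i32 %evl)
//     ==>
//   %s  = add i32 %x, %y
//   %r  = ... splat of i32 %s ...
//
// Division-like opcodes are only scalarized when speculation is safe or the
// mask is known all-true, which is what the SafeToSpeculate check above guards.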
bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
  if (!UO && !BO && !CI && !II)
    if (Arg->getType() != II->getType() &&
  for (User *U : I.users())
  std::optional<uint64_t> Index;
  auto Ops = II ? II->args() : I.operands();
    uint64_t InsIdx = 0;
    if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
    else if (InsIdx != *Index)
  if (!Index.has_value())
  Type *ScalarTy = VecTy->getScalarType();
  assert(VecTy->isVectorTy() &&
         "Unexpected types for insert element into binop or cmp");
  unsigned Opcode = I.getOpcode();
  } else if (UO || BO) {
    IntrinsicCostAttributes ScalarICA(
        II->getIntrinsicID(), ScalarTy,
    IntrinsicCostAttributes VectorICA(
        II->getIntrinsicID(), VecTy,
  Value *NewVecC = nullptr;
    NewVecC = simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
        simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
    NewVecC = simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
  for (auto [Idx, Op, VecC, Scalar] : enumerate(Ops, VecCs, ScalarOps)) {
            II->getIntrinsicID(), Idx, &TTI)))
        Instruction::InsertElement, VecTy, CostKind, *Index, VecC, Scalar);
    OldCost += InsertCost;
    NewCost += !Op->hasOneUse() * InsertCost;
  if (OldCost < NewCost || !NewCost.isValid())
    ++NumScalarIntrinsic;
    Scalar = Builder.CreateCmp(CI->getPredicate(), ScalarOps[0], ScalarOps[1]);
  Scalar->setName(I.getName() + ".scalar");
    ScalarInst->copyIRFlags(&I);
  replaceValue(I, *Insert);
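// Illustrative sketch (not from the original source): scalarizeOpOrCmp rewrites
// an op whose vector operands are all insertelements into the same lane (or
// constants) as a scalar op followed by a single insert:
//
//   %vx = insertelement <4 x i32> %cx, i32 %x, i32 1
//   %vy = insertelement <4 x i32> %cy, i32 %y, i32 1
//   %r  = add <4 x i32> %vx, %vy
//     ==>
//   %s  = add i32 %x, %y
//   %r  = insertelement <4 x i32> %cr, i32 %s, i32 1
//
// where %cr is the folded op of the constant operands, produced via
// simplifyBinOp / simplifyCmpInst / simplifyUnOp on VecCs above.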
bool VectorCombine::foldExtractedCmps(Instruction &I) {
  if (!BI || !I.getType()->isIntegerTy(1))
  Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
  CmpPredicate P0, P1;
  uint64_t Index0, Index1;
  ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1, CostKind);
  assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
         "Unknown ExtractElementInst");
  unsigned CmpOpcode =
      Ext0Cost + Ext1Cost + CmpCost * 2 +
  int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
  int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
  ShufMask[CheapIndex] = ExpensiveIndex;
  NewCost += Ext0->hasOneUse() ? 0 : Ext0Cost;
  NewCost += Ext1->hasOneUse() ? 0 : Ext1Cost;
  if (OldCost < NewCost || !NewCost.isValid())
  Value *LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
  Value *RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
  replaceValue(I, *NewExt);
  unsigned ReductionOpc =
  CostBeforeReduction =
      TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, ExtType,
  CostAfterReduction =
      TTI.getExtendedReductionCost(ReductionOpc, IsUnsigned, II.getType(),
  if (RedOp && II.getIntrinsicID() == Intrinsic::vector_reduce_add &&
      (Op0->getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
        TTI.getCastInstrCost(Op0->getOpcode(), MulType, ExtType,
        TTI.getArithmeticInstrCost(Instruction::Mul, MulType, CostKind);
        TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, MulType,
    CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;
    CostAfterReduction = TTI.getMulAccReductionCost(
        IsUnsigned, ReductionOpc, II.getType(), ExtType, CostKind);
  CostAfterReduction = TTI.getArithmeticReductionCost(ReductionOpc, VecRedTy,
bool VectorCombine::foldBinopOfReductions(Instruction &I) {
  if (BinOpOpc == Instruction::Sub)
    ReductionIID = Intrinsic::vector_reduce_add;
  auto checkIntrinsicAndGetItsArgument = [](Value *V,
    if (II->getIntrinsicID() == IID && II->hasOneUse())
      return II->getArgOperand(0);
  Value *V0 = checkIntrinsicAndGetItsArgument(I.getOperand(0), ReductionIID);
  Value *V1 = checkIntrinsicAndGetItsArgument(I.getOperand(1), ReductionIID);
  unsigned ReductionOpc =
      CostOfRedOperand0 + CostOfRedOperand1 +
  if (NewCost >= OldCost || !NewCost.isValid())
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (BinOpOpc == Instruction::Or)
    VectorBO = Builder.CreateOr(V0, V1, "",
  replaceValue(I, *Rdx);
  unsigned NumScanned = 0;
  return std::any_of(Begin, End, [&](const Instruction &Instr) {

class ScalarizationResult {
  enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
  ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)
      : Status(Status), ToFreeze(ToFreeze) {}
  ScalarizationResult(const ScalarizationResult &Other) = default;
  ~ScalarizationResult() {
    assert(!ToFreeze && "freeze() not called with ToFreeze being set");
  static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }
  static ScalarizationResult safe() { return {StatusTy::Safe}; }
  static ScalarizationResult safeWithFreeze(Value *ToFreeze) {
    return {StatusTy::SafeWithFreeze, ToFreeze};
  bool isSafe() const { return Status == StatusTy::Safe; }
  bool isUnsafe() const { return Status == StatusTy::Unsafe; }
  bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }
    Status = StatusTy::Unsafe;
  void freeze(IRBuilderBase &Builder, Instruction &UserI) {
    assert(isSafeWithFreeze() &&
           "should only be used when freezing is required");
           "UserI must be a user of ToFreeze");
    IRBuilder<>::InsertPointGuard Guard(Builder);
      if (U.get() == ToFreeze)

  uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
    if (C->getValue().ult(NumElements))
      return ScalarizationResult::safe();
    return ScalarizationResult::unsafe();
    return ScalarizationResult::unsafe();
  APInt Zero(IntWidth, 0);
  APInt MaxElts(IntWidth, NumElements);
                 true, &AC, CtxI, &DT)))
    return ScalarizationResult::safe();
  return ScalarizationResult::unsafe();
  if (ValidIndices.contains(IdxRange))
    return ScalarizationResult::safeWithFreeze(IdxBase);
  return ScalarizationResult::unsafe();
      C->getZExtValue() * DL.getTypeStoreSize(ScalarType));
bool VectorCombine::foldSingleElementStore(Instruction &I) {
  if (!match(SI->getValueOperand(),
  Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
  if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
      !DL->typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||
      SrcAddr != SI->getPointerOperand()->stripPointerCasts())
  if (ScalarizableIdx.isUnsafe() ||
  Worklist.push(Load);
  if (ScalarizableIdx.isSafeWithFreeze())
      SI->getValueOperand()->getType(), SI->getPointerOperand(),
      {ConstantInt::get(Idx->getType(), 0), Idx});
      std::max(SI->getAlign(), Load->getAlign()), NewElement->getType(), Idx,
  replaceValue(I, *NSI);
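// Illustrative sketch (not from the original source): foldSingleElementStore
// recognizes a load/insertelement/store round-trip that only changes one lane
// and replaces it with a scalar store to that lane's address:
//
//   %v  = load <4 x i32>, ptr %p
//   %v2 = insertelement <4 x i32> %v, i32 %x, i32 2
//   store <4 x i32> %v2, ptr %p
//     ==>
//   %gep = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i32 2
//   store i32 %x, ptr %gep
//
// The index must be provably in bounds (possibly after freezing it), which is
// what the ScalarizationResult machinery above establishes.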
bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
  if (LI->isVolatile() || !DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
      LI->getPointerAddressSpace(), CostKind);
  unsigned NumInstChecked = 0;
  DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
    for (auto &Pair : NeedFreeze)
      Pair.second.discard();
  for (User *U : LI->users()) {
    if (!UI || UI->getParent() != LI->getParent())
    if (UI->use_empty())
    for (Instruction &I :
         make_range(std::next(LI->getIterator()), UI->getIterator())) {
    LastCheckedInst = UI;
    if (ScalarIdx.isUnsafe())
    if (ScalarIdx.isSafeWithFreeze()) {
      NeedFreeze.try_emplace(UI, ScalarIdx);
      ScalarIdx.discard();
        Index ? Index->getZExtValue() : -1);
                    << "\n LoadExtractCost: " << OriginalCost
                    << " vs ScalarizedCost: " << ScalarizedCost << "\n");
  if (ScalarizedCost >= OriginalCost)
  Type *ElemType = VecTy->getElementType();
  for (User *U : LI->users()) {
    Value *Idx = EI->getIndexOperand();
    auto It = NeedFreeze.find(EI);
    if (It != NeedFreeze.end())
        Builder.CreateLoad(ElemType, GEP, EI->getName() + ".scalar"));
    Align ScalarOpAlignment =
    NewLoad->setAlignment(ScalarOpAlignment);
      size_t Offset = ConstIdx->getZExtValue() * DL->getTypeStoreSize(ElemType);
      AAMDNodes OldAAMD = LI->getAAMetadata();
    replaceValue(*EI, *NewLoad, false);
  FailureGuard.release();
bool VectorCombine::scalarizeExtExtract(Instruction &I) {
  Type *ScalarDstTy = DstTy->getElementType();
  if (DL->getTypeSizeInBits(SrcTy) != DL->getTypeSizeInBits(ScalarDstTy))
  unsigned ExtCnt = 0;
  bool ExtLane0 = false;
  for (User *U : Ext->users()) {
          Instruction::And, ScalarDstTy, CostKind,
      (ExtCnt - ExtLane0) *
          Instruction::LShr, ScalarDstTy, CostKind,
  if (ScalarCost > VectorCost)
  Value *ScalarV = Ext->getOperand(0);
  SmallDenseSet<ConstantInt *, 8> ExtractedLanes;
  bool AllExtractsTriggerUB = true;
  ExtractElementInst *LastExtract = nullptr;
  for (User *U : Ext->users()) {
      AllExtractsTriggerUB = false;
    if (!LastExtract || LastExtract->comesBefore(Extract))
      LastExtract = Extract;
  if (ExtractedLanes.size() != DstTy->getNumElements() ||
      !AllExtractsTriggerUB ||
  uint64_t SrcEltSizeInBits = DL->getTypeSizeInBits(SrcTy->getElementType());
  uint64_t EltBitMask = (1ull << SrcEltSizeInBits) - 1;
  uint64_t TotalBits = DL->getTypeSizeInBits(SrcTy);
  Value *Mask = ConstantInt::get(PackedTy, EltBitMask);
  for (User *U : Ext->users()) {
            ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)
            : (Idx * SrcEltSizeInBits);
    U->replaceAllUsesWith(And);
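// Illustrative sketch (not from the original source): scalarizeExtExtract
// handles a vector extend whose results are all extracted as scalars. The
// source vector is bitcast to one wide integer and each lane is recovered with
// a shift and mask instead of materializing the extended vector:
//
//   %e  = zext <4 x i8> %v to <4 x i32>
//   %l2 = extractelement <4 x i32> %e, i32 2
//     ==>
//   %p  = bitcast <4 x i8> %v to i32
//   %s  = lshr i32 %p, 16
//   %l2 = and i32 %s, 255
//
// The shift amount depends on lane index, element size, and endianness, as
// computed above from SrcEltSizeInBits and TotalBits.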
bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) {
  Type *Ty = I.getType();
  if (DL->isBigEndian())
  uint64_t ShAmtX = 0;
  uint64_t ShAmtY = 0;
  if (ShAmtX > ShAmtY) {
  uint64_t ShAmtDiff = ShAmtY - ShAmtX;
  unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
      MaskTy->getNumElements() != ShAmtDiff ||
      MaskTy->getNumElements() > (BitWidth / 2))
      Type::getIntNTy(Ty->getContext(), ConcatTy->getNumElements());
  auto *MaskIntTy = Type::getIntNTy(Ty->getContext(), ShAmtDiff);
  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
  if (Ty != ConcatIntTy)
  LLVM_DEBUG(dbgs() << "Found a concatenation of bitcasted bool masks: " << I
                    << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  if (Ty != ConcatIntTy) {
  replaceValue(I, *Result);
bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
  BinaryOperator *BinOp;
  ArrayRef<int> OuterMask;
  Value *Op00, *Op01, *Op10, *Op11;
  ArrayRef<int> Mask0, Mask1;
  if (!Match0 && !Match1)
  if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
  unsigned NumSrcElts = BinOpTy->getNumElements();
      any_of(OuterMask, [NumSrcElts](int M) { return M >= (int)NumSrcElts; }))
  SmallVector<int> NewMask0, NewMask1;
  for (int M : OuterMask) {
    if (M < 0 || M >= (int)NumSrcElts) {
      NewMask0.push_back(Match0 ? Mask0[M] : M);
      NewMask1.push_back(Match1 ? Mask1[M] : M);
  unsigned NumOpElts = Op0Ty->getNumElements();
  bool IsIdentity0 = ShuffleDstTy == Op0Ty &&
      all_of(NewMask0, [NumOpElts](int M) { return M < (int)NumOpElts; }) &&
  bool IsIdentity1 = ShuffleDstTy == Op1Ty &&
      all_of(NewMask1, [NumOpElts](int M) { return M < (int)NumOpElts; }) &&
      BinOpTy, OuterMask, CostKind, 0, nullptr, {BinOp}, &I);
          Op0Ty, NewMask0, CostKind, 0, nullptr, {Op00, Op01});
          Op1Ty, NewMask1, CostKind, 0, nullptr, {Op10, Op11});
  LLVM_DEBUG(dbgs() << "Found a shuffle feeding a shuffled binop: " << I
                    << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  NewInst->copyIRFlags(BinOp);
  replaceValue(I, *NewBO);
bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
  ArrayRef<int> OldMask;
  if (LHS->getOpcode() != RHS->getOpcode())
  bool IsCommutative = false;
    IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
  if (!ShuffleDstTy || !BinResTy || !BinOpTy || X->getType() != Z->getType())
  unsigned NumSrcElts = BinOpTy->getNumElements();
  if (IsCommutative && X != Z && Y != W && (X == W || Y == Z))
  auto ConvertToUnary = [NumSrcElts](int &M) {
    if (M >= (int)NumSrcElts)
  SmallVector<int> NewMask0(OldMask);
  SmallVector<int> NewMask1(OldMask);
  ArrayRef<int> InnerMask;
                     m_Mask(InnerMask)))) &&
           [NumSrcElts](int M) { return M < (int)NumSrcElts; })) {
  bool ReducedInstCount = false;
  ReducedInstCount |= MergeInner(X, 0, NewMask0, CostKind);
  ReducedInstCount |= MergeInner(Y, 0, NewMask1, CostKind);
  ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0, CostKind);
  ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1, CostKind);
  auto *ShuffleCmpTy =
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
                   : Builder.CreateCmp(PredLHS, Shuf0, Shuf1);
    NewInst->copyIRFlags(LHS);
    NewInst->andIRFlags(RHS);
  replaceValue(I, *NewBO);
bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
  Value *C1, *T1, *F1, *C2, *T2, *F2;
  if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
  if (((SI0FOp == nullptr) != (SI1FOp == nullptr)) ||
      ((SI0FOp != nullptr) &&
       (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))
  auto SelOp = Instruction::Select;
      {I.getOperand(0), I.getOperand(1)}, &I);
      Mask, CostKind, 0, nullptr, {C1, C2});
      toVectorTy(Type::getInt1Ty(I.getContext()), DstVecTy->getNumElements()));
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
    NewSel = Builder.CreateSelectFMF(ShuffleCmp, ShuffleTrue, ShuffleFalse,
                                     SI0FOp->getFastMathFlags());
    NewSel = Builder.CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
  replaceValue(I, *NewSel);
bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
  ArrayRef<int> OldMask;
  if (!C0 || (IsBinaryShuffle && !C1))
  if (!IsBinaryShuffle && Opcode == Instruction::BitCast)
  if (IsBinaryShuffle) {
    if (C0->getSrcTy() != C1->getSrcTy())
    if (Opcode != C1->getOpcode()) {
      Opcode = Instruction::SExt;
  if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
  unsigned NumSrcElts = CastSrcTy->getNumElements();
  unsigned NumDstElts = CastDstTy->getNumElements();
  assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
         "Only bitcasts expected to alter src/dst element counts");
  if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
      (NumDstElts % NumSrcElts) != 0)
  SmallVector<int, 16> NewMask;
  if (NumSrcElts >= NumDstElts) {
    assert(NumSrcElts % NumDstElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumSrcElts / NumDstElts;
    assert(NumDstElts % NumSrcElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumDstElts / NumSrcElts;
  auto *NewShuffleDstTy =
  if (IsBinaryShuffle)
  if (IsBinaryShuffle) {
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  if (IsBinaryShuffle)
  NewInst->copyIRFlags(C0);
  if (IsBinaryShuffle)
    NewInst->andIRFlags(C1);
  replaceValue(I, *Cast);
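// Illustrative sketch (not from the original source): foldShuffleOfCastops
// sinks a shuffle of two identically-casted vectors below the cast so the
// shuffle operates on the narrower source type:
//
//   %a = sext <4 x i16> %x to <4 x i32>
//   %b = sext <4 x i16> %y to <4 x i32>
//   %s = shufflevector <4 x i32> %a, <4 x i32> %b,
//                      <4 x i32> <i32 0, i32 4, i32 1, i32 5>
//     ==>
//   %t = shufflevector <4 x i16> %x, <4 x i16> %y,
//                      <4 x i32> <i32 0, i32 4, i32 1, i32 5>
//   %s = sext <4 x i16> %t to <4 x i32>
//
// Bitcasts that change the element count require rescaling the mask, which is
// what the ScaleFactor logic above handles.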
bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
  ArrayRef<int> OuterMask;
  Value *OuterV0, *OuterV1;
  ArrayRef<int> InnerMask0, InnerMask1;
  Value *X0, *X1, *Y0, *Y1;
  if (!Match0 && !Match1)
  SmallVector<int, 16> PoisonMask1;
    InnerMask1 = PoisonMask1;
  X0 = Match0 ? X0 : OuterV0;
  Y0 = Match0 ? Y0 : OuterV0;
  X1 = Match1 ? X1 : OuterV1;
  Y1 = Match1 ? Y1 : OuterV1;
  if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
  unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
  unsigned NumImmElts = ShuffleImmTy->getNumElements();
  SmallVector<int, 16> NewMask(OuterMask);
  Value *NewX = nullptr, *NewY = nullptr;
  for (int &M : NewMask) {
    Value *Src = nullptr;
    if (0 <= M && M < (int)NumImmElts) {
      Src = M >= (int)NumSrcElts ? Y0 : X0;
      M = M >= (int)NumSrcElts ? (M - NumSrcElts) : M;
    } else if (M >= (int)NumImmElts) {
      Src = M >= (int)NumSrcElts ? Y1 : X1;
      M = M >= (int)NumSrcElts ? (M - NumSrcElts) : M;
    assert(0 <= M && M < (int)NumSrcElts && "Unexpected shuffle mask index");
    if (!NewX || NewX == Src) {
    if (!NewY || NewY == Src) {
    replaceValue(I, *NewX);
  bool IsUnary = all_of(NewMask, [&](int M) { return M < (int)NumSrcElts; });
      nullptr, {NewX, NewY});
    NewCost += InnerCost0;
    NewCost += InnerCost1;
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  replaceValue(I, *Shuf);
bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
  ArrayRef<int> OldMask;
  if (IID != II1->getIntrinsicID())
  if (!ShuffleDstTy || !II0Ty)
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)
        II0->getArgOperand(I) != II1->getArgOperand(I))
      II0Ty, OldMask, CostKind, 0, nullptr, {II0, II1}, &I);
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) {
      NewArgsTy.push_back(II0->getArgOperand(I)->getType());
          ShuffleDstTy->getNumElements());
  IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)
          II1->getArgOperand(I), OldMask);
    NewInst->copyIRFlags(II0);
    NewInst->andIRFlags(II1);
  replaceValue(I, *NewIntrinsic);
  int M = SV->getMaskValue(Lane);
  if (static_cast<unsigned>(M) < NumElts) {
    U = &SV->getOperandUse(0);
    U = &SV->getOperandUse(1);
  auto [U, Lane] = IL;

  unsigned NumElts = Ty->getNumElements();
  if (Item.size() == NumElts || NumElts == 1 || Item.size() % NumElts != 0)
  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
  unsigned NumSlices = Item.size() / NumElts;
  for (unsigned Slice = 0; Slice < NumSlices; ++Slice) {
    Use *SliceV = Item[Slice * NumElts].first;
    if (!SliceV || SliceV->get()->getType() != Ty)
    for (unsigned Elt = 0; Elt < NumElts; ++Elt) {
      auto [V, Lane] = Item[Slice * NumElts + Elt];
      if (Lane != static_cast<int>(Elt) || SliceV->get() != V->get())

  auto [FrontU, FrontLane] = Item.front();
  if (IdentityLeafs.contains(FrontU)) {
    return FrontU->get();
    return Builder.CreateShuffleVector(FrontU->get(), Mask);
  if (ConcatLeafs.contains(FrontU)) {
    for (unsigned S = 0; S < Values.size(); ++S)
      Values[S] = Item[S * NumElts].first->get();
    while (Values.size() > 1) {
      std::iota(Mask.begin(), Mask.end(), 0);
      for (unsigned S = 0; S < NewValues.size(); ++S)
            Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask);
  unsigned NumOps = I->getNumOperands() - (II ? 1 : 0);
  for (unsigned Idx = 0; Idx < NumOps; Idx++) {
      Ops[Idx] = II->getOperand(Idx);
        Ty, IdentityLeafs, SplatLeafs, ConcatLeafs,
  for (const auto &Lane : Item)
    auto *Value = Builder.CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
    auto *Value = Builder.CreateCast(CI->getOpcode(), Ops[0], DstTy);
    auto *Value = Builder.CreateIntrinsic(DstTy, II->getIntrinsicID(), Ops);
bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
  if (!Ty || I.use_empty())
  for (unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)
  SmallPtrSet<Use *, 4> IdentityLeafs, SplatLeafs, ConcatLeafs;
  unsigned NumVisited = 0;
  while (!Worklist.empty()) {
    auto [FrontU, FrontLane] = Item.front();
      return X->getType() == Y->getType() &&
    if (FrontLane == 0 &&
            Ty->getNumElements() &&
          return !E.value().first || (IsEquiv(E.value().first->get(), FrontV) &&
                                      E.value().second == (int)E.index());
      IdentityLeafs.insert(FrontU);
        C && C->getSplatValue() &&
      SplatLeafs.insert(FrontU);
      auto [FrontU, FrontLane] = Item.front();
      auto [U, Lane] = IL;
      return !U || (U->get() == FrontU->get() && Lane == FrontLane);
      SplatLeafs.insert(FrontU);
    auto CheckLaneIsEquivalentToFirst = [Item](InstLane IL) {
      Value *V = IL.first->get();
        if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
        if (CI->getSrcTy()->getScalarType() !=
          SI->getOperand(0)->getType() !=
            II->getIntrinsicID() ==
            !II->hasOperandBundles());
        BO && BO->isIntDivRem())
    } else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst, FPToSIInst,
                   FPToUIInst, SIToFPInst, UIToFPInst>(FrontU)) {
      if (DstTy && SrcTy &&
          SrcTy->getNumElements() == DstTy->getNumElements()) {
               !II->hasOperandBundles()) {
      for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
      ConcatLeafs.insert(FrontU);
  if (NumVisited <= 1)
  LLVM_DEBUG(dbgs() << "Found a superfluous identity shuffle: " << I << "\n");
                              ConcatLeafs, Builder, &TTI);
  replaceValue(I, *V);
bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax:
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax:
  std::queue<Value *> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  ShuffleVectorInst *Shuffle = nullptr;
  while (!Worklist.empty()) {
    Value *CV = Worklist.front();
      if (CI->isBinaryOp()) {
        for (auto *Op : CI->operand_values())
        if (Shuffle && Shuffle != SV)
  for (auto *V : Visited)
    for (auto *U : V->users())
      if (!Visited.contains(U) && U != &I)
  FixedVectorType *VecType =
  FixedVectorType *ShuffleInputType =
  if (!ShuffleInputType)
  SmallVector<int> ConcatMask;
  sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
  bool UsesSecondVec =
      any_of(ConcatMask, [&](int M) { return M >= (int)NumInputElts; });
      ShuffleInputType, ConcatMask, CostKind);
  LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
  LLVM_DEBUG(dbgs() << " OldCost: " << OldCost << " vs NewCost: " << NewCost
  bool MadeChanges = false;
  if (NewCost < OldCost) {
    LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n");
    replaceValue(*Shuffle, *NewShuffle);
  MadeChanges |= foldSelectShuffle(*Shuffle, true);
bool VectorCombine::foldShuffleChainsToReduce(Instruction &I) {
  std::queue<Value *> InstWorklist;
  std::optional<unsigned int> CommonCallOp = std::nullopt;
  std::optional<Instruction::BinaryOps> CommonBinOp = std::nullopt;
  bool IsFirstCallOrBinInst = true;
  bool ShouldBeCallOrBinInst = true;
  SmallVector<Value *, 2> PrevVecV(2, nullptr);
  int64_t VecSize = FVT->getNumElements();
  unsigned int NumLevels = Log2_64_Ceil(VecSize), VisitedCnt = 0;
  int64_t ShuffleMaskHalf = 1, ExpectedParityMask = 0;
  for (int Cur = VecSize, Mask = NumLevels - 1; Cur > 1;
       Cur = (Cur + 1) / 2, --Mask) {
      ExpectedParityMask |= (1ll << Mask);
  InstWorklist.push(VecOpEE);
  while (!InstWorklist.empty()) {
    Value *CI = InstWorklist.front();
      if (!ShouldBeCallOrBinInst)
      if (!IsFirstCallOrBinInst &&
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
      if (II != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
      IsFirstCallOrBinInst = false;
        CommonCallOp = II->getIntrinsicID();
      if (II->getIntrinsicID() != *CommonCallOp)
      switch (II->getIntrinsicID()) {
      case Intrinsic::umin:
      case Intrinsic::umax:
      case Intrinsic::smin:
      case Intrinsic::smax: {
        auto *Op0 = II->getOperand(0);
        auto *Op1 = II->getOperand(1);
      ShouldBeCallOrBinInst ^= 1;
      IntrinsicCostAttributes ICA(
          *CommonCallOp, II->getType(),
          {PrevVecV[0]->getType(), PrevVecV[1]->getType()});
      InstWorklist.push(PrevVecV[1]);
      InstWorklist.push(PrevVecV[0]);
      if (!ShouldBeCallOrBinInst)
      if (!IsFirstCallOrBinInst &&
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
      if (BinOp != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
      IsFirstCallOrBinInst = false;
      switch (*CommonBinOp) {
      case BinaryOperator::Add:
      case BinaryOperator::Mul:
      case BinaryOperator::Or:
      case BinaryOperator::And:
      case BinaryOperator::Xor: {
      ShouldBeCallOrBinInst ^= 1;
      InstWorklist.push(PrevVecV[1]);
      InstWorklist.push(PrevVecV[0]);
      if (ShouldBeCallOrBinInst ||
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
      if (SVInst != PrevVecV[1])
      ArrayRef<int> CurMask;
      for (int Mask = 0, MaskSize = CurMask.size(); Mask != MaskSize; ++Mask) {
        if (Mask < ShuffleMaskHalf &&
            CurMask[Mask] != ShuffleMaskHalf + Mask - (ExpectedParityMask & 1))
        if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1)
      ShuffleMaskHalf *= 2;
      ShuffleMaskHalf -= (ExpectedParityMask & 1);
      ExpectedParityMask >>= 1;
          SVInst->getType(), SVInst->getType(),
      if (!ExpectedParityMask && VisitedCnt == NumLevels)
      ShouldBeCallOrBinInst ^= 1;
  if (ShouldBeCallOrBinInst)
  assert(VecSize != -1 && "Expected Match for Vector Size");
  Value *FinalVecV = PrevVecV[0];
  IntrinsicCostAttributes ICA(ReducedOp, FinalVecVTy, {FinalVecV});
  if (NewCost >= OrigCost)
  auto *ReducedResult =
  replaceValue(I, *ReducedResult);
bool VectorCombine::foldCastFromReductions(Instruction &I) {
  bool TruncOnly = false;
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  Value *ReductionSrc = I.getOperand(0);
  Type *ResultTy = I.getType();
      ReductionOpc, ReductionSrcTy, std::nullopt, CostKind);
  if (OldCost <= NewCost || !NewCost.isValid())
      II->getIntrinsicID(), {Src});
  replaceValue(I, *NewCast);

  constexpr unsigned MaxVisited = 32;
  bool FoundReduction = false;
  while (!WorkList.empty()) {
    for (User *U : I->users()) {
      if (!UI || !Visited.insert(UI).second)
      if (Visited.size() > MaxVisited)
        switch (II->getIntrinsicID()) {
        case Intrinsic::vector_reduce_add:
        case Intrinsic::vector_reduce_mul:
        case Intrinsic::vector_reduce_and:
        case Intrinsic::vector_reduce_or:
        case Intrinsic::vector_reduce_xor:
        case Intrinsic::vector_reduce_smin:
        case Intrinsic::vector_reduce_smax:
        case Intrinsic::vector_reduce_umin:
        case Intrinsic::vector_reduce_umax:
          FoundReduction = true;
  return FoundReduction;
bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
  if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
  SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
    if (!I || I->getOperand(0)->getType() != VT)
    return any_of(I->users(), [&](User *U) {
      return U != Op0 && U != Op1 &&
             !(isa<ShuffleVectorInst>(U) &&
               (InputShuffles.contains(cast<Instruction>(U)) ||
                isInstructionTriviallyDead(cast<Instruction>(U))));
  if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
      checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
    for (auto *U : I->users()) {
      if (!SV || SV->getType() != VT)
      if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
          (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
  if (!collectShuffles(Op0) || !collectShuffles(Op1))
  if (FromReduction && Shuffles.size() > 1)
  if (!FromReduction) {
    for (ShuffleVectorInst *SV : Shuffles) {
      for (auto *U : SV->users()) {
          Shuffles.push_back(SSV);
  int MaxV1Elt = 0, MaxV2Elt = 0;
  unsigned NumElts = VT->getNumElements();
  for (ShuffleVectorInst *SVN : Shuffles) {
    SmallVector<int> Mask;
    SVN->getShuffleMask(Mask);
    Value *SVOp0 = SVN->getOperand(0);
    Value *SVOp1 = SVN->getOperand(1);
    for (int &Elem : Mask) {
    if (SVOp0 == Op1 && SVOp1 == Op0) {
    if (SVOp0 != Op0 || SVOp1 != Op1)
    SmallVector<int> ReconstructMask;
    for (unsigned I = 0; I < Mask.size(); I++) {
      } else if (Mask[I] < static_cast<int>(NumElts)) {
        MaxV1Elt = std::max(MaxV1Elt, Mask[I]);
        auto It = find_if(V1, [&](const std::pair<int, int> &A) {
          return Mask[I] == A.first;
        MaxV2Elt = std::max<int>(MaxV2Elt, Mask[I] - NumElts);
        auto It = find_if(V2, [&](const std::pair<int, int> &A) {
          return Mask[I] - static_cast<int>(NumElts) == A.first;
    sort(ReconstructMask);
    OrigReconstructMasks.push_back(std::move(ReconstructMask));
      (MaxV1Elt == static_cast<int>(V1.size()) - 1 &&
       MaxV2Elt == static_cast<int>(V2.size()) - 1))
    if (InputShuffles.contains(SSV))
    return SV->getMaskValue(M);
                      std::pair<int, int> Y) {
    int MXA = GetBaseMaskValue(A, X.first);
    int MYA = GetBaseMaskValue(A, Y.first);
  stable_sort(V1, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI0A, A, B);
  stable_sort(V2, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI1A, A, B);
  for (const auto &Mask : OrigReconstructMasks) {
    SmallVector<int> ReconstructMask;
    for (int M : Mask) {
        auto It = find_if(V, [M](auto A) { return A.second == M; });
        assert(It != V.end() && "Expected all entries in Mask");
        return std::distance(V.begin(), It);
      } else if (M < static_cast<int>(NumElts)) {
        ReconstructMask.push_back(FindIndex(V1, M));
        ReconstructMask.push_back(NumElts + FindIndex(V2, M));
    ReconstructMasks.push_back(std::move(ReconstructMask));
  SmallVector<int> V1A, V1B, V2A, V2B;
  for (unsigned I = 0; I < V1.size(); I++) {
    V1A.push_back(GetBaseMaskValue(SVI0A, V1[I].first));
    V1B.push_back(GetBaseMaskValue(SVI0B, V1[I].first));
  for (unsigned I = 0; I < V2.size(); I++) {
    V2A.push_back(GetBaseMaskValue(SVI1A, V2[I].first));
    V2B.push_back(GetBaseMaskValue(SVI1B, V2[I].first));
  while (V1A.size() < NumElts) {
  while (V2A.size() < NumElts) {
        VT, VT, SV->getShuffleMask(), CostKind);
  unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
  unsigned MaxVectorSize =
  unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
  if (MaxElementsInVector == 0)
  std::set<SmallVector<int, 4>> UniqueShuffles;
    unsigned NumFullVectors = Mask.size() / MaxElementsInVector;
    if (NumFullVectors < 2)
      return C + ShuffleCost;
    SmallVector<int, 4> SubShuffle(MaxElementsInVector);
    unsigned NumUniqueGroups = 0;
    unsigned NumGroups = Mask.size() / MaxElementsInVector;
    for (unsigned I = 0; I < NumFullVectors; ++I) {
      for (unsigned J = 0; J < MaxElementsInVector; ++J)
        SubShuffle[J] = Mask[MaxElementsInVector * I + J];
      if (UniqueShuffles.insert(SubShuffle).second)
        NumUniqueGroups += 1;
    return C + ShuffleCost * NumUniqueGroups / NumGroups;
    SmallVector<int, 16> Mask;
    SV->getShuffleMask(Mask);
    return AddShuffleMaskAdjustedCost(C, Mask);
  auto AllShufflesHaveSameOperands =
      [](SmallPtrSetImpl<Instruction *> &InputShuffles) {
        if (InputShuffles.size() < 2)
        ShuffleVectorInst *FirstSV =
            std::next(InputShuffles.begin()), InputShuffles.end(),
            [&](Instruction *I) {
              ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(I);
              return SV && SV->getOperand(0) == In0 && SV->getOperand(1) == In1;
  CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
  if (AllShufflesHaveSameOperands(InputShuffles)) {
    UniqueShuffles.clear();
    CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
    CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
  FixedVectorType *Op0SmallVT =
  FixedVectorType *Op1SmallVT =
  UniqueShuffles.clear();
  CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
  std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
      std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
  LLVM_DEBUG(dbgs() << "Found a binop select shuffle pattern: " << I << "\n");
             << " vs CostAfter: " << CostAfter << "\n");
  if (CostBefore < CostAfter ||
    if (InputShuffles.contains(SSV))
    return SV->getOperand(Op);
                                 GetShuffleOperand(SVI0A, 1), V1A);
                                 GetShuffleOperand(SVI0B, 1), V1B);
                                 GetShuffleOperand(SVI1A, 1), V2A);
                                 GetShuffleOperand(SVI1B, 1), V2B);
    I->copyIRFlags(Op0, true);
    I->copyIRFlags(Op1, true);
  for (int S = 0, E = ReconstructMasks.size(); S != E; S++) {
    replaceValue(*Shuffles[S], *NSV, false);
  Worklist.pushValue(NSV0A);
  Worklist.pushValue(NSV0B);
  Worklist.pushValue(NSV1A);
  Worklist.pushValue(NSV1B);
bool VectorCombine::shrinkType(Instruction &I) {
  Value *ZExted, *OtherOperand;
  Value *ZExtOperand = I.getOperand(I.getOperand(0) == OtherOperand ? 1 : 0);
  unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();
  if (I.getOpcode() == Instruction::LShr) {
      Instruction::ZExt, BigTy, SmallTy,
      TargetTransformInfo::CastContextHint::None, CostKind);
  for (User *U : ZExtOperand->users()) {
      ShrinkCost += ZExtCost;
    ShrinkCost += ZExtCost;
      Instruction::Trunc, SmallTy, BigTy,
      TargetTransformInfo::CastContextHint::None, CostKind);
  if (ShrinkCost > CurrentCost)
  Value *Op0 = ZExted;
  if (I.getOperand(0) == OtherOperand)
  replaceValue(I, *NewZExtr);
bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
  Value *DstVec, *SrcVec;
  uint64_t ExtIdx, InsIdx;
  if (!DstVecTy || !SrcVecTy ||
      SrcVecTy->getElementType() != DstVecTy->getElementType())
  unsigned NumDstElts = DstVecTy->getNumElements();
  unsigned NumSrcElts = SrcVecTy->getNumElements();
  if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)
  bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
  bool IsExtIdxInBounds = ExtIdx < NumDstElts;
  if (NeedDstSrcSwap) {
    if (!IsExtIdxInBounds && NeedExpOrNarrow)
      Mask[InsIdx] = ExtIdx;
    std::iota(Mask.begin(), Mask.end(), 0);
    if (!IsExtIdxInBounds && NeedExpOrNarrow)
      Mask[InsIdx] = NumDstElts;
      Mask[InsIdx] = ExtIdx + NumDstElts;
  SmallVector<int> ExtToVecMask;
  if (!NeedExpOrNarrow) {
        nullptr, {DstVec, SrcVec});
    if (IsExtIdxInBounds)
      ExtToVecMask[ExtIdx] = ExtIdx;
      ExtToVecMask[0] = ExtIdx;
        DstVecTy, SrcVecTy, ExtToVecMask, CostKind);
  if (!Ext->hasOneUse())
  LLVM_DEBUG(dbgs() << "Found a insert/extract shuffle-like pair: " << I
                    << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (OldCost < NewCost)
  if (NeedExpOrNarrow) {
    if (!NeedDstSrcSwap)
  replaceValue(I, *Shuf);
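// Illustrative sketch (not from the original source): foldInsExtVectorToShuffle
// merges an extractelement feeding an insertelement into a single
// shufflevector when the source and destination vector types are compatible:
//
//   %e = extractelement <4 x float> %src, i64 2
//   %r = insertelement  <4 x float> %dst, float %e, i64 0
//     ==>
//   %r = shufflevector <4 x float> %dst, <4 x float> %src,
//                      <4 x i32> <i32 6, i32 1, i32 2, i32 3>
//
// Mismatched element counts first go through an extra length-changing shuffle,
// which is why the code above builds ExtToVecMask.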
bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
  const APInt *SplatVal0, *SplatVal1;
  auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
  unsigned Width = VTy->getElementType()->getIntegerBitWidth();
  LLVM_DEBUG(dbgs() << "VC: The cost to cast from " << *ExtVTy << " to "
                    << *I.getType() << " is too high.\n");
  APInt NewSplatVal = SplatVal1->zext(Width * 2);
  NewSplatVal <<= Width;
  NewSplatVal |= SplatVal0->zext(Width * 2);
      ExtVTy->getElementCount(), ConstantInt::get(F.getContext(), NewSplatVal));
bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
  if (!OldLoad || !OldLoad->isSimple())
  unsigned const OldNumElements = OldLoadTy->getNumElements();
  using IndexRange = std::pair<int, int>;
  auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
    IndexRange OutputRange = IndexRange(OldNumElements, -1);
    for (llvm::Use &Use : I.uses()) {
      User *Shuffle = Use.getUser();
        return std::nullopt;
      for (int Index : Mask) {
        if (Index >= 0 && Index < static_cast<int>(OldNumElements)) {
          OutputRange.first = std::min(Index, OutputRange.first);
          OutputRange.second = std::max(Index, OutputRange.second);
    if (OutputRange.second < OutputRange.first)
      return std::nullopt;
  if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
    unsigned const NewNumElements = Indices->second + 1u;
    if (NewNumElements < OldNumElements) {
      Type *ElemTy = OldLoadTy->getElementType();
      Value *PtrOp = OldLoad->getPointerOperand();
          Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
          OldLoad->getPointerAddressSpace(), CostKind);
          OldLoad->getPointerAddressSpace(), CostKind);
      using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
      unsigned const MaxIndex = NewNumElements * 2u;
      for (llvm::Use &Use : I.uses()) {
        ArrayRef<int> OldMask = Shuffle->getShuffleMask();
        for (int Index : OldMask) {
          if (Index >= static_cast<int>(MaxIndex))
          dbgs() << "Found a load used only by shufflevector instructions: "
                 << I << "\n OldCost: " << OldCost
                 << " vs NewCost: " << NewCost << "\n");
      if (OldCost < NewCost || !NewCost.isValid())
      NewLoad->copyMetadata(I);
      for (UseEntry &Use : NewUses) {
        ShuffleVectorInst *Shuffle = Use.first;
        std::vector<int> &NewMask = Use.second;
        replaceValue(*Shuffle, *NewShuffle, false);
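// Illustrative sketch (not from the original source): shrinkLoadForShuffles
// narrows a vector load whose only users are shuffles that never read its
// upper lanes:
//
//   %v = load <8 x i32>, ptr %p
//   %s = shufflevector <8 x i32> %v, <8 x i32> poison,
//                      <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//     ==>
//   %v = load <4 x i32>, ptr %p
//   %s = shufflevector <4 x i32> %v, <4 x i32> poison,
//                      <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//
// The new width is derived from the highest lane index seen across all user
// shuffle masks (GetIndexRangeInShuffles above).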
bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {
  if (!Phi || Phi->getNumIncomingValues() != 2u)
  ArrayRef<int> Mask0;
  ArrayRef<int> Mask1;
  auto const InputNumElements = InputVT->getNumElements();
  if (InputNumElements >= ResultVT->getNumElements())
  SmallVector<int, 16> NewMask;
  for (auto [M0, M1] : zip(Mask0, Mask1)) {
    if (M0 >= 0 && M1 >= 0)
    else if (M0 == -1 && M1 == -1)
  int MaskOffset = NewMask[0u];
  unsigned Index = (InputNumElements + MaskOffset) % InputNumElements;
  for (unsigned I = 0u; I < InputNumElements; ++I) {
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  auto *NewPhi = Builder.CreatePHI(NewShuf0->getType(), 2u);
  NewPhi->addIncoming(Op, Phi->getIncomingBlock(1u));
  replaceValue(*Phi, *NewShuf1);
bool VectorCombine::run() {
  auto Opcode = I.getOpcode();
    if (IsFixedVectorType) {
      case Instruction::InsertElement:
        if (vectorizeLoadInsert(I))
      case Instruction::ShuffleVector:
        if (widenSubvectorLoad(I))
      if (scalarizeOpOrCmp(I))
      if (scalarizeLoadExtract(I))
      if (scalarizeExtExtract(I))
      if (scalarizeVPIntrinsic(I))
      if (foldInterleaveIntrinsics(I))
    if (Opcode == Instruction::Store)
      if (foldSingleElementStore(I))
    if (TryEarlyFoldsOnly)
    if (IsFixedVectorType) {
      case Instruction::InsertElement:
        if (foldInsExtFNeg(I))
        if (foldInsExtBinop(I))
        if (foldInsExtVectorToShuffle(I))
      case Instruction::ShuffleVector:
        if (foldPermuteOfBinops(I))
        if (foldShuffleOfBinops(I))
        if (foldShuffleOfSelects(I))
        if (foldShuffleOfCastops(I))
        if (foldShuffleOfShuffles(I))
        if (foldShuffleOfIntrinsics(I))
        if (foldSelectShuffle(I))
        if (foldShuffleToIdentity(I))
      case Instruction::Load:
        if (shrinkLoadForShuffles(I))
      case Instruction::BitCast:
        if (foldBitcastShuffle(I))
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor:
        if (foldBitOpOfCastops(I))
        if (foldBitOpOfCastConstant(I))
      case Instruction::PHI:
        if (shrinkPhiOfShuffles(I))
      case Instruction::Call:
        if (foldShuffleFromReductions(I))
        if (foldCastFromReductions(I))
      case Instruction::ExtractElement:
        if (foldShuffleChainsToReduce(I))
      case Instruction::ICmp:
      case Instruction::FCmp:
        if (foldExtractExtract(I))
      case Instruction::Or:
        if (foldConcatOfBoolMasks(I))
      if (foldExtractExtract(I))
      if (foldExtractedCmps(I))
      if (foldBinopOfReductions(I))
  bool MadeChange = false;
  for (BasicBlock &BB : F) {
      if (!I->isDebugOrPseudoInst())
        MadeChange |= FoldInst(*I);
  while (!Worklist.isEmpty()) {
    MadeChange |= FoldInst(*I);
static Value * generateNewInstTree(ArrayRef< InstLane > Item, FixedVectorType *Ty, const SmallPtrSet< Use *, 4 > &IdentityLeafs, const SmallPtrSet< Use *, 4 > &SplatLeafs, const SmallPtrSet< Use *, 4 > &ConcatLeafs, IRBuilderBase &Builder, const TargetTransformInfo *TTI)
static bool isFreeConcat(ArrayRef< InstLane > Item, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI)
Detect concat of multiple values into a vector.
static void analyzeCostOfVecReduction(const IntrinsicInst &II, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI, InstructionCost &CostBeforeReduction, InstructionCost &CostAfterReduction)
static SmallVector< InstLane > generateInstLaneVectorFromOperand(ArrayRef< InstLane > Item, int Op)
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilderBase &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI)
Returns true if this ShuffleVectorInst eventually feeds into a vector reduction intrinsic (e....
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, Instruction *CtxI, AssumptionCache &AC, const DominatorTree &DT)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static InstLane lookThroughShuffles(Use *U, int Lane)
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
static const unsigned InvalidIndex
std::pair< Use *, int > InstLane
static Value * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilderBase &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift the scalar element) to a NewIndex for extraction.

static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
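A sketch of the idea behind this helper (an approximation, not necessarily the exact implementation): scan the instructions in [Begin, End) and ask alias analysis whether any of them may modify the queried location.

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/BasicBlock.h"
#include <algorithm>
using namespace llvm;

static bool mayWriteBetweenSketch(BasicBlock::iterator Begin,
                                  BasicBlock::iterator End,
                                  const MemoryLocation &Loc, AAResults &AA) {
  return std::any_of(Begin, End, [&](Instruction &I) {
    return isModSet(AA.getModRefInfo(&I, Loc));
  });
}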
static constexpr int Concat[]
A manager for alias analyses.
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
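A tiny standalone usage sketch tying these APInt helpers together (values chosen purely for illustration):

#include "llvm/ADT/APInt.h"
using namespace llvm;

void apintDemo() {
  APInt Bit3 = APInt::getOneBitSet(/*numBits=*/8, /*BitNo=*/3); // 0b00001000
  APInt Wide = Bit3.zext(32);                  // zero extend, value stays 8
  bool AtLeastEight = Wide.uge(APInt(32, 8));  // unsigned >=, true here
  (void)AtLeastEight;
}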
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's constructor.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
static LLVM_ABI std::optional< CmpPredicate > getMatching(CmpPredicate A, CmpPredicate B)
Compares two CmpPredicates taking samesign into account and returns the canonicalized CmpPredicate if they match, or std::nullopt otherwise.
static LLVM_ABI Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
LLVM_ABI ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of a value in this range and a value in Other.
LLVM_ABI ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this range by a value in Other.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
A parsed version of the target data layout string in and methods for querying it.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Common base class shared among various IRBuilders.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcast to NumElts elements.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool' for the isVolatile parameter.
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
LLVM_ABI Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFNegFMF(Value *V, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
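For orientation, a hedged sketch of the kind of IR these Create* calls are combined to emit when a single lane of a vector binop is scalarized (hypothetical helper; Builder and the operands are assumed to exist):

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static Value *scalarizeAddLaneSketch(IRBuilderBase &Builder, Value *Vec0,
                                     Value *Vec1, uint64_t Lane) {
  Value *Idx = Builder.getInt64(Lane);
  Value *E0 = Builder.CreateExtractElement(Vec0, Idx);     // lane from LHS
  Value *E1 = Builder.CreateExtractElement(Vec1, Idx);     // lane from RHS
  Value *S = Builder.CreateBinOp(Instruction::Add, E0, E1); // scalar add
  return Builder.CreateInsertElement(Vec0, S, Idx);        // put it back
}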
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
void push(Instruction *I)
Push the instruction onto the worklist stack.
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this instruction.
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instruction comes before Other.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossings.
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have swapped position.
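A small sketch combining the mask helpers above (hypothetical query, assuming fixed-width operands): copy the mask, pretend the two sources were swapped, then test whether the result is a plain identity, i.e. whether the original shuffle simply selected the second source in order.

#include "llvm/IR/Instructions.h"
using namespace llvm;

static bool selectsSecondSourceInOrder(ShuffleVectorInst &SVI) {
  SmallVector<int> Mask;
  SVI.getShuffleMask(Mask);
  unsigned SrcElts =
      cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
  ShuffleVectorInst::commuteShuffleMask(Mask, SrcElts);
  return ShuffleVectorInst::isIdentityMask(Mask, SrcElts);
}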
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static LLVM_ABI bool isVPBinOp(Intrinsic::ID ID)
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to false.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
std::pair< iterator, bool > insert(const ValueT &V)
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations in either order.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
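A short sketch of how these matchers compose (hypothetical predicate): check that a value is a single-use extractelement with a constant index.

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

static bool isOneUseConstIndexExtract(Value *V) {
  // Matches: %e = extractelement <N x T> %vec, iK <constant>, with one use.
  return match(V, m_OneUse(m_ExtractElt(m_Value(), m_ConstantInt())));
}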
initializer< Ty > init(const Ty &Val)
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
LLVM_ABI Value * simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q)
Given operand for a UnaryOperator, fold the result or return null.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with an active sanitizer.
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of widened elements.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not have undefined behavior.
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned element.
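A sketch of how the two splat queries above differ (hypothetical wrapper): isSplatValue can prove all lanes are equal even when no splatted scalar can be produced, so getSplatValue may still return null.

#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

static Value *getSplatScalarOrNull(Value *V) {
  if (!isSplatValue(V))
    return nullptr;        // lanes are not provably equal
  return getSplatValue(V); // may be null even for a proven splat
}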
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.
LLVM_ABI bool programUndefinedIfPoison(const Instruction *Inst)
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
LLVM_ABI void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr int PoisonMaskElem
LLVM_ABI bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed elements.
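A worked example for the two mask-scaling helpers (values chosen for illustration): narrowing a two-lane mask by a factor of 2 turns each lane index into two consecutive half-width indices.

#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

void maskScalingDemo() {
  int WideMask[] = {1, 0};       // swap the two wide lanes
  SmallVector<int> NarrowMask;
  narrowShuffleMaskElts(/*Scale=*/2, WideMask, NarrowMask);
  // NarrowMask is now {2, 3, 0, 1}: each wide lane expands to two narrow lanes.
}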
LLVM_ABI Intrinsic::ID getReductionForBinop(Instruction::BinaryOps Opc)
Returns the reduction intrinsic id corresponding to the binary operation.
@ And
Bitwise or logical AND of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
constexpr unsigned BitWidth
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one of its successors (including the next instruction that follows within a basic block).
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
LLVM_ABI Value * simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicID(Intrinsic::ID IID)
Returns the llvm.vector.reduce min/max intrinsic that corresponds to the intrinsic op.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known bits.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
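Finally, a sketch of how these KnownBits queries can bound a variable vector index (assumed helper; a real legality check would also handle constants and value ranges): if every possibly-set bit of the index lies below log2(NumElts), the index is provably in bounds for a power-of-two element count.

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

static bool indexFitsSketch(const Value *Idx, unsigned Log2NumElts,
                            const DataLayout &DL) {
  KnownBits Known = computeKnownBits(Idx, DL);
  return Known.countMaxActiveBits() <= Log2NumElts;
}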