#define DEBUG_TYPE "vector-combine"
STATISTIC(NumVecLoad, "Number of vector loads formed");
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
STATISTIC(NumScalarBO, "Number of scalar binops formed");
STATISTIC(NumScalarCmp, "Number of scalar compares formed");
static cl::opt<bool> DisableVectorCombine(
    "disable-vector-combine", cl::init(false), cl::Hidden,
    cl::desc("Disable all vector combine transforms"));

static cl::opt<bool> DisableBinopExtractShuffle(
    "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
    cl::desc("Disable binop extract to shuffle transforms"));

static cl::opt<unsigned> MaxInstrsToScan(
    "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,
    cl::desc("Max number of instructions to scan for vector combining."));
static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
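// VectorCombine iterates over the instructions of a function, costs candidate
// scalar <-> vector folds with TargetTransformInfo, and applies the ones that
// are expected to be profitable.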
namespace {
class VectorCombine {
public:
  VectorCombine(Function &F, const TargetTransformInfo &TTI,
                const DominatorTree &DT, AAResults &AA, AssumptionCache &AC,
                const DataLayout *DL, bool TryEarlyFoldsOnly)
      : F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC),
        DL(DL), TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}

  bool run();

private:
  // If true, only perform beneficial early IR transforms. Do not introduce new
  // vector operations.
  bool TryEarlyFoldsOnly;
  ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
                                        ExtractElementInst *Ext1,
                                        unsigned PreferredExtractIndex =
                                            InvalidIndex) const;
  bool isExtractExtractCheap(ExtractElementInst *Ext0,
                             ExtractElementInst *Ext1, const Instruction &I,
                             ExtractElementInst *&ConvertToShuffle,
                             unsigned PreferredExtractIndex);
  bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
  void replaceValue(Value &Old, Value &New) {
    Old.replaceAllUsesWith(&New);
    if (auto *NewI = dyn_cast<Instruction>(&New)) {
      New.takeName(&Old);
      Worklist.pushUsersToWorkList(*NewI);
      Worklist.pushValue(NewI);
    }
    Worklist.pushValue(&Old);
  }
};
} // namespace
// Return the source operand of a potentially bitcasted value.
static Value *peekThroughBitcasts(Value *V) {
  while (auto *BitCast = dyn_cast<BitCastInst>(V))
    V = BitCast->getOperand(0);
  return V;
}
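// Returns true when it looks safe and legal to widen the given scalar load to
// a full vector-register-width load.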
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI) {
  // Do not widen the load if it is atomic/volatile, multi-use, or sanitized:
  // the widened load could read bytes the original program never touched.
  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
      Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
      mustSuppressSpeculation(*Load))
    return false;

  // We are potentially widening the load, so the scalar size must be non-zero,
  // byte-sized, and evenly divide a minimal vector register.
  Type *ScalarTy = Load->getType()->getScalarType();
  uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
  unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
      ScalarSize % 8 != 0)
    return false;

  return true;
}
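// Match:
//   insertelement undef, (load scalar), 0
// and try to replace it with a wide vector load plus a shuffle. Illustrative
// IR (a sketch assuming a target where a <4 x i32> load is cheap; not taken
// from the pass's tests):
//   %s = load i32, ptr %p
//   %i = insertelement <4 x i32> undef, i32 %s, i32 0
// -->
//   %v = load <4 x i32>, ptr %p
//   %i = shufflevector <4 x i32> %v, <4 x i32> poison,
//                      <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>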
bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
  // Match insert into fixed vector of scalar value.
  Value *Scalar;
  if (!match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
      !Scalar->hasOneUse())
    return false;

  // Optionally match an extract from another vector.
  Value *X;
  bool HasExtract = match(Scalar, m_ExtractElt(m_Value(X), m_ZeroInt()));
  if (!HasExtract)
    X = Scalar;
  auto *Load = dyn_cast<LoadInst>(X);
  if (!canWidenLoad(Load, TTI))
    return false;
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");

  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
  unsigned OffsetEltIndex = 0;
    unsigned OffsetBitWidth = DL->getIndexTypeSizeInBits(SrcPtr->getType());
    uint64_t ScalarSizeInBytes = ScalarSize / 8;
    if (Offset.urem(ScalarSizeInBytes) != 0)
      return false;

    // If we load MinVecNumElts, will our target element still be loaded?
    OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
    if (OffsetEltIndex >= MinVecNumElts)
      return false;
  unsigned AS = Load->getPointerAddressSpace();
  auto *Ty = cast<FixedVectorType>(I.getType());
  unsigned OutputNumElts = Ty->getNumElements();
  SmallVector<int, 16> Mask(OutputNumElts, PoisonMaskElem);
  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
  Mask[0] = OffsetEltIndex;
  if (OldCost < NewCost || !NewCost.isValid())
    return false;

  replaceValue(I, *VecLd);
  ++NumVecLoad;
  return true;
}
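// If this shuffle is an identity-with-padding of a loaded subvector, try to
// widen the load of the subvector into a load of the full vector so the
// padding shuffle becomes free.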
bool VectorCombine::widenSubvectorLoad(Instruction &I) {
  // Match subvector insert of fixed vector.
  auto *Shuf = cast<ShuffleVectorInst>(&I);
  if (!Shuf->isIdentityWithPadding())
    return false;

  // Allow a non-canonical shuffle mask that is choosing elements from op1.
  unsigned NumOpElts =
      cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
  unsigned OpIndex = any_of(Shuf->getShuffleMask(), [&NumOpElts](int M) {
    return M >= (int)(NumOpElts);
  });
  auto *Load = dyn_cast<LoadInst>(Shuf->getOperand(OpIndex));
  if (!canWidenLoad(Load, TTI))
    return false;
  auto *Ty = cast<FixedVectorType>(I.getType());
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");

  unsigned AS = Load->getPointerAddressSpace();
  if (OldCost < NewCost || !NewCost.isValid())
    return false;

  replaceValue(I, *VecLd);
  ++NumVecLoad;
  return true;
}
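// Determine which, if any, of two extracts should be rewritten to use the
// other's index so that a single vector op can feed both: prefer the index
// that a later insertelement wants, then fall back to the cheaper extract
// according to TTI.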
ExtractElementInst *VectorCombine::getShuffleExtract(
    ExtractElementInst *Ext0, ExtractElementInst *Ext1,
    unsigned PreferredExtractIndex) const {
  auto *Index0C = dyn_cast<ConstantInt>(Ext0->getIndexOperand());
  auto *Index1C = dyn_cast<ConstantInt>(Ext1->getIndexOperand());
  assert(Index0C && Index1C && "Expected constant extract indexes");

  unsigned Index0 = Index0C->getZExtValue();
  unsigned Index1 = Index1C->getZExtValue();

  // If the extract indexes are identical, no shuffle is needed.
  if (Index0 == Index1)
    return nullptr;

  if (PreferredExtractIndex == Index0)
    return Ext1;
  if (PreferredExtractIndex == Index1)
    return Ext0;

  return Index0 > Index1 ? Ext0 : Ext1;
}
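// Compare the cost of two extracts followed by a scalar operation against one
// vector operation followed by a single extract, setting ConvertToShuffle if
// one extract must first be translated to a new index.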
bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
                                          ExtractElementInst *Ext1,
                                          const Instruction &I,
                                          ExtractElementInst *&ConvertToShuffle,
                                          unsigned PreferredExtractIndex) {
  auto *Ext0IndexC = dyn_cast<ConstantInt>(Ext0->getOperand(1));
  auto *Ext1IndexC = dyn_cast<ConstantInt>(Ext1->getOperand(1));
  assert(Ext0IndexC && Ext1IndexC && "Expected constant extract indexes");

  unsigned Opcode = I.getOpcode();

  assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
         "Expected a compare");
  unsigned Ext0Index = Ext0IndexC->getZExtValue();
  unsigned Ext1Index = Ext1IndexC->getZExtValue();

  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
    bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
                                  : !Ext0->hasOneUse() || !Ext1->hasOneUse();
    OldCost = CheapExtractCost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
  } else {
    OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost +
              !Ext0->hasOneUse() * Extract0Cost +
              !Ext1->hasOneUse() * Extract1Cost;
  }
  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
  if (ConvertToShuffle) {
    // (shuffle cost accounting elided)
  }

  // Aggressively form a vector op if the cost is equal because the transform
  // may enable further optimization.
  return OldCost < NewCost;
}
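// Create a shuffle that translates (shifts) one element from the input vector
// to a new element location.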
static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
                                 unsigned NewIndex, IRBuilder<> &Builder) {
  auto *VecTy = cast<FixedVectorType>(Vec->getType());
  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), PoisonMaskElem);
  ShufMask[NewIndex] = OldIndex;
  return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
}
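// Given an extract element instruction with constant index operand, shuffle
// the source vector (shift the scalar element) to a new index for extraction.
// Return null when the input is a constant, since the shuffle would be
// unnecessary.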
static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
                                            unsigned NewIndex,
                                            IRBuilder<> &Builder) {
  Value *X = ExtElt->getVectorOperand();
  Value *C = ExtElt->getIndexOperand();
  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
  if (isa<Constant>(X))
    return nullptr;

  Value *Shuf = createShiftShuffle(X, cast<ConstantInt>(C)->getZExtValue(),
                                   NewIndex, Builder);
  return cast<ExtractElementInst>(Builder.CreateExtractElement(Shuf, NewIndex));
}
  assert(isa<CmpInst>(&I) && "Expected a compare");
  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
             cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
         "Expected matching constant extract indexes");

  // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
  ++NumVecCmp;
  CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate();
  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
  Value *VecCmp = Builder.CreateCmp(Pred, V0, V1);
  Value *NewExt = Builder.CreateExtractElement(VecCmp, Ext0->getIndexOperand());
  replaceValue(I, *NewExt);
}
  assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
             cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
         "Expected matching constant extract indexes");

  // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
  ++NumVecBO;
  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
  Value *VecBO =
      Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0, V1);

  if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
    VecBOInst->copyIRFlags(&I);

  Value *NewExt = Builder.CreateExtractElement(VecBO, Ext0->getIndexOperand());
  replaceValue(I, *NewExt);
}
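// Try to reduce "binop/cmp (extractelt X, C0), (extractelt Y, C1)" to an
// equivalent vector op followed by a single extract, when the cost model says
// the vector form is no more expensive.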
  auto *Ext0 = cast<ExtractElementInst>(I0);
  auto *Ext1 = cast<ExtractElementInst>(I1);

  ExtractElementInst *ExtractToChange;
  if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
    return false;

  if (ExtractToChange) {
    unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
    ExtractElementInst *NewExtract =
        translateExtract(ExtractToChange, CheapExtractIdx, Builder);
    if (!NewExtract)
      return false;
    if (ExtractToChange == Ext0)
      Ext0 = NewExtract;
    else
      Ext1 = NewExtract;
  }

  if (Pred != CmpInst::BAD_ICMP_PREDICATE)
    foldExtExtCmp(Ext0, Ext1, I);
  else
    foldExtExtBinop(Ext0, Ext1, I);

  Worklist.push(&I);
  return true;
}
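// Match an insert of a negated (fneg) extracted element back into a vector,
// and try to express it as a shuffle of the source vector and a vector fneg
// instead.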
  auto *VecTy = cast<FixedVectorType>(I.getType());
  if (SrcVec->getType() != VecTy)
    return false;

  unsigned NumElts = VecTy->getNumElements();
  if (Index >= NumElts)
    return false;

  SmallVector<int> Mask(NumElts);
  std::iota(Mask.begin(), Mask.end(), 0);
  Mask[Index] = Index + NumElts;

  if (NewCost > OldCost)
    return false;

  replaceValue(I, *Shuf);
  return true;
}
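// Move a bitcast ahead of a shuffle when doing so does not increase the
// number of casts:
//   bitcast (shuffle V0, V1, MaskC) --> shuffle (bitcast V0), (bitcast V1), MaskC'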
bool VectorCombine::foldBitcastShuffle(Instruction &I) {
  Value *V0, *V1;
  ArrayRef<int> Mask;
  if (!match(&I, m_BitCast(m_OneUse(
                     m_Shuffle(m_Value(V0), m_Value(V1), m_Mask(Mask))))))
    return false;

  // Disallow non-vector casts and scalable vectors.
  auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
  auto *SrcTy = dyn_cast<FixedVectorType>(V0->getType());
  if (!DestTy || !SrcTy)
    return false;

  unsigned DestEltSize = DestTy->getScalarSizeInBits();
  unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
  if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
    return false;

  bool IsUnary = isa<UndefValue>(V1);

  // For binary shuffles, only fold if the number of bitcasts does not grow.
  auto *BCTy0 = dyn_cast<FixedVectorType>(peekThroughBitcasts(V0)->getType());
  auto *BCTy1 = dyn_cast<FixedVectorType>(peekThroughBitcasts(V1)->getType());
  if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
      !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
    return false;
  SmallVector<int, 16> NewMask;
  if (DestEltSize <= SrcEltSize) {
    // The bitcast is from wide to narrow/equal elements. The shuffle mask can
    // always be expanded to the equivalent form with narrow elements.
    assert(SrcEltSize % DestEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = SrcEltSize / DestEltSize;
    narrowShuffleMaskElts(ScaleFactor, Mask, NewMask);
  } else {
    // The bitcast is from narrow elements to wide elements. The shuffle mask
    // must choose consecutive elements to allow casting first.
    assert(DestEltSize % SrcEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = DestEltSize / SrcEltSize;
    if (!widenShuffleMaskElts(ScaleFactor, Mask, NewMask))
      return false;
  }

  unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
  auto *NewShuffleTy =
      FixedVectorType::get(DestTy->getScalarType(), NumSrcElts);
  unsigned NumOps = IsUnary ? 1 : 2;

  // The new shuffle plus NumOps bitcasts must not cost more than the old form.
  InstructionCost DestCost =
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, NewShuffleTy,
                         NewMask, CK) +
      (NumOps * TTI.getCastInstrCost(Instruction::BitCast, NewShuffleTy, SrcTy,
                                     TargetTransformInfo::CastContextHint::None,
                                     CK));
  InstructionCost SrcCost =
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, SrcTy, Mask,
                         CK) +
      TTI.getCastInstrCost(Instruction::BitCast, DestTy, SrcTy,
                           TargetTransformInfo::CastContextHint::None, CK);
  if (DestCost > SrcCost || !DestCost.isValid())
    return false;

  // bitcast (shuf V0, V1, MaskC) --> shuf (bitcast V0), (bitcast V1), MaskC'
  ++NumShufOfBitcast;
  Value *CastV0 = Builder.CreateBitCast(peekThroughBitcasts(V0), NewShuffleTy);
  Value *CastV1 = Builder.CreateBitCast(peekThroughBitcasts(V1), NewShuffleTy);
  Value *Shuf = Builder.CreateShuffleVector(CastV0, CastV1, NewMask);
  replaceValue(I, *Shuf);
  return true;
}
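// If a VP binary intrinsic has two splatted operands and an all-true mask,
// compute the operation on the splatted scalars and splat the result, when
// the cost model says that is cheaper.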
bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
  if (!isa<VPIntrinsic>(I))
    return false;

  VPIntrinsic &VPI = cast<VPIntrinsic>(I);
  Value *Op0 = VPI.getArgOperand(0);
  Value *Op1 = VPI.getArgOperand(1);

  if (!isSplatValue(Op0) || !isSplatValue(Op1))
    return false;

  // Check getSplatValue early in this function, to avoid doing unnecessary
  // work.
  Value *ScalarOp0 = getSplatValue(Op0);
  Value *ScalarOp1 = getSplatValue(Op1);
  if (!ScalarOp0 || !ScalarOp1)
    return false;
  // For the binary VP intrinsics supported here, the result on disabled lanes
  // is a poison value, so only transform when all lanes are active.
  auto IsAllTrueMask = [](Value *MaskVal) {
    if (Value *SplattedVal = getSplatValue(MaskVal))
      if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
        return ConstValue->isAllOnesValue();
    return false;
  };
  if (auto *FVTy = dyn_cast<FixedVectorType>(VecTy))
    Mask.resize(FVTy->getNumElements(), 0);
  for (Value *V : VPI.args())
    Args.push_back(V->getType());

  // Determine the scalar opcode.
  std::optional<unsigned> FunctionalOpcode = VPI.getFunctionalOpcode();
  std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
  if (!FunctionalOpcode) {
    ScalarIntrID = VPI.getFunctionalIntrinsicID();
    if (!ScalarIntrID)
      return false;
  }
  LLVM_DEBUG(dbgs() << "Cost of Intrinsic: " << OldCost
                    << ", Cost of scalarizing:" << NewCost << "\n");

  // We want to scalarize unless the vector variant actually has lower cost.
  if (OldCost < NewCost || !NewCost.isValid())
    return false;
  // If the VP op might introduce UB or poison, we can only scalarize it if we
  // know the EVL is non-zero: with a zero EVL the original VP op is a no-op,
  // so the scalarized code must not trap either.
  bool SafeToSpeculate;
  if (ScalarIntrID)
    SafeToSpeculate = Intrinsic::getAttributes(I.getContext(), *ScalarIntrID)
                          .hasFnAttr(Attribute::AttrKind::Speculatable);
  else
    SafeToSpeculate = isSafeToSpeculativelyExecuteWithOpcode(
        *FunctionalOpcode, &VPI, nullptr, &AC, &DT);
  if (!SafeToSpeculate &&
      !isKnownNonZero(EVL, SimplifyQuery(*DL, &DT, &AC, &VPI)))
    return false;

  Value *ScalarVal =
      ScalarIntrID
          ? Builder.CreateIntrinsic(VecTy->getScalarType(), *ScalarIntrID,
                                    {ScalarOp0, ScalarOp1})
          : Builder.CreateBinOp((Instruction::BinaryOps)(*FunctionalOpcode),
                                ScalarOp0, ScalarOp1);

  replaceValue(VPI, *Builder.CreateVectorSplat(EC, ScalarVal));
  return true;
}
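// Match a vector binop or compare where one or both operands are scalars
// inserted into constant vectors, and perform the operation on the scalars
// instead:
//   vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index)
// --> inselt (vec_op VecC0, VecC1), (scalar_op V0, V1), Index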
bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
  Value *Ins0, *Ins1;
  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
      !match(&I, m_Cmp(Pred, m_Value(Ins0), m_Value(Ins1))))
    return false;

  // Do not convert the vector condition of a vector select into a scalar
  // condition. That may cause problems for codegen because of differences in
  // boolean formats and register-file transfers.
  bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
  if (IsCmp)
    for (User *U : I.users())
      if (match(U, m_Select(m_Specific(&I), m_Value(), m_Value())))
        return false;

  // Match against one or both scalar values being inserted into constant
  // vectors.
  Constant *VecC0 = nullptr, *VecC1 = nullptr;
  Value *V0 = nullptr, *V1 = nullptr;
  uint64_t Index0 = 0, Index1 = 0;
  if (!match(Ins0, m_InsertElt(m_Constant(VecC0), m_Value(V0),
                               m_ConstantInt(Index0))) &&
      !match(Ins0, m_Constant(VecC0)))
    return false;
  if (!match(Ins1, m_InsertElt(m_Constant(VecC1), m_Value(V1),
                               m_ConstantInt(Index1))) &&
      !match(Ins1, m_Constant(VecC1)))
    return false;

  bool IsConst0 = !V0;
  bool IsConst1 = !V1;
  if (IsConst0 && IsConst1)
    return false;
  if (!IsConst0 && !IsConst1 && Index0 != Index1)
    return false;
  // Bail for single insertion if it is a load.
  auto *I0 = dyn_cast_or_null<Instruction>(V0);
  auto *I1 = dyn_cast_or_null<Instruction>(V1);
  if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
      (IsConst1 && I0 && I0->mayReadFromMemory()))
    return false;

  uint64_t Index = IsConst0 ? Index1 : Index0;
  Type *ScalarTy = IsConst0 ? V1->getType() : V0->getType();
  Type *VecTy = I.getType();
  assert(VecTy->isVectorTy() &&
         (IsConst0 || IsConst1 || V0->getType() == V1->getType()) &&
         (ScalarTy->isIntegerTy() || ScalarTy->isFloatingPointTy() ||
          ScalarTy->isPointerTy()) &&
         "Unexpected types for insert element into binop or cmp");
  unsigned Opcode = I.getOpcode();

  InstructionCost OldCost =
      (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
  InstructionCost NewCost = ScalarOpCost + InsertCost +
      (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
      (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);

  // We want to scalarize unless the vector variant actually has lower cost.
  if (OldCost < NewCost || !NewCost.isValid())
    return false;
  if (IsCmp)
    ++NumScalarCmp;
  else
    ++NumScalarBO;

  // For constant cases, extract the scalar element; this should constant-fold.
  if (IsConst0)
    V0 = ConstantExpr::getExtractElement(VecC0, Builder.getInt64(Index));
  if (IsConst1)
    V1 = ConstantExpr::getExtractElement(VecC1, Builder.getInt64(Index));

  Value *Scalar =
      IsCmp ? Builder.CreateCmp(Pred, V0, V1)
            : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, V0, V1);

  Scalar->setName(I.getName() + ".scalar");

  // All IR flags are safe to back-propagate. There is no potential for extra
  // poison to be created by the scalar instruction.
  if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
    ScalarInst->copyIRFlags(&I);

  // Fold the vector constants in the original vectors into a new base vector.
  Value *NewVecC =
      IsCmp ? Builder.CreateCmp(Pred, VecC0, VecC1)
            : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, VecC0, VecC1);
  Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
  replaceValue(I, *Insert);
  return true;
}
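// Combine a pair of compares of extracted elements from the same vector that
// feed a boolean binop into a vector compare plus binop followed by a single
// extract.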
bool VectorCombine::foldExtractedCmps(Instruction &I) {
  // We are looking for a scalar binop of booleans:
  //   binop i1 (cmp Pred I0, C0), (cmp Pred I1, C1)
  if (!I.isBinaryOp() || !I.getType()->isIntegerTy(1))
    return false;

  // The compare predicates should match, and each compare should have a
  // constant operand.
  Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
  auto *Ext0 = cast<ExtractElementInst>(I0);
  auto *Ext1 = cast<ExtractElementInst>(I1);
  ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1);
  if (!ConvertToShuf)
    return false;

  CmpInst::Predicate Pred = P0;
  unsigned CmpOpcode =
      CmpInst::isFPPredicate(Pred) ? Instruction::FCmp : Instruction::ICmp;
  auto *VecTy = dyn_cast<FixedVectorType>(X->getType());
  if (!VecTy)
    return false;

  int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
  int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;

  ShufMask[CheapIndex] = ExpensiveIndex;

  if (OldCost < NewCost || !NewCost.isValid())
    return false;

  replaceValue(I, *NewExt);
  ++NumVecCmpBO;
  return true;
}
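// Scan the instructions between Begin and End and report whether any of them
// may modify the given memory location; gives up (conservatively returning
// true) after MaxInstrsToScan instructions.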
static bool isMemModifiedBetween(BasicBlock::iterator Begin,
                                 BasicBlock::iterator End,
                                 const MemoryLocation &Loc, AAResults &AA) {
  unsigned NumScanned = 0;
  return std::any_of(Begin, End, [&](const Instruction &Instr) {
    return isModSet(AA.getModRefInfo(&Instr, Loc)) ||
           ++NumScanned > MaxInstrsToScan;
  });
}
/// Helper class to indicate whether a vector index can be safely scalarized
/// and whether a freeze needs to be inserted.
class ScalarizationResult {
  enum class StatusTy { Unsafe, Safe, SafeWithFreeze };

  StatusTy Status;
  Value *ToFreeze;

  ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)
      : Status(Status), ToFreeze(ToFreeze) {}

public:
  ScalarizationResult(const ScalarizationResult &Other) = default;
  ~ScalarizationResult() {
    assert(!ToFreeze && "freeze() not called with ToFreeze being set");
  }

  static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }
  static ScalarizationResult safe() { return {StatusTy::Safe}; }
  static ScalarizationResult safeWithFreeze(Value *ToFreeze) {
    return {StatusTy::SafeWithFreeze, ToFreeze};
  }

  bool isSafe() const { return Status == StatusTy::Safe; }
  bool isUnsafe() const { return Status == StatusTy::Unsafe; }
  bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }

  /// Reset the state to Unsafe and clear ToFreeze if set.
  void discard() {
    ToFreeze = nullptr;
    Status = StatusTy::Unsafe;
  }

  /// Freeze ToFreeze and update the use in \p UserI to use it.
  void freeze(IRBuilder<> &Builder, Instruction &UserI) {
    assert(isSafeWithFreeze() &&
           "should only be used when freezing is required");
    assert(is_contained(ToFreeze->users(), &UserI) &&
           "UserI must be a user of ToFreeze");
    IRBuilder<>::InsertPointGuard Guard(Builder);
    Builder.SetInsertPoint(cast<Instruction>(&UserI));
    Value *Frozen =
        Builder.CreateFreeze(ToFreeze, ToFreeze->getName() + ".frozen");
    for (Use &U : make_early_inc_range(UserI.operands()))
      if (U.get() == ToFreeze)
        U.set(Frozen);
    ToFreeze = nullptr;
  }
};
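// Check if it is legal to scalarize a memory access to VecTy at index Idx:
// the index must be a constant less than the number of elements, or provably
// in-bounds (possibly only after inserting a freeze).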
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx,
                                              Instruction *CtxI,
                                              AssumptionCache &AC,
                                              const DominatorTree &DT) {
  uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();

  if (auto *C = dyn_cast<ConstantInt>(Idx)) {
    if (C->getValue().ult(NumElements))
      return ScalarizationResult::safe();
    return ScalarizationResult::unsafe();
  }

  unsigned IntWidth = Idx->getType()->getScalarSizeInBits();
  APInt Zero(IntWidth, 0);
  APInt MaxElts(IntWidth, NumElements);
  ConstantRange ValidIndices(Zero, MaxElts);
  ConstantRange IdxRange(IntWidth, true);

  if (isGuaranteedNotToBePoison(Idx, &AC)) {
    if (ValidIndices.contains(computeConstantRange(Idx, /* ForSigned */ false,
                                                   true, &AC, CtxI, &DT)))
      return ScalarizationResult::safe();
    return ScalarizationResult::unsafe();
  }

  // If the index may be poison, check if we can insert a freeze before the
  // range of the index is restricted.
  Value *IdxBase;
  ConstantInt *CI;
  if (match(Idx, m_And(m_Value(IdxBase), m_ConstantInt(CI)))) {
    IdxRange = IdxRange.binaryAnd(CI->getValue());
  } else if (match(Idx, m_URem(m_Value(IdxBase), m_ConstantInt(CI)))) {
    IdxRange = IdxRange.urem(CI->getValue());
  }

  if (ValidIndices.contains(IdxRange))
    return ScalarizationResult::safeWithFreeze(IdxBase);
  return ScalarizationResult::unsafe();
}
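// The memory operation on a vector of ScalarType had alignment of
// VectorAlignment. Compute the maximal, but conservatively correct, alignment
// for the scalarized access at index Idx.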
static Align computeAlignmentAfterScalarization(Align VectorAlignment,
                                                Type *ScalarType, Value *Idx,
                                                const DataLayout &DL) {
  if (auto *C = dyn_cast<ConstantInt>(Idx))
    return commonAlignment(VectorAlignment,
                           C->getZExtValue() * DL.getTypeStoreSize(ScalarType));
  return commonAlignment(VectorAlignment, DL.getTypeStoreSize(ScalarType));
}
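// Rewrite "store (insertelement (load %addr), %scalar, %idx), %addr" as a
// scalar store of %scalar to the element address, when the index is provably
// in-bounds and memory is not modified between the load and the store.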
bool VectorCombine::foldSingleElementStore(Instruction &I) {
  auto *SI = cast<StoreInst>(&I);
  if (!SI->isSimple() || !isa<VectorType>(SI->getValueOperand()->getType()))
    return false;

  Instruction *Source;
  Value *NewElement;
  Value *Idx;
  if (!match(SI->getValueOperand(),
             m_InsertElt(m_Instruction(Source), m_Value(NewElement),
                         m_Value(Idx))))
    return false;

  if (auto *Load = dyn_cast<LoadInst>(Source)) {
    auto VecTy = cast<VectorType>(SI->getValueOperand()->getType());
    Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
    // Don't optimize for atomic/volatile load or store. Ensure memory is not
    // modified between, vector type matches store size, and index is inbounds.
    if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
        !DL->typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||
        SrcAddr != SI->getPointerOperand()->stripPointerCasts())
      return false;

    auto ScalarizableIdx = canScalarizeAccess(VecTy, Idx, Load, AC, DT);
    if (ScalarizableIdx.isUnsafe() ||
        isMemModifiedBetween(Load->getIterator(), SI->getIterator(),
                             MemoryLocation::get(SI), AA))
      return false;

    if (ScalarizableIdx.isSafeWithFreeze())
      ScalarizableIdx.freeze(Builder, *cast<Instruction>(Idx));
    Value *GEP = Builder.CreateInBoundsGEP(
        SI->getValueOperand()->getType(), SI->getPointerOperand(),
        {ConstantInt::get(Idx->getType(), 0), Idx});
    StoreInst *NSI = Builder.CreateStore(NewElement, GEP);
    NSI->copyMetadata(*SI);
    Align ScalarOpAlignment = computeAlignmentAfterScalarization(
        std::max(SI->getAlign(), Load->getAlign()), NewElement->getType(), Idx,
        *DL);
    NSI->setAlignment(ScalarOpAlignment);
    replaceValue(I, *NSI);
    eraseInstruction(I);
    return true;
  }

  return false;
}
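// If all users of a vector load are extractelements in the same block, with
// nothing clobbering memory in between, replace the wide load with narrow
// per-element scalar loads when the cost model agrees.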
bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
  Value *Ptr;
  if (!match(&I, m_Load(m_Value(Ptr))))
    return false;

  auto *VecTy = cast<VectorType>(I.getType());
  auto *LI = cast<LoadInst>(&I);
  if (LI->isVolatile() || !DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
    return false;

  InstructionCost OriginalCost =
      TTI.getMemoryOpCost(Instruction::Load, VecTy, LI->getAlign(),
                          LI->getPointerAddressSpace());
  InstructionCost ScalarizedCost = 0;

  Instruction *LastCheckedInst = LI;
  unsigned NumInstChecked = 0;
  DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
  auto FailureGuard = make_scope_exit([&]() {
    // If the transform is aborted, discard the ScalarizationResults.
    for (auto &Pair : NeedFreeze)
      Pair.second.discard();
  });
  // Check that all users of the load are extracts with no memory
  // modifications between the load and the extract, and accumulate costs.
  for (User *U : LI->users()) {
    auto *UI = dyn_cast<ExtractElementInst>(U);
    if (!UI || UI->getParent() != LI->getParent())
      return false;

    if (LastCheckedInst->comesBefore(UI)) {
      for (Instruction &I :
           make_range(std::next(LI->getIterator()), UI->getIterator())) {
        if (NumInstChecked == MaxInstrsToScan || I.mayWriteToMemory())
          return false;
        NumInstChecked++;
      }
      LastCheckedInst = UI;
    }

    auto ScalarIdx = canScalarizeAccess(VecTy, UI->getOperand(1), &I, AC, DT);
    if (ScalarIdx.isUnsafe())
      return false;
    if (ScalarIdx.isSafeWithFreeze()) {
      NeedFreeze.try_emplace(UI, ScalarIdx);
      ScalarIdx.discard();
    }

    auto *Index = dyn_cast<ConstantInt>(UI->getOperand(1));
    ScalarizedCost +=
        TTI.getMemoryOpCost(Instruction::Load, VecTy->getElementType(),
                            Align(1), LI->getPointerAddressSpace());
  }

  if (ScalarizedCost >= OriginalCost)
    return false;
  // Replace extracts with narrow scalar loads.
  for (User *U : LI->users()) {
    auto *EI = cast<ExtractElementInst>(U);
    Value *Idx = EI->getOperand(1);

    // Insert 'freeze' for poison indexes.
    auto It = NeedFreeze.find(EI);
    if (It != NeedFreeze.end())
      It->second.freeze(Builder, *cast<Instruction>(Idx));

    Builder.SetInsertPoint(EI);
    Value *GEP =
        Builder.CreateInBoundsGEP(VecTy, Ptr, {Builder.getInt32(0), Idx});
    auto *NewLoad = cast<LoadInst>(Builder.CreateLoad(
        VecTy->getElementType(), GEP, EI->getName() + ".scalar"));

    Align ScalarOpAlignment = computeAlignmentAfterScalarization(
        LI->getAlign(), VecTy->getElementType(), Idx, *DL);
    NewLoad->setAlignment(ScalarOpAlignment);

    replaceValue(*EI, *NewLoad);
  }

  FailureGuard.release();
  return true;
}
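// Try to convert "shuffle (binop A, B), (binop C, D)" into
// "binop (shuffle A, C), (shuffle B, D)", which may allow the shuffles to be
// folded away or the binops to be narrowed.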
bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
  BinaryOperator *B0, *B1;
  ArrayRef<int> OldMask;
  if (!match(&I, m_Shuffle(m_OneUse(m_BinOp(B0)), m_OneUse(m_BinOp(B1)),
                           m_Mask(OldMask))))
    return false;

  auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
  auto *BinOpTy = dyn_cast<FixedVectorType>(B0->getType());
  if (!ShuffleDstTy || !BinOpTy)
    return false;

  unsigned NumSrcElts = BinOpTy->getNumElements();

  // If we have something like "add X, Y" and "add Z, X", swap operands so the
  // shared value lines up.
  Instruction::BinaryOps Opcode = B0->getOpcode();
  Value *X = B0->getOperand(0), *Y = B0->getOperand(1);
  Value *Z = B1->getOperand(0), *W = B1->getOperand(1);
  if (BinaryOperator::isCommutative(Opcode) && X != Z && Y != W &&
      (X == W || Y == Z))
    std::swap(X, Y);

  auto ConvertToUnary = [NumSrcElts](int &M) {
    if (M >= (int)NumSrcElts)
      M -= NumSrcElts;
  };
  // Try to replace the shuffle-of-binops with binop-of-shuffles if that is not
  // more costly.
  InstructionCost OldCost =
      TTI.getArithmeticInstrCost(B0->getOpcode(), BinOpTy, CostKind) +
      TTI.getArithmeticInstrCost(B1->getOpcode(), BinOpTy, CostKind) +
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, BinOpTy,
                         OldMask, CostKind, 0, nullptr, {B0, B1}, &I);

  LLVM_DEBUG(dbgs() << "Found a shuffle feeding two binops: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  if (NewCost >= OldCost)
    return false;

  Value *Shuf0 = Builder.CreateShuffleVector(X, Z, NewMask0);
  Value *Shuf1 = Builder.CreateShuffleVector(Y, W, NewMask1);
  Value *NewBO = Builder.CreateBinOp(Opcode, Shuf0, Shuf1);

  // Intersect flags from the old binops.
  if (auto *NewInst = dyn_cast<Instruction>(NewBO)) {
    NewInst->copyIRFlags(B0);
    NewInst->andIRFlags(B1);
  }

  Worklist.pushValue(Shuf0);
  Worklist.pushValue(Shuf1);
  replaceValue(I, *NewBO);
  return true;
}
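// Try to convert "shuffle (cast X), (cast Y)" into "cast (shuffle X, Y)",
// keeping a single cast of the shuffled sources.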
bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
  Value *V0, *V1;
  ArrayRef<int> OldMask;
  if (!match(&I, m_Shuffle(m_Value(V0), m_Value(V1), m_Mask(OldMask))))
    return false;

  auto *C0 = dyn_cast<CastInst>(V0);
  auto *C1 = dyn_cast<CastInst>(V1);
  if (!C0 || !C1)
    return false;

  Instruction::CastOps Opcode = C0->getOpcode();
  if (C0->getSrcTy() != C1->getSrcTy())
    return false;

  // Handle shuffle(zext_nneg(x), sext(y)) -> sext(shuffle(x,y)) folds.
  if (Opcode != C1->getOpcode()) {
    if (match(C0, m_SExtLike(m_Value())) && match(C1, m_SExtLike(m_Value())))
      Opcode = Instruction::SExt;
    else
      return false;
  }

  auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
  auto *CastDstTy = dyn_cast<FixedVectorType>(C0->getDestTy());
  auto *CastSrcTy = dyn_cast<FixedVectorType>(C0->getSrcTy());
  if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
    return false;

  unsigned NumSrcElts = CastSrcTy->getNumElements();
  unsigned NumDstElts = CastDstTy->getNumElements();
  assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
         "Only bitcasts expected to alter src/dst element counts");

  // Check for element-count scaling that cannot be handled.
  if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
      (NumDstElts % NumSrcElts) != 0)
    return false;
  SmallVector<int, 16> NewMask;
  if (NumSrcElts >= NumDstElts) {
    // The cast is from wide to narrow/equal elements. The shuffle mask can
    // always be expanded to the equivalent form with narrow elements.
    assert(NumSrcElts % NumDstElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumSrcElts / NumDstElts;
    narrowShuffleMaskElts(ScaleFactor, OldMask, NewMask);
  } else {
    // The cast is from narrow elements to wide elements. The shuffle mask must
    // choose consecutive elements to allow casting first.
    assert(NumDstElts % NumSrcElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumDstElts / NumSrcElts;
    if (!widenShuffleMaskElts(ScaleFactor, OldMask, NewMask))
      return false;
  }

  auto *NewShuffleDstTy =
      FixedVectorType::get(CastSrcTy->getScalarType(), NewMask.size());

  OldCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
                                CastDstTy, OldMask, CostKind, 0, nullptr,
                                std::nullopt, &I);
  LLVM_DEBUG(dbgs() << "Found a shuffle feeding two casts: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  if (NewCost > OldCost)
    return false;

  Value *Shuf = Builder.CreateShuffleVector(C0->getOperand(0),
                                            C1->getOperand(0), NewMask);
  Value *Cast = Builder.CreateCast(Opcode, Shuf, ShuffleDstTy);

  // Intersect flags from the old casts.
  if (auto *NewInst = dyn_cast<Instruction>(Cast)) {
    NewInst->copyIRFlags(C0);
    NewInst->andIRFlags(C1);
  }

  Worklist.pushValue(Shuf);
  replaceValue(I, *Cast);
  return true;
}
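// Try to merge "shuffle (shuffle V0, U0), (shuffle V1, U1)" into a single
// shuffle of V0/V1 by composing the inner and outer masks.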
bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
  Value *V0, *U0, *V1, *U1;
  ArrayRef<int> OuterMask, InnerMask0, InnerMask1;
  if (!match(&I, m_Shuffle(m_OneUse(m_Shuffle(m_Value(V0), m_Value(U0),
                                              m_Mask(InnerMask0))),
                           m_OneUse(m_Shuffle(m_Value(V1), m_Value(U1),
                                              m_Mask(InnerMask1))),
                           m_Mask(OuterMask))))
    return false;

  auto *ShufI0 = dyn_cast<Instruction>(I.getOperand(0));
  auto *ShufI1 = dyn_cast<Instruction>(I.getOperand(1));
  auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
  auto *ShuffleSrcTy = dyn_cast<FixedVectorType>(V0->getType());
  auto *ShuffleImmTy = dyn_cast<FixedVectorType>(I.getOperand(0)->getType());
  if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
      V0->getType() != V1->getType())
    return false;

  unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
  unsigned NumImmElts = ShuffleImmTy->getNumElements();
  if ((!isa<PoisonValue>(U0) &&
       any_of(InnerMask0, [&](int M) { return M >= (int)NumSrcElts; })) ||
      (!isa<PoisonValue>(U1) &&
       any_of(InnerMask1, [&](int M) { return M >= (int)NumSrcElts; })))
    return false;
  // Merge the shuffle masks; lanes that come from a poison RHS become
  // PoisonMaskElem.
  SmallVector<int, 16> NewMask(OuterMask.begin(), OuterMask.end());
  for (int &M : NewMask) {
    if (0 <= M && M < (int)NumImmElts) {
      M = (InnerMask0[M] >= (int)NumSrcElts) ? PoisonMaskElem : InnerMask0[M];
    } else if (M >= (int)NumImmElts) {
      if (InnerMask1[M - NumImmElts] >= (int)NumSrcElts)
        M = PoisonMaskElem;
      else
        M = InnerMask1[M - NumImmElts] + (V0 == V1 ? 0 : NumSrcElts);
    }
  }

  // Have we folded to an identity shuffle?
  if (ShuffleVectorInst::isIdentityMask(NewMask, NumSrcElts)) {
    replaceValue(I, *V0);
    return true;
  }
  // Try to merge the shuffles if the new shuffle is not costly.
  InstructionCost OldCost =
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleSrcTy,
                         InnerMask0, CostKind, 0, nullptr, {V0, U0}, ShufI0) +
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleSrcTy,
                         InnerMask1, CostKind, 0, nullptr, {V1, U1}, ShufI1) +
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleImmTy,
                         OuterMask, CostKind, 0, nullptr, {ShufI0, ShufI1}, &I);
  InstructionCost NewCost =
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleSrcTy,
                         NewMask, CostKind, 0, nullptr, {V0, V1});

  LLVM_DEBUG(dbgs() << "Found a shuffle feeding two shuffles: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  if (NewCost > OldCost)
    return false;
  // Clear unused sources to poison.
  if (none_of(NewMask, [&](int M) { return 0 <= M && M < (int)NumSrcElts; }))
    V0 = PoisonValue::get(ShuffleSrcTy);
  if (none_of(NewMask, [&](int M) { return (int)NumSrcElts <= M; }))
    V1 = PoisonValue::get(ShuffleSrcTy);

  Value *Shuf = Builder.CreateShuffleVector(V0, V1, NewMask);
  replaceValue(I, *Shuf);
  return true;
}
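// Walk a tree of single-use shuffles and lane-preserving operations; if every
// lane ultimately comes from the matching source lane, rebuild the tree
// without the shuffles, since they fold to an identity.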
bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
  auto *Ty = dyn_cast<FixedVectorType>(I.getType());
  if (!Ty || !isa<Instruction>(I.getOperand(0)) ||
      !isa<Instruction>(I.getOperand(1)))
    return false;

  using InstLane = std::pair<Value *, int>;
  auto LookThroughShuffles = [](Value *V, int Lane) -> InstLane {
    while (auto *SV = dyn_cast<ShuffleVectorInst>(V)) {
      unsigned NumElts =
          cast<FixedVectorType>(SV->getOperand(0)->getType())->getNumElements();
      int M = SV->getMaskValue(Lane);
      if (M < 0)
        return {nullptr, PoisonMaskElem};
      else if (M < (int)NumElts) {
        V = SV->getOperand(0);
        Lane = M;
      } else {
        V = SV->getOperand(1);
        Lane = M - NumElts;
      }
    }
    return InstLane{V, Lane};
  };
  auto GenerateInstLaneVectorFromOperand =
      [&LookThroughShuffles](ArrayRef<InstLane> Item, int Op) {
        SmallVector<InstLane> NItem;
        for (InstLane V : Item) {
          NItem.emplace_back(
              !V.first ? InstLane{nullptr, PoisonMaskElem}
                       : LookThroughShuffles(
                             cast<Instruction>(V.first)->getOperand(Op),
                             V.second));
        }
        return NItem;
      };

  SmallVector<InstLane> Start(Ty->getNumElements());
  for (unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)
    Start[M] = LookThroughShuffles(&I, M);
  SmallVector<SmallVector<InstLane>> Worklist;
  Worklist.push_back(Start);
  SmallPtrSet<Value *, 4> IdentityLeafs, SplatLeafs;
  unsigned NumVisited = 0;

  while (!Worklist.empty()) {
    if (++NumVisited > MaxInstrsToScan)
      return false;

    SmallVector<InstLane> Item = Worklist.pop_back_val();

    // If we found an identity, rely on it.
    if (Item[0].second == 0 && Item[0].first->getType() == Ty &&
        all_of(drop_begin(enumerate(Item)), [&](const auto &E) {
          return !E.value().first || (E.value().first == Item[0].first &&
                                      E.value().second == (int)E.index());
        })) {
      IdentityLeafs.insert(Item[0].first);
      continue;
    }
    // Look for a splat value.
    if (all_of(drop_begin(Item), [&](InstLane &IL) {
          return !IL.first ||
                 (IL.first == Item[0].first && IL.second == Item[0].second);
        })) {
      SplatLeafs.insert(Item[0].first);
      continue;
    }
    // We need each element to be the same type of value, and each element to
    // have a single use.
    if (!all_of(drop_begin(Item), [&](InstLane IL) {
          if (!IL.first)
            return true;
          if (auto *I = dyn_cast<Instruction>(IL.first); I && !I->hasOneUse())
            return false;
          if (IL.first->getValueID() != Item[0].first->getValueID())
            return false;
          if (isa<CallInst>(IL.first) && !isa<IntrinsicInst>(IL.first))
            return false;
          auto *II = dyn_cast<IntrinsicInst>(IL.first);
          return !II ||
                 (isa<IntrinsicInst>(Item[0].first) &&
                  II->getIntrinsicID() ==
                      cast<IntrinsicInst>(Item[0].first)->getIntrinsicID());
        }))
      return false;
    // Check that the operation is one we can safely rebuild lane-wise.
    if (isa<BinaryOperator>(Item[0].first) &&
        !cast<BinaryOperator>(Item[0].first)->isIntDivRem()) {
      Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
      Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 1));
    } else if (isa<UnaryOperator>(Item[0].first)) {
      Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
    } else {
      return false;
    }
  }
  // If we got this far, the shuffles are superfluous; scan through again and
  // generate the new tree of instructions.
  std::function<Value *(ArrayRef<InstLane>)> Generate =
      [&](ArrayRef<InstLane> Item) -> Value * {
    if (IdentityLeafs.contains(Item[0].first) &&
        all_of(drop_begin(enumerate(Item)), [&](const auto &E) {
          return !E.value().first || (E.value().first == Item[0].first &&
                                      E.value().second == (int)E.index());
        })) {
      return Item[0].first;
    }
    if (SplatLeafs.contains(Item[0].first)) {
      if (auto ILI = dyn_cast<Instruction>(Item[0].first))
        Builder.SetInsertPoint(*ILI->getInsertionPointAfterDef());
      else if (isa<Argument>(Item[0].first))
        Builder.SetInsertPointPastAllocas(I.getFunction());
      SmallVector<int, 16> Mask(Ty->getNumElements(), Item[0].second);
      return Builder.CreateShuffleVector(Item[0].first, Mask);
    }
    auto *I = cast<Instruction>(Item[0].first);
    SmallVector<Value *> Ops(I->getNumOperands());
    for (unsigned Idx = 0, E = I->getNumOperands(); Idx < E; Idx++)
      Ops[Idx] = Generate(GenerateInstLaneVectorFromOperand(Item, Idx));
    Builder.SetInsertPoint(I);
    if (auto BI = dyn_cast<BinaryOperator>(I))
      return Builder.CreateBinOp((Instruction::BinaryOps)BI->getOpcode(),
                                 Ops[0], Ops[1]);
    assert(isa<UnaryInstruction>(I) &&
           "Unexpected instruction type in Generate");
    return Builder.CreateUnOp((Instruction::UnaryOps)I->getOpcode(), Ops[0]);
  };

  Value *V = Generate(Start);
  replaceValue(I, *V);
  return true;
}
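// A reduction is lane-order independent, so a shuffle feeding (possibly
// through binops) a vector reduction intrinsic can be re-sorted towards an
// identity/concat mask, or removed entirely.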
bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
  auto *II = dyn_cast<IntrinsicInst>(&I);
  if (!II)
    return false;
  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax:
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax:
    break;
  default:
    return false;
  }
  // Find all the inputs while looking through operations that do not alter
  // the lane order (binops, for example). Currently we look for a single
  // shuffle, and can ignore splat values.
  std::queue<Value *> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  ShuffleVectorInst *Shuffle = nullptr;
  if (auto *Op = dyn_cast<Instruction>(I.getOperand(0)))
    Worklist.push(Op);

  while (!Worklist.empty()) {
    Value *CV = Worklist.front();
    Worklist.pop();
    if (Visited.contains(CV))
      continue;

    // Splats don't change the order, so can be ignored.
    if (isSplatValue(CV))
      continue;

    Visited.insert(CV);

    if (auto *CI = dyn_cast<Instruction>(CV)) {
      if (CI->isBinaryOp()) {
        for (auto *Op : CI->operand_values())
          Worklist.push(Op);
        continue;
      } else if (auto *SV = dyn_cast<ShuffleVectorInst>(CI)) {
        if (Shuffle && Shuffle != SV)
          return false;
        Shuffle = SV;
        continue;
      }
    }

    // Anything else is currently unsupported.
    return false;
  }

  if (!Shuffle)
    return false;

  // Check that all uses of the binary ops and shuffles are also included in
  // the lane-invariant operations.
  for (auto *V : Visited)
    for (auto *U : V->users())
      if (!Visited.contains(U) && U != &I)
        return false;
  FixedVectorType *VecType =
      dyn_cast<FixedVectorType>(II->getOperand(0)->getType());
  if (!VecType)
    return false;
  FixedVectorType *ShuffleInputType =
      dyn_cast<FixedVectorType>(Shuffle->getOperand(0)->getType());
  if (!ShuffleInputType)
    return false;
  unsigned NumInputElts = ShuffleInputType->getNumElements();

  // Find the mask from sorting the lanes into order. This is most likely to
  // become an identity or concat mask. Undef elements are pushed to the end.
  SmallVector<int> ConcatMask;
  Shuffle->getShuffleMask(ConcatMask);
  sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
  // A truncating shuffle's mask may contain indices greater than the size of
  // the resulting vector; this requires special handling.
  bool IsTruncatingShuffle = VecType->getNumElements() < NumInputElts;
  bool UsesSecondVec =
      any_of(ConcatMask, [&](int M) { return M >= (int)NumInputElts; });

  FixedVectorType *VecTyForCost =
      (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType;
  InstructionCost OldCost = TTI.getShuffleCost(
      UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc,
      VecTyForCost, Shuffle->getShuffleMask());
  InstructionCost NewCost = TTI.getShuffleCost(
      UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc,
      VecTyForCost, ConcatMask);
  LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
                    << "\n");
  LLVM_DEBUG(dbgs() << "  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  if (NewCost < OldCost) {
    Builder.SetInsertPoint(Shuffle);
    Value *NewShuffle = Builder.CreateShuffleVector(
        Shuffle->getOperand(0), Shuffle->getOperand(1), ConcatMask);
    LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n");
    replaceValue(*Shuffle, *NewShuffle);
  }

  // See if we can re-use foldSelectShuffle, getting it to reduce the size of
  // the shuffle into a nicer order, as it can ignore the order of the
  // shuffles.
  return foldSelectShuffle(*Shuffle, /*FromReduction=*/true);
}
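// reduce(trunc(X)) --> trunc(reduce(X)): for add/mul/and/or/xor reductions the
// truncate can be deferred, so the reduction runs on the wider source type and
// only the scalar result is truncated, when TTI says that is cheaper.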
bool VectorCombine::foldTruncFromReductions(Instruction &I) {
  auto *II = dyn_cast<IntrinsicInst>(&I);
  if (!II)
    return false;

  Intrinsic::ID IID = II->getIntrinsicID();
  switch (IID) {
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
    break;
  default:
    return false;
  }

  unsigned ReductionOpc = getArithmeticReductionInstruction(IID);
  Value *ReductionSrc = I.getOperand(0);

  Value *TruncSrc;
  if (!match(ReductionSrc, m_OneUse(m_Trunc(m_Value(TruncSrc)))))
    return false;

  auto *Trunc = cast<CastInst>(ReductionSrc);
  auto *TruncSrcTy = cast<VectorType>(TruncSrc->getType());
  auto *ReductionSrcTy = cast<VectorType>(ReductionSrc->getType());
  Type *ResultTy = I.getType();

  InstructionCost NewCost =
      TTI.getArithmeticReductionCost(ReductionOpc, TruncSrcTy, std::nullopt,
                                     CostKind) +
      TTI.getCastInstrCost(Instruction::Trunc, ResultTy,
                           ReductionSrcTy->getScalarType(),
                           TTI::CastContextHint::None, CostKind);

  if (OldCost <= NewCost || !NewCost.isValid())
    return false;

  Value *NewReduction = Builder.CreateIntrinsic(
      TruncSrcTy->getScalarType(), II->getIntrinsicID(), {TruncSrc});
  Value *NewTruncation = Builder.CreateTrunc(NewReduction, ResultTy);
  replaceValue(I, *NewTruncation);
  return true;
}
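// Try to convert any pair of binops feeding a "select"-like shuffle into
// binops on packed operands (first-vector elements sorted to the front,
// second-vector elements to the back), reconstructing the original lane order
// with cheaper shuffles afterwards.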
bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
  auto *SVI = cast<ShuffleVectorInst>(&I);
  auto *VT = cast<FixedVectorType>(I.getType());
  auto *Op0 = dyn_cast<Instruction>(SVI->getOperand(0));
  auto *Op1 = dyn_cast<Instruction>(SVI->getOperand(1));
  if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
      VT != Op0->getType())
    return false;

  auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand(0));
  auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand(1));
  auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand(0));
  auto *SVI1B = dyn_cast<Instruction>(Op1->getOperand(1));
  SmallPtrSet<Instruction *, 4> InputShuffles;
  auto checkSVNonOpUses = [&](Instruction *I) {
    if (!I || I->getOperand(0)->getType() != VT)
      return true;
    return any_of(I->users(), [&](User *U) {
      return U != Op0 && U != Op1 &&
             !(isa<ShuffleVectorInst>(U) &&
               (InputShuffles.contains(cast<Instruction>(U)) ||
                isInstructionTriviallyDead(cast<Instruction>(U))));
    });
  };
  if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
      checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
    return false;
  // Collect all the uses that are shuffles that we can transform together. We
  // may not have a single shuffle, but a group that can all be transformed
  // together profitably.
  SmallVector<ShuffleVectorInst *> Shuffles;
  auto collectShuffles = [&](Instruction *I) {
    for (auto *U : I->users()) {
      auto *SV = dyn_cast<ShuffleVectorInst>(U);
      if (!SV || SV->getType() != VT)
        return false;
      if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
          (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
        return false;
      if (!llvm::is_contained(Shuffles, SV))
        Shuffles.push_back(SV);
    }
    return true;
  };
  if (!collectShuffles(Op0) || !collectShuffles(Op1))
    return false;
  // From a reduction, we need to be processing a single shuffle, otherwise the
  // other uses will not be lane-invariant.
  if (FromReduction && Shuffles.size() > 1)
    return false;

  // Add any shuffle uses of the shuffles we have found, to include them in our
  // cost calculations.
  if (!FromReduction) {
    for (ShuffleVectorInst *SV : Shuffles) {
      for (auto *U : SV->users()) {
        ShuffleVectorInst *SSV = dyn_cast<ShuffleVectorInst>(U);
        if (SSV && isa<UndefValue>(SSV->getOperand(1)) && SSV->getType() == VT)
          Shuffles.push_back(SSV);
      }
    }
  }
  // For each of the output shuffles, we try to sort all the first vector
  // elements to the beginning, followed by the second array elements at the
  // end. If the binops are legalized to smaller vectors, this may reduce the
  // total number of binops. We compute the ReconstructMask needed to convert
  // back to the original lane order.
  SmallVector<std::pair<int, int>> V1, V2;
  SmallVector<SmallVector<int>> OrigReconstructMasks;
  int MaxV1Elt = 0, MaxV2Elt = 0;
  unsigned NumElts = VT->getNumElements();
  for (ShuffleVectorInst *SVN : Shuffles) {
    SmallVector<int> Mask;
    SVN->getShuffleMask(Mask);

    // Check the operands are the same as the original, or reversed (in which
    // case we need to commute the mask).
    Value *SVOp0 = SVN->getOperand(0);
    Value *SVOp1 = SVN->getOperand(1);
    if (isa<UndefValue>(SVOp1)) {
      auto *SSV = cast<ShuffleVectorInst>(SVOp0);
      SVOp0 = SSV->getOperand(0);
      SVOp1 = SSV->getOperand(1);
      for (unsigned I = 0, E = Mask.size(); I != E; I++) {
        if (Mask[I] >= static_cast<int>(SSV->getShuffleMask().size()))
          return false;
        Mask[I] = Mask[I] < 0 ? Mask[I] : SSV->getMaskValue(Mask[I]);
      }
    }
    if (SVOp0 == Op1 && SVOp1 == Op0) {
      std::swap(SVOp0, SVOp1);
      ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
    }
    if (SVOp0 != Op0 || SVOp1 != Op1)
      return false;

    // Calculate the reconstruction mask for this shuffle, as the mask needed
    // to take the packed elements back to the original lane order.
    SmallVector<int> ReconstructMask;
    for (unsigned I = 0; I < Mask.size(); I++) {
      if (Mask[I] < 0) {
        ReconstructMask.push_back(-1);
      } else if (Mask[I] < static_cast<int>(NumElts)) {
        MaxV1Elt = std::max(MaxV1Elt, Mask[I]);
        auto It = find_if(V1, [&](const std::pair<int, int> &A) {
          return Mask[I] == A.first;
        });
        if (It != V1.end())
          ReconstructMask.push_back(It - V1.begin());
        else {
          ReconstructMask.push_back(V1.size());
          V1.emplace_back(Mask[I], V1.size());
        }
      } else {
        MaxV2Elt = std::max<int>(MaxV2Elt, Mask[I] - NumElts);
        auto It = find_if(V2, [&](const std::pair<int, int> &A) {
          return Mask[I] - static_cast<int>(NumElts) == A.first;
        });
        if (It != V2.end())
          ReconstructMask.push_back(NumElts + It - V2.begin());
        else {
          ReconstructMask.push_back(NumElts + V2.size());
          V2.emplace_back(Mask[I] - NumElts, NumElts + V2.size());
        }
      }
    }

    // For reductions, we know that the lane ordering out doesn't alter the
    // result. In-order can help simplify the shuffle away.
    if (FromReduction)
      sort(ReconstructMask);
    OrigReconstructMasks.push_back(std::move(ReconstructMask));
  }
  // If the maximum elements used from V1 and V2 are not larger than the new
  // vectors, the vectors are already packed and performing the optimization
  // again will likely not help any further. This also prevents us from getting
  // stuck in a cycle in case the costs do not also rule it out.
  if (V1.empty() || V2.empty() ||
      (MaxV1Elt == static_cast<int>(V1.size()) - 1 &&
       MaxV2Elt == static_cast<int>(V2.size()) - 1))
    return false;
  // GetBaseMaskValue takes one of the inputs, which may be a shuffle, a
  // shuffle of a shuffle, or a direct vector, and looks through to the
  // underlying mask value.
  auto GetBaseMaskValue = [&](Instruction *I, int M) {
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (!SV)
      return M;
    if (isa<UndefValue>(SV->getOperand(1)))
      if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
        if (InputShuffles.contains(SSV))
          return SSV->getMaskValue(SV->getMaskValue(M));
    return SV->getMaskValue(M);
  };
  // Attempt to sort the inputs into an order that keeps the base mask values
  // ascending, which tends to produce cheaper shuffles.
  auto SortBase = [&](Instruction *A, std::pair<int, int> X,
                      std::pair<int, int> Y) {
    int MXA = GetBaseMaskValue(A, X.first);
    int MYA = GetBaseMaskValue(A, Y.first);
    return MXA < MYA;
  };
  stable_sort(V1, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI0A, A, B);
  });
  stable_sort(V2, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI1A, A, B);
  });
  // Calculate our ReconstructMasks from the OrigReconstructMasks and the
  // modified order of the input shuffles.
  SmallVector<SmallVector<int>> ReconstructMasks;
  for (const auto &Mask : OrigReconstructMasks) {
    SmallVector<int> ReconstructMask;
    for (int M : Mask) {
      auto FindIndex = [](const SmallVector<std::pair<int, int>> &V, int M) {
        auto It = find_if(V, [M](auto A) { return A.second == M; });
        assert(It != V.end() && "Expected all entries in Mask");
        return std::distance(V.begin(), It);
      };
      if (M < 0)
        ReconstructMask.push_back(-1);
      else if (M < static_cast<int>(NumElts)) {
        ReconstructMask.push_back(FindIndex(V1, M));
      } else {
        ReconstructMask.push_back(NumElts + FindIndex(V2, M));
      }
    }
    ReconstructMasks.push_back(std::move(ReconstructMask));
  }
  // Calculate the masks needed for the new input shuffles, padding the unused
  // tail lanes with poison.
  SmallVector<int> V1A, V1B, V2A, V2B;
  for (unsigned I = 0; I < V1.size(); I++) {
    V1A.push_back(GetBaseMaskValue(SVI0A, V1[I].first));
    V1B.push_back(GetBaseMaskValue(SVI0B, V1[I].first));
  }
  for (unsigned I = 0; I < V2.size(); I++) {
    V2A.push_back(GetBaseMaskValue(SVI1A, V2[I].first));
    V2B.push_back(GetBaseMaskValue(SVI1B, V2[I].first));
  }
  while (V1A.size() < NumElts) {
    V1A.push_back(PoisonMaskElem);
    V1B.push_back(PoisonMaskElem);
  }
  while (V2A.size() < NumElts) {
    V2A.push_back(PoisonMaskElem);
    V2B.push_back(PoisonMaskElem);
  }
  auto AddShuffleCost = [&](InstructionCost C, Instruction *I) {
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (!SV)
      return C;
    return C + TTI.getShuffleCost(isa<UndefValue>(SV->getOperand(1))
                                      ? TTI::SK_PermuteSingleSrc
                                      : TTI::SK_PermuteTwoSrc,
                                  VT, SV->getShuffleMask());
  };
  auto AddShuffleMaskCost = [&](InstructionCost C, ArrayRef<int> Mask) {
    return C + TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, VT, Mask);
  };

  // Get the costs of the shuffles + binops before and after with the new
  // shuffle masks.
  InstructionCost CostBefore =
      TTI.getArithmeticInstrCost(Op0->getOpcode(), VT) +
      TTI.getArithmeticInstrCost(Op1->getOpcode(), VT);
  CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
                                InstructionCost(0), AddShuffleCost);
  CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
                                InstructionCost(0), AddShuffleCost);

  // The new binops will be unused for lanes past the used shuffle lengths.
  FixedVectorType *Op0SmallVT =
      FixedVectorType::get(VT->getScalarType(), V1.size());
  FixedVectorType *Op1SmallVT =
      FixedVectorType::get(VT->getScalarType(), V2.size());
  InstructionCost CostAfter =
      TTI.getArithmeticInstrCost(Op0->getOpcode(), Op0SmallVT) +
      TTI.getArithmeticInstrCost(Op1->getOpcode(), Op1SmallVT);
  CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
                               InstructionCost(0), AddShuffleMaskCost);
  std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
  CostAfter +=
      std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
                      InstructionCost(0), AddShuffleMaskCost);

  LLVM_DEBUG(dbgs() << "Found a binop select shuffle pattern: " << I << "\n");
  LLVM_DEBUG(dbgs() << "  CostBefore: " << CostBefore
                    << " vs CostAfter: " << CostAfter << "\n");
  if (CostBefore <= CostAfter)
    return false;
  // The cost model has passed; create the new instructions.
  auto GetShuffleOperand = [&](Instruction *I, unsigned Op) -> Value * {
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (!SV)
      return I;
    if (isa<UndefValue>(SV->getOperand(1)))
      if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
        if (InputShuffles.contains(SSV))
          return SSV->getOperand(Op);
    return SV->getOperand(Op);
  };
  Value *NSV0A = Builder.CreateShuffleVector(GetShuffleOperand(SVI0A, 0),
                                             GetShuffleOperand(SVI0A, 1), V1A);
  Value *NSV0B = Builder.CreateShuffleVector(GetShuffleOperand(SVI0B, 0),
                                             GetShuffleOperand(SVI0B, 1), V1B);
  Value *NSV1A = Builder.CreateShuffleVector(GetShuffleOperand(SVI1A, 0),
                                             GetShuffleOperand(SVI1A, 1), V2A);
  Value *NSV1B = Builder.CreateShuffleVector(GetShuffleOperand(SVI1B, 0),
                                             GetShuffleOperand(SVI1B, 1), V2B);
  Value *NOp0 = Builder.CreateBinOp((Instruction::BinaryOps)Op0->getOpcode(),
                                    NSV0A, NSV0B);
  if (auto *I = dyn_cast<Instruction>(NOp0))
    I->copyIRFlags(Op0, true);
  Value *NOp1 = Builder.CreateBinOp((Instruction::BinaryOps)Op1->getOpcode(),
                                    NSV1A, NSV1B);
  if (auto *I = dyn_cast<Instruction>(NOp1))
    I->copyIRFlags(Op1, true);
  for (int S = 0, E = ReconstructMasks.size(); S != E; S++) {
    Builder.SetInsertPoint(Shuffles[S]);
    Value *NSV = Builder.CreateShuffleVector(NOp0, NOp1, ReconstructMasks[S]);
    replaceValue(*Shuffles[S], *NSV);
  }

  Worklist.pushValue(NSV0A);
  Worklist.pushValue(NSV0B);
  Worklist.pushValue(NSV1A);
  Worklist.pushValue(NSV1B);
  for (auto *S : Shuffles)
    Worklist.add(S);
  return true;
}
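// Driver: walk every reachable instruction once, applying the early folds
// (and, unless TryEarlyFoldsOnly is set, the late shuffle folds as well),
// then drain the worklist until no further changes are made.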
bool VectorCombine::run() {
  if (DisableVectorCombine)
    return false;

  bool MadeChange = false;
  auto FoldInst = [this, &MadeChange](Instruction &I) {
    Builder.SetInsertPoint(&I);
    bool IsFixedVectorType = isa<FixedVectorType>(I.getType());
    auto Opcode = I.getOpcode();

    // These folds should be beneficial regardless of when this pass is run
    // in the optimization pipeline. The type checking is for run-time
    // efficiency: avoid dispatching when there is no chance of matching.
    if (IsFixedVectorType) {
      switch (Opcode) {
      case Instruction::InsertElement:
        MadeChange |= vectorizeLoadInsert(I);
        break;
      case Instruction::ShuffleVector:
        MadeChange |= widenSubvectorLoad(I);
        break;
      default:
        break;
      }
    }

    // These transforms work with scalable and fixed vectors.
    if (isa<VectorType>(I.getType())) {
      MadeChange |= scalarizeBinopOrCmp(I);
      MadeChange |= scalarizeLoadExtract(I);
      MadeChange |= scalarizeVPIntrinsic(I);
    }

    if (Opcode == Instruction::Store)
      MadeChange |= foldSingleElementStore(I);
    // If this is an early pipeline invocation of this pass, we are done.
    if (TryEarlyFoldsOnly)
      return;

    // Otherwise, try folds that improve codegen but may interfere with early
    // IR canonicalizations.
    if (IsFixedVectorType) {
      switch (Opcode) {
      case Instruction::InsertElement:
        MadeChange |= foldInsExtFNeg(I);
        break;
      case Instruction::ShuffleVector:
        MadeChange |= foldShuffleOfBinops(I);
        MadeChange |= foldShuffleOfCastops(I);
        MadeChange |= foldShuffleOfShuffles(I);
        MadeChange |= foldSelectShuffle(I);
        MadeChange |= foldShuffleToIdentity(I);
        break;
      case Instruction::BitCast:
        MadeChange |= foldBitcastShuffle(I);
        break;
      }
    } else {
      switch (Opcode) {
      case Instruction::Call:
        MadeChange |= foldShuffleFromReductions(I);
        MadeChange |= foldTruncFromReductions(I);
        break;
      case Instruction::ICmp:
      case Instruction::FCmp:
        MadeChange |= foldExtractExtract(I);
        break;
      default:
        if (Instruction::isBinaryOp(Opcode)) {
          MadeChange |= foldExtractExtract(I);
          MadeChange |= foldExtractedCmps(I);
        }
        break;
      }
    }
  };
  for (BasicBlock &BB : F) {
    // Ignore unreachable basic blocks.
    if (!DT.isReachableFromEntry(&BB))
      continue;
    // Use an early-increment range so instructions can be erased in the loop.
    for (Instruction &I : make_early_inc_range(BB)) {
      if (I.isDebugOrPseudoInst())
        continue;
      FoldInst(I);
    }
  }

  while (!Worklist.isEmpty()) {
    Instruction *I = Worklist.removeOne();
    if (!I)
      continue;

    if (isInstructionTriviallyDead(I)) {
      eraseInstruction(*I);
      continue;
    }

    FoldInst(*I);
  }

  return MadeChange;
}
PreservedAnalyses VectorCombinePass::run(Function &F,
                                         FunctionAnalysisManager &FAM) {
  auto &AC = FAM.getResult<AssumptionAnalysis>(F);
  TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
  DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  AAResults &AA = FAM.getResult<AAManager>(F);
  const DataLayout *DL = &F.getParent()->getDataLayout();
  VectorCombine Combiner(F, TTI, DT, AA, AC, DL, TryEarlyFoldsOnly);
  if (!Combiner.run())
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}