// Debug tag consumed by LLVM_DEBUG and the STATISTIC machinery below;
// counters appear under this name in -stats output.
37#define DEBUG_TYPE "vector-combine"
// Per-transform counters: each STATISTIC tracks how many times one of the
// vector-combine rewrites below fired.
43STATISTIC(NumVecLoad,
"Number of vector loads formed");
44STATISTIC(NumVecCmp,
"Number of vector compares formed");
45STATISTIC(NumVecBO,
"Number of vector binops formed");
46STATISTIC(NumVecCmpBO,
"Number of vector compare + binop formed");
47STATISTIC(NumShufOfBitcast,
"Number of shuffles moved after bitcast");
48STATISTIC(NumScalarBO,
"Number of scalar binops formed");
49STATISTIC(NumScalarCmp,
"Number of scalar compares formed");
53 cl::desc(
"Disable all vector combine transforms"));
57 cl::desc(
"Disable binop extract to shuffle transforms"));
61 cl::desc(
"Max number of instructions to scan for vector combining."));
// Sentinel element index meaning "no valid/preferred index"; UINT_MAX can
// never be a real vector lane index for the sizes this pass handles.
63static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
71 bool TryEarlyFoldsOnly)
72 :
F(
F), Builder(
F.getContext()),
TTI(
TTI), DT(DT), AA(AA), AC(AC),
DL(
DL),
73 CostKind(CostKind), TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
89 bool TryEarlyFoldsOnly;
100 unsigned PreferredExtractIndex)
const;
104 unsigned PreferredExtractIndex);
127 bool foldSelectShuffle(
Instruction &
I,
bool FromReduction =
false);
132 if (
auto *NewI = dyn_cast<Instruction>(&New)) {
153 while (
auto *BitCast = dyn_cast<BitCastInst>(V))
154 V = BitCast->getOperand(0);
162 if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
163 Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
169 Type *ScalarTy = Load->getType()->getScalarType();
172 if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
179bool VectorCombine::vectorizeLoadInsert(
Instruction &
I) {
193 auto *
Load = dyn_cast<LoadInst>(
X);
205 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
206 assert(isa<PointerType>(SrcPtr->
getType()) &&
"Expected a pointer type");
208 unsigned MinVecNumElts = MinVectorSize / ScalarSize;
209 auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts,
false);
210 unsigned OffsetEltIndex = 0;
218 unsigned OffsetBitWidth =
DL->getIndexTypeSizeInBits(SrcPtr->
getType());
229 uint64_t ScalarSizeInBytes = ScalarSize / 8;
230 if (
Offset.urem(ScalarSizeInBytes) != 0)
234 OffsetEltIndex =
Offset.udiv(ScalarSizeInBytes).getZExtValue();
235 if (OffsetEltIndex >= MinVecNumElts)
252 unsigned AS =
Load->getPointerAddressSpace();
270 auto *Ty = cast<FixedVectorType>(
I.getType());
271 unsigned OutputNumElts = Ty->getNumElements();
273 assert(OffsetEltIndex < MinVecNumElts &&
"Address offset too big");
274 Mask[0] = OffsetEltIndex;
281 if (OldCost < NewCost || !NewCost.
isValid())
292 replaceValue(
I, *VecLd);
302 auto *Shuf = cast<ShuffleVectorInst>(&
I);
303 if (!Shuf->isIdentityWithPadding())
308 cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
309 unsigned OpIndex =
any_of(Shuf->getShuffleMask(), [&NumOpElts](
int M) {
310 return M >= (int)(NumOpElts);
313 auto *
Load = dyn_cast<LoadInst>(Shuf->getOperand(
OpIndex));
320 auto *Ty = cast<FixedVectorType>(
I.getType());
321 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
322 assert(isa<PointerType>(SrcPtr->
getType()) &&
"Expected a pointer type");
329 unsigned AS =
Load->getPointerAddressSpace();
344 if (OldCost < NewCost || !NewCost.
isValid())
351 replaceValue(
I, *VecLd);
363 assert(Index0C && Index1C &&
"Expected constant extract indexes");
365 unsigned Index0 = Index0C->getZExtValue();
366 unsigned Index1 = Index1C->getZExtValue();
369 if (Index0 == Index1)
393 if (PreferredExtractIndex == Index0)
395 if (PreferredExtractIndex == Index1)
399 return Index0 > Index1 ? Ext0 : Ext1;
411 unsigned PreferredExtractIndex) {
414 assert(Ext0IndexC && Ext1IndexC &&
"Expected constant extract indexes");
416 unsigned Opcode =
I.getOpcode();
420 auto *VecTy = cast<VectorType>(Ext0Src->
getType());
429 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
430 "Expected a compare");
440 unsigned Ext0Index = Ext0IndexC->getZExtValue();
441 unsigned Ext1Index = Ext1IndexC->getZExtValue();
455 unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
456 unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
457 InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
462 if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
467 bool HasUseTax = Ext0 == Ext1 ? !Ext0->
hasNUses(2)
469 OldCost = CheapExtractCost + ScalarOpCost;
470 NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
474 OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
475 NewCost = VectorOpCost + CheapExtractCost +
480 ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
481 if (ConvertToShuffle) {
492 if (
auto *FixedVecTy = dyn_cast<FixedVectorType>(VecTy)) {
495 ShuffleMask[BestInsIndex] = BestExtIndex;
497 VecTy, ShuffleMask,
CostKind, 0,
nullptr,
502 {},
CostKind, 0,
nullptr, {ConvertToShuffle});
509 return OldCost < NewCost;
519 auto *VecTy = cast<FixedVectorType>(Vec->
getType());
521 ShufMask[NewIndex] = OldIndex;
534 if (!isa<FixedVectorType>(
X->getType()))
540 assert(isa<ConstantInt>(
C) &&
"Expected a constant index operand");
541 if (isa<Constant>(
X))
554 assert(isa<CmpInst>(&
I) &&
"Expected a compare");
557 "Expected matching constant extract indexes");
565 replaceValue(
I, *NewExt);
573 assert(isa<BinaryOperator>(&
I) &&
"Expected a binary operator");
576 "Expected matching constant extract indexes");
586 if (
auto *VecBOInst = dyn_cast<Instruction>(VecBO))
587 VecBOInst->copyIRFlags(&
I);
590 replaceValue(
I, *NewExt);
618 auto *Ext0 = cast<ExtractElementInst>(I0);
619 auto *Ext1 = cast<ExtractElementInst>(I1);
626 if (isExtractExtractCheap(Ext0, Ext1,
I, ExtractToChange, InsertIndex))
629 if (ExtractToChange) {
630 unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
635 if (ExtractToChange == Ext0)
642 foldExtExtCmp(Ext0, Ext1,
I);
644 foldExtExtBinop(Ext0, Ext1,
I);
670 auto *VecTy = cast<FixedVectorType>(
I.getType());
672 auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->
getType());
673 if (!SrcVecTy || ScalarTy != SrcVecTy->getScalarType())
677 unsigned NumElts = VecTy->getNumElements();
678 if (Index >= NumElts)
685 std::iota(
Mask.begin(),
Mask.end(), 0);
701 bool NeedLenChg = SrcVecTy->getNumElements() != NumElts;
712 if (NewCost > OldCost)
727 replaceValue(
I, *NewShuf);
746 auto *DestTy = dyn_cast<FixedVectorType>(
I.getType());
747 auto *SrcTy = dyn_cast<FixedVectorType>(V0->
getType());
748 if (!DestTy || !SrcTy)
751 unsigned DestEltSize = DestTy->getScalarSizeInBits();
752 unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
753 if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
756 bool IsUnary = isa<UndefValue>(V1);
763 if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
764 !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
769 if (DestEltSize <= SrcEltSize) {
772 assert(SrcEltSize % DestEltSize == 0 &&
"Unexpected shuffle mask");
773 unsigned ScaleFactor = SrcEltSize / DestEltSize;
778 assert(DestEltSize % SrcEltSize == 0 &&
"Unexpected shuffle mask");
779 unsigned ScaleFactor = DestEltSize / SrcEltSize;
786 unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
791 unsigned NumOps = IsUnary ? 1 : 2;
801 TargetTransformInfo::CastContextHint::None,
806 TargetTransformInfo::CastContextHint::None,
809 LLVM_DEBUG(
dbgs() <<
"Found a bitcasted shuffle: " <<
I <<
"\n OldCost: "
810 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
812 if (NewCost > OldCost || !NewCost.
isValid())
820 replaceValue(
I, *Shuf);
827bool VectorCombine::scalarizeVPIntrinsic(
Instruction &
I) {
828 if (!isa<VPIntrinsic>(
I))
841 if (!ScalarOp0 || !ScalarOp1)
849 auto IsAllTrueMask = [](
Value *MaskVal) {
851 if (
auto *ConstValue = dyn_cast<Constant>(SplattedVal))
852 return ConstValue->isAllOnesValue();
867 if (
auto *FVTy = dyn_cast<FixedVectorType>(VecTy))
868 Mask.resize(FVTy->getNumElements(), 0);
877 Args.push_back(
V->getType());
883 std::optional<unsigned> FunctionalOpcode =
885 std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
886 if (!FunctionalOpcode) {
910 <<
", Cost of scalarizing:" << NewCost <<
"\n");
913 if (OldCost < NewCost || !NewCost.
isValid())
924 bool SafeToSpeculate;
927 .
hasFnAttr(Attribute::AttrKind::Speculatable);
930 *FunctionalOpcode, &VPI,
nullptr, &AC, &DT);
931 if (!SafeToSpeculate &&
938 {ScalarOp0, ScalarOp1})
940 ScalarOp0, ScalarOp1);
948bool VectorCombine::scalarizeBinopOrCmp(
Instruction &
I) {
959 bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
961 for (
User *U :
I.users())
971 Constant *VecC0 =
nullptr, *VecC1 =
nullptr;
972 Value *V0 =
nullptr, *V1 =
nullptr;
985 if (IsConst0 && IsConst1)
987 if (!IsConst0 && !IsConst1 && Index0 != Index1)
990 auto *VecTy0 = cast<VectorType>(Ins0->
getType());
991 auto *VecTy1 = cast<VectorType>(Ins1->
getType());
992 if (VecTy0->getElementCount().getKnownMinValue() <= Index0 ||
993 VecTy1->getElementCount().getKnownMinValue() <= Index1)
998 auto *I0 = dyn_cast_or_null<Instruction>(V0);
999 auto *
I1 = dyn_cast_or_null<Instruction>(V1);
1000 if ((IsConst0 && I1 &&
I1->mayReadFromMemory()) ||
1006 Type *VecTy =
I.getType();
1011 "Unexpected types for insert element into binop or cmp");
1013 unsigned Opcode =
I.getOpcode();
1029 Instruction::InsertElement, VecTy,
CostKind, Index);
1031 (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
1033 (IsConst0 ? 0 : !Ins0->
hasOneUse() * InsertCost) +
1034 (IsConst1 ? 0 : !Ins1->
hasOneUse() * InsertCost);
1037 if (OldCost < NewCost || !NewCost.
isValid())
1057 Scalar->setName(
I.getName() +
".scalar");
1061 if (
auto *ScalarInst = dyn_cast<Instruction>(Scalar))
1062 ScalarInst->copyIRFlags(&
I);
1066 IsCmp ? Builder.
CreateCmp(Pred, VecC0, VecC1)
1069 replaceValue(
I, *Insert);
1077 auto *BI = dyn_cast<BinaryOperator>(&
I);
1081 if (!BI || !
I.getType()->isIntegerTy(1))
1086 Value *B0 =
I.getOperand(0), *B1 =
I.getOperand(1);
1104 auto *Ext0 = cast<ExtractElementInst>(I0);
1105 auto *Ext1 = cast<ExtractElementInst>(I1);
1109 assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
1110 "Unknown ExtractElementInst");
1115 unsigned CmpOpcode =
1117 auto *VecTy = dyn_cast<FixedVectorType>(
X->getType());
1130 Ext0Cost + Ext1Cost + CmpCost * 2 +
1136 int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
1137 int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
1143 ShufMask[CheapIndex] = ExpensiveIndex;
1148 NewCost += Ext0->
hasOneUse() ? 0 : Ext0Cost;
1149 NewCost += Ext1->
hasOneUse() ? 0 : Ext1Cost;
1154 if (OldCost < NewCost || !NewCost.
isValid())
1164 Value *
LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
1165 Value *
RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
1168 replaceValue(
I, *NewExt);
1177 unsigned NumScanned = 0;
// Verdict on whether a vector element access can be scalarized: outright
// safe, outright unsafe, or safe provided the index value is frozen first
// (to pin down a potentially-poison index). NOTE(review): surrounding
// member lines are elided in this view — confirm against the full file.
1187class ScalarizationResult {
1188  enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
// Private constructor; callers go through the named factories below.
// ToFreeze carries the value that must be frozen in the SafeWithFreeze case.
1193  ScalarizationResult(StatusTy
Status,
Value *ToFreeze =
nullptr)
1197  ScalarizationResult(
const ScalarizationResult &
Other) =
default;
// Destructor enforces the protocol: a SafeWithFreeze result must have been
// consumed via freeze() (or discarded) before it is destroyed.
1198  ~ScalarizationResult() {
1199    assert(!ToFreeze &&
"freeze() not called with ToFreeze being set");
// Named factories for the three verdicts.
1202  static ScalarizationResult unsafe() {
return {StatusTy::Unsafe}; }
1203  static ScalarizationResult safe() {
return {StatusTy::Safe}; }
1204  static ScalarizationResult safeWithFreeze(
Value *ToFreeze) {
1205    return {StatusTy::SafeWithFreeze, ToFreeze};
// Status predicates used by the scalarization folds.
1209  bool isSafe()
const {
return Status == StatusTy::Safe; }
1211  bool isUnsafe()
const {
return Status == StatusTy::Unsafe; }
1214  bool isSafeWithFreeze()
const {
return Status == StatusTy::SafeWithFreeze; }
// NOTE(review): presumably the body of discard() — demotes the result so the
// destructor assert cannot fire; confirm against the elided lines.
1219    Status = StatusTy::Unsafe;
// freeze(): materialize a freeze of ToFreeze before UserI; asserts it is
// only called in the SafeWithFreeze state and that UserI actually uses
// the value being frozen.
1224    assert(isSafeWithFreeze() &&
1225           "should only be used when freezing is required");
"UserI must be a user of ToFreeze");
1233    if (
U.get() == ToFreeze)
1250 uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
1252 if (
auto *
C = dyn_cast<ConstantInt>(
Idx)) {
1253 if (
C->getValue().ult(NumElements))
1254 return ScalarizationResult::safe();
1255 return ScalarizationResult::unsafe();
1258 unsigned IntWidth =
Idx->getType()->getScalarSizeInBits();
1259 APInt Zero(IntWidth, 0);
1260 APInt MaxElts(IntWidth, NumElements);
1266 true, &AC, CtxI, &DT)))
1267 return ScalarizationResult::safe();
1268 return ScalarizationResult::unsafe();
1281 if (ValidIndices.
contains(IdxRange))
1282 return ScalarizationResult::safeWithFreeze(IdxBase);
1283 return ScalarizationResult::unsafe();
1293 if (
auto *
C = dyn_cast<ConstantInt>(
Idx))
1295 C->getZExtValue() *
DL.getTypeStoreSize(ScalarType));
1307bool VectorCombine::foldSingleElementStore(
Instruction &
I) {
1308 auto *
SI = cast<StoreInst>(&
I);
1309 if (!
SI->isSimple() || !isa<VectorType>(
SI->getValueOperand()->getType()))
1317 if (!
match(
SI->getValueOperand(),
1322 if (
auto *Load = dyn_cast<LoadInst>(Source)) {
1323 auto VecTy = cast<VectorType>(
SI->getValueOperand()->getType());
1324 Value *SrcAddr =
Load->getPointerOperand()->stripPointerCasts();
1327 if (!
Load->isSimple() ||
Load->getParent() !=
SI->getParent() ||
1328 !
DL->typeSizeEqualsStoreSize(
Load->getType()->getScalarType()) ||
1329 SrcAddr !=
SI->getPointerOperand()->stripPointerCasts())
1333 if (ScalarizableIdx.isUnsafe() ||
1338 if (ScalarizableIdx.isSafeWithFreeze())
1339 ScalarizableIdx.freeze(Builder, *cast<Instruction>(
Idx));
1341 SI->getValueOperand()->getType(),
SI->getPointerOperand(),
1342 {ConstantInt::get(Idx->getType(), 0), Idx});
1349 replaceValue(
I, *NSI);
1358bool VectorCombine::scalarizeLoadExtract(
Instruction &
I) {
1363 auto *VecTy = cast<VectorType>(
I.getType());
1364 auto *LI = cast<LoadInst>(&
I);
1365 if (LI->isVolatile() || !
DL->typeSizeEqualsStoreSize(VecTy->
getScalarType()))
1370 LI->getPointerAddressSpace(),
CostKind);
1374 unsigned NumInstChecked = 0;
1378 for (
auto &Pair : NeedFreeze)
1379 Pair.second.discard();
1386 auto *UI = dyn_cast<ExtractElementInst>(U);
1387 if (!UI || UI->getParent() != LI->getParent())
1394 make_range(std::next(LI->getIterator()), UI->getIterator())) {
1401 LastCheckedInst = UI;
1405 if (ScalarIdx.isUnsafe())
1407 if (ScalarIdx.isSafeWithFreeze()) {
1409 ScalarIdx.discard();
1412 auto *
Index = dyn_cast<ConstantInt>(UI->getOperand(1));
1415 Index ?
Index->getZExtValue() : -1);
1422 if (ScalarizedCost >= OriginalCost)
1427 auto *EI = cast<ExtractElementInst>(U);
1431 auto It = NeedFreeze.
find(EI);
1432 if (It != NeedFreeze.
end())
1433 It->second.freeze(Builder, *cast<Instruction>(
Idx));
1438 auto *NewLoad = cast<LoadInst>(Builder.
CreateLoad(
1439 VecTy->getElementType(),
GEP, EI->getName() +
".scalar"));
1442 LI->getAlign(), VecTy->getElementType(),
Idx, *
DL);
1443 NewLoad->setAlignment(ScalarOpAlignment);
1445 replaceValue(*EI, *NewLoad);
1448 FailureGuard.release();
1455bool VectorCombine::foldConcatOfBoolMasks(
Instruction &
I) {
1456 Type *Ty =
I.getType();
1461 if (
DL->isBigEndian())
1488 if (ShAmtX > ShAmtY) {
1496 uint64_t ShAmtDiff = ShAmtY - ShAmtX;
1497 unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
1499 auto *MaskTy = dyn_cast<FixedVectorType>(SrcX->
getType());
1502 MaskTy->getNumElements() != ShAmtDiff ||
1503 MaskTy->getNumElements() > (
BitWidth / 2))
1512 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
1529 if (Ty != ConcatIntTy)
1535 LLVM_DEBUG(
dbgs() <<
"Found a concatenation of bitcasted bool masks: " <<
I
1536 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1539 if (NewCost > OldCost)
1549 if (Ty != ConcatIntTy) {
1559 replaceValue(
I, *Result);
1565bool VectorCombine::foldPermuteOfBinops(
Instruction &
I) {
1589 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1590 auto *BinOpTy = dyn_cast<FixedVectorType>(BinOp->
getType());
1591 auto *Op0Ty = dyn_cast<FixedVectorType>(Op00->
getType());
1592 auto *Op1Ty = dyn_cast<FixedVectorType>(Op10->
getType());
1593 if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
1596 unsigned NumSrcElts = BinOpTy->getNumElements();
1600 if ((BinOp->
isIntDivRem() || !isa<PoisonValue>(
I.getOperand(1))) &&
1601 any_of(OuterMask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
1606 for (
int M : OuterMask) {
1607 if (M < 0 || M >= (
int)NumSrcElts) {
1620 OuterMask,
CostKind, 0,
nullptr, {BinOp}, &
I) +
1622 CostKind, 0,
nullptr, {Op00, Op01},
1625 CostKind, 0,
nullptr, {Op10, Op11},
1630 CostKind, 0,
nullptr, {Op00, Op01}) +
1632 CostKind, 0,
nullptr, {Op10, Op11}) +
1635 LLVM_DEBUG(
dbgs() <<
"Found a shuffle feeding a shuffled binop: " <<
I
1636 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1640 if (NewCost > OldCost)
1648 if (
auto *NewInst = dyn_cast<Instruction>(NewBO))
1649 NewInst->copyIRFlags(BinOp);
1653 replaceValue(
I, *NewBO);
1659bool VectorCombine::foldShuffleOfBinops(
Instruction &
I) {
1667 if (
LHS->getOpcode() !=
RHS->getOpcode())
1671 bool IsCommutative =
false;
1675 auto *BO = cast<BinaryOperator>(LHS);
1679 IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
1682 IsCommutative = cast<CmpInst>(LHS)->isCommutative();
1686 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1687 auto *BinResTy = dyn_cast<FixedVectorType>(
LHS->
getType());
1688 auto *BinOpTy = dyn_cast<FixedVectorType>(
X->getType());
1689 if (!ShuffleDstTy || !BinResTy || !BinOpTy ||
X->getType() !=
Z->getType())
1692 unsigned NumSrcElts = BinOpTy->getNumElements();
1695 if (IsCommutative &&
X != Z &&
Y != W && (
X == W ||
Y == Z))
1698 auto ConvertToUnary = [NumSrcElts](
int &
M) {
1699 if (M >= (
int)NumSrcElts)
1734 auto *ShuffleCmpTy =
1741 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1746 bool ReducedInstCount = (isa<Constant>(
X) && isa<Constant>(Z)) ||
1747 (isa<Constant>(
Y) && isa<Constant>(W));
1748 if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
1755 cast<BinaryOperator>(LHS)->getOpcode(), Shuf0, Shuf1)
1756 : Builder.
CreateCmp(Pred, Shuf0, Shuf1);
1759 if (
auto *NewInst = dyn_cast<Instruction>(NewBO)) {
1760 NewInst->copyIRFlags(LHS);
1761 NewInst->andIRFlags(RHS);
1766 replaceValue(
I, *NewBO);
1772bool VectorCombine::foldShuffleOfCastops(
Instruction &
I) {
1778 auto *C0 = dyn_cast<CastInst>(V0);
1779 auto *C1 = dyn_cast<CastInst>(V1);
1784 if (C0->getSrcTy() != C1->getSrcTy())
1788 if (Opcode != C1->getOpcode()) {
1790 Opcode = Instruction::SExt;
1795 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1796 auto *CastDstTy = dyn_cast<FixedVectorType>(C0->getDestTy());
1797 auto *CastSrcTy = dyn_cast<FixedVectorType>(C0->getSrcTy());
1798 if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
1801 unsigned NumSrcElts = CastSrcTy->getNumElements();
1802 unsigned NumDstElts = CastDstTy->getNumElements();
1803 assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
1804 "Only bitcasts expected to alter src/dst element counts");
1808 if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
1809 (NumDstElts % NumSrcElts) != 0)
1813 if (NumSrcElts >= NumDstElts) {
1816 assert(NumSrcElts % NumDstElts == 0 &&
"Unexpected shuffle mask");
1817 unsigned ScaleFactor = NumSrcElts / NumDstElts;
1822 assert(NumDstElts % NumSrcElts == 0 &&
"Unexpected shuffle mask");
1823 unsigned ScaleFactor = NumDstElts / NumSrcElts;
1828 auto *NewShuffleDstTy =
1853 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1855 if (NewCost > OldCost)
1863 if (
auto *NewInst = dyn_cast<Instruction>(Cast)) {
1864 NewInst->copyIRFlags(C0);
1865 NewInst->andIRFlags(C1);
1869 replaceValue(
I, *Cast);
1879bool VectorCombine::foldShuffleOfShuffles(
Instruction &
I) {
1881 Value *OuterV0, *OuterV1;
1887 Value *X0, *X1, *Y0, *Y1;
1892 if (!Match0 && !Match1)
1895 X0 = Match0 ? X0 : OuterV0;
1896 Y0 = Match0 ? Y0 : OuterV0;
1897 X1 = Match1 ? X1 : OuterV1;
1898 Y1 = Match1 ? Y1 : OuterV1;
1899 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1900 auto *ShuffleSrcTy = dyn_cast<FixedVectorType>(X0->
getType());
1901 auto *ShuffleImmTy = dyn_cast<FixedVectorType>(OuterV0->
getType());
1902 if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
1906 unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
1907 unsigned NumImmElts = ShuffleImmTy->getNumElements();
1913 Value *NewX =
nullptr, *NewY =
nullptr;
1914 for (
int &M : NewMask) {
1915 Value *Src =
nullptr;
1916 if (0 <= M && M < (
int)NumImmElts) {
1920 Src =
M >= (int)NumSrcElts ? Y0 : X0;
1921 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
1923 }
else if (M >= (
int)NumImmElts) {
1928 Src =
M >= (int)NumSrcElts ? Y1 : X1;
1929 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
1933 assert(0 <= M && M < (
int)NumSrcElts &&
"Unexpected shuffle mask index");
1934 if (isa<UndefValue>(Src)) {
1937 if (!isa<PoisonValue>(Src))
1942 if (!NewX || NewX == Src) {
1946 if (!NewY || NewY == Src) {
1962 replaceValue(
I, *NewX);
1977 0,
nullptr, {OuterV0, OuterV1}, &
I);
1981 bool IsUnary =
all_of(NewMask, [&](
int M) {
return M < (int)NumSrcElts; });
1986 SK, ShuffleSrcTy, NewMask,
CostKind, 0,
nullptr, {NewX, NewY});
1988 NewCost += InnerCost0;
1990 NewCost += InnerCost1;
1993 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1995 if (NewCost > OldCost)
1999 replaceValue(
I, *Shuf);
2005bool VectorCombine::foldShuffleOfIntrinsics(
Instruction &
I) {
2012 auto *II0 = dyn_cast<IntrinsicInst>(V0);
2013 auto *II1 = dyn_cast<IntrinsicInst>(V1);
2018 if (IID != II1->getIntrinsicID())
2021 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
2022 auto *II0Ty = dyn_cast<FixedVectorType>(II0->getType());
2023 if (!ShuffleDstTy || !II0Ty)
2029 for (
unsigned I = 0, E = II0->arg_size();
I != E; ++
I)
2031 II0->getArgOperand(
I) != II1->getArgOperand(
I))
2042 for (
unsigned I = 0, E = II0->arg_size();
I != E; ++
I)
2044 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
2046 auto *VecTy = cast<FixedVectorType>(II0->getArgOperand(
I)->getType());
2048 VecTy->getNumElements() * 2));
2056 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2059 if (NewCost > OldCost)
2063 for (
unsigned I = 0, E = II0->arg_size();
I != E; ++
I)
2068 II1->getArgOperand(
I), OldMask);
2075 if (
auto *NewInst = dyn_cast<Instruction>(NewIntrinsic)) {
2077 NewInst->andIRFlags(II1);
2080 replaceValue(
I, *NewIntrinsic);
2087 while (
auto *SV = dyn_cast<ShuffleVectorInst>(U->get())) {
2089 cast<FixedVectorType>(SV->getOperand(0)->getType())->getNumElements();
2090 int M = SV->getMaskValue(Lane);
2093 if (
static_cast<unsigned>(M) < NumElts) {
2094 U = &SV->getOperandUse(0);
2097 U = &SV->getOperandUse(1);
2108 auto [U, Lane] = IL;
2121 auto *Ty = cast<FixedVectorType>(Item.
front().first->get()->getType());
2122 unsigned NumElts = Ty->getNumElements();
2123 if (Item.
size() == NumElts || NumElts == 1 || Item.
size() % NumElts != 0)
2129 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
2133 unsigned NumSlices = Item.
size() / NumElts;
2138 for (
unsigned Slice = 0; Slice < NumSlices; ++Slice) {
2139 Use *SliceV = Item[Slice * NumElts].first;
2140 if (!SliceV || SliceV->get()->
getType() != Ty)
2142 for (
unsigned Elt = 0; Elt < NumElts; ++Elt) {
2143 auto [V, Lane] = Item[Slice * NumElts + Elt];
2144 if (Lane !=
static_cast<int>(Elt) || SliceV->get() != V->get())
2157 auto [FrontU, FrontLane] = Item.
front();
2159 if (IdentityLeafs.
contains(FrontU)) {
2160 return FrontU->get();
2166 if (ConcatLeafs.
contains(FrontU)) {
2168 cast<FixedVectorType>(FrontU->get()->getType())->getNumElements();
2170 for (
unsigned S = 0; S < Values.
size(); ++S)
2171 Values[S] = Item[S * NumElts].first->get();
2173 while (Values.
size() > 1) {
2176 std::iota(Mask.begin(), Mask.end(), 0);
2178 for (
unsigned S = 0; S < NewValues.
size(); ++S)
2186 auto *
I = cast<Instruction>(FrontU->get());
2187 auto *
II = dyn_cast<IntrinsicInst>(
I);
2188 unsigned NumOps =
I->getNumOperands() - (
II ? 1 : 0);
2190 for (
unsigned Idx = 0;
Idx < NumOps;
Idx++) {
2197 Ty, IdentityLeafs, SplatLeafs, ConcatLeafs,
2202 for (
const auto &Lane : Item)
2208 if (
auto *BI = dyn_cast<BinaryOperator>(
I)) {
2214 if (
auto *CI = dyn_cast<CmpInst>(
I)) {
2215 auto *
Value = Builder.
CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
2219 if (
auto *SI = dyn_cast<SelectInst>(
I)) {
2224 if (
auto *CI = dyn_cast<CastInst>(
I)) {
2235 assert(isa<UnaryInstruction>(
I) &&
"Unexpected instruction type in Generate");
2245bool VectorCombine::foldShuffleToIdentity(
Instruction &
I) {
2246 auto *Ty = dyn_cast<FixedVectorType>(
I.getType());
2247 if (!Ty ||
I.use_empty())
2251 for (
unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)
2257 unsigned NumVisited = 0;
2259 while (!Worklist.
empty()) {
2264 auto [FrontU, FrontLane] = Item.
front();
2272 return X->getType() ==
Y->getType() &&
2277 if (FrontLane == 0 &&
2278 cast<FixedVectorType>(FrontU->get()->getType())->getNumElements() ==
2279 Ty->getNumElements() &&
2282 return !E.value().first || (IsEquiv(E.value().first->get(), FrontV) &&
2283 E.value().second == (
int)E.index());
2285 IdentityLeafs.
insert(FrontU);
2289 if (
auto *
C = dyn_cast<Constant>(FrontU);
2290 C &&
C->getSplatValue() &&
2294 return !U || (isa<Constant>(
U->get()) &&
2295 cast<Constant>(
U->get())->getSplatValue() ==
2296 cast<Constant>(FrontV)->getSplatValue());
2298 SplatLeafs.
insert(FrontU);
2303 auto [FrontU, FrontLane] = Item.
front();
2304 auto [
U, Lane] = IL;
2305 return !
U || (
U->get() == FrontU->get() && Lane == FrontLane);
2307 SplatLeafs.
insert(FrontU);
2313 auto CheckLaneIsEquivalentToFirst = [Item](
InstLane IL) {
2317 Value *
V = IL.first->get();
2318 if (
auto *
I = dyn_cast<Instruction>(V);
I && !
I->hasOneUse())
2322 if (
auto *CI = dyn_cast<CmpInst>(V))
2323 if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
2325 if (
auto *CI = dyn_cast<CastInst>(V))
2326 if (CI->getSrcTy()->getScalarType() !=
2327 cast<CastInst>(FrontV)->getSrcTy()->getScalarType())
2329 if (
auto *SI = dyn_cast<SelectInst>(V))
2330 if (!isa<VectorType>(
SI->getOperand(0)->getType()) ||
2331 SI->getOperand(0)->getType() !=
2332 cast<SelectInst>(FrontV)->getOperand(0)->getType())
2334 if (isa<CallInst>(V) && !isa<IntrinsicInst>(V))
2336 auto *
II = dyn_cast<IntrinsicInst>(V);
2337 return !
II || (isa<IntrinsicInst>(FrontV) &&
2338 II->getIntrinsicID() ==
2339 cast<IntrinsicInst>(FrontV)->getIntrinsicID() &&
2340 !
II->hasOperandBundles());
2344 if (isa<BinaryOperator, CmpInst>(FrontU)) {
2346 if (
auto *BO = dyn_cast<BinaryOperator>(FrontU);
2347 BO && BO->isIntDivRem())
2356 }
else if (
auto *BitCast = dyn_cast<BitCastInst>(FrontU)) {
2358 auto *DstTy = dyn_cast<FixedVectorType>(BitCast->getDestTy());
2359 auto *SrcTy = dyn_cast<FixedVectorType>(BitCast->getSrcTy());
2360 if (DstTy && SrcTy &&
2361 SrcTy->getNumElements() == DstTy->getNumElements()) {
2365 }
else if (isa<SelectInst>(FrontU)) {
2370 }
else if (
auto *
II = dyn_cast<IntrinsicInst>(FrontU);
2372 !
II->hasOperandBundles()) {
2373 for (
unsigned Op = 0, E =
II->getNumOperands() - 1;
Op < E;
Op++) {
2379 return !U || (cast<Instruction>(
U->get())->getOperand(
Op) ==
2380 cast<Instruction>(FrontV)->getOperand(
Op));
2392 ConcatLeafs.
insert(FrontU);
2399 if (NumVisited <= 1)
2402 LLVM_DEBUG(
dbgs() <<
"Found a superfluous identity shuffle: " <<
I <<
"\n");
2408 ConcatLeafs, Builder, &
TTI);
2409 replaceValue(
I, *V);
2416bool VectorCombine::foldShuffleFromReductions(
Instruction &
I) {
2417 auto *
II = dyn_cast<IntrinsicInst>(&
I);
2420 switch (
II->getIntrinsicID()) {
2421 case Intrinsic::vector_reduce_add:
2422 case Intrinsic::vector_reduce_mul:
2423 case Intrinsic::vector_reduce_and:
2424 case Intrinsic::vector_reduce_or:
2425 case Intrinsic::vector_reduce_xor:
2426 case Intrinsic::vector_reduce_smin:
2427 case Intrinsic::vector_reduce_smax:
2428 case Intrinsic::vector_reduce_umin:
2429 case Intrinsic::vector_reduce_umax:
2438 std::queue<Value *> Worklist;
2441 if (
auto *
Op = dyn_cast<Instruction>(
I.getOperand(0)))
2444 while (!Worklist.empty()) {
2445 Value *CV = Worklist.front();
2456 if (
auto *CI = dyn_cast<Instruction>(CV)) {
2457 if (CI->isBinaryOp()) {
2458 for (
auto *
Op : CI->operand_values())
2461 }
else if (
auto *SV = dyn_cast<ShuffleVectorInst>(CI)) {
2462 if (Shuffle && Shuffle != SV)
2479 for (
auto *V : Visited)
2480 for (
auto *U :
V->users())
2481 if (!Visited.contains(U) && U != &
I)
2485 dyn_cast<FixedVectorType>(
II->getOperand(0)->getType());
2490 if (!ShuffleInputType)
2498 sort(ConcatMask, [](
int X,
int Y) {
return (
unsigned)
X < (
unsigned)
Y; });
2502 bool IsTruncatingShuffle =
VecType->getNumElements() < NumInputElts;
2503 bool UsesSecondVec =
2504 any_of(ConcatMask, [&](
int M) {
return M >= (int)NumInputElts; });
2507 (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType;
2513 VecTyForCost, ConcatMask,
CostKind);
2515 LLVM_DEBUG(
dbgs() <<
"Found a reduction feeding from a shuffle: " << *Shuffle
2517 LLVM_DEBUG(
dbgs() <<
" OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2519 if (NewCost < OldCost) {
2523 LLVM_DEBUG(
dbgs() <<
"Created new shuffle: " << *NewShuffle <<
"\n");
2524 replaceValue(*Shuffle, *NewShuffle);
2529 return foldSelectShuffle(*Shuffle,
true);
2536bool VectorCombine::foldCastFromReductions(
Instruction &
I) {
2537 auto *
II = dyn_cast<IntrinsicInst>(&
I);
2541 bool TruncOnly =
false;
2544 case Intrinsic::vector_reduce_add:
2545 case Intrinsic::vector_reduce_mul:
2548 case Intrinsic::vector_reduce_and:
2549 case Intrinsic::vector_reduce_or:
2550 case Intrinsic::vector_reduce_xor:
2557 Value *ReductionSrc =
I.getOperand(0);
2567 auto *SrcTy = cast<VectorType>(Src->getType());
2568 auto *ReductionSrcTy = cast<VectorType>(ReductionSrc->
getType());
2569 Type *ResultTy =
I.getType();
2572 ReductionOpc, ReductionSrcTy, std::nullopt,
CostKind);
2575 cast<CastInst>(ReductionSrc));
2582 if (OldCost <= NewCost || !NewCost.
isValid())
2586 II->getIntrinsicID(), {Src});
2588 replaceValue(
I, *NewCast);
2602bool VectorCombine::foldSelectShuffle(
Instruction &
I,
bool FromReduction) {
2603 auto *SVI = cast<ShuffleVectorInst>(&
I);
2604 auto *VT = cast<FixedVectorType>(
I.getType());
2605 auto *Op0 = dyn_cast<Instruction>(SVI->getOperand(0));
2606 auto *Op1 = dyn_cast<Instruction>(SVI->getOperand(1));
2607 if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
2611 auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand(0));
2612 auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand(1));
2613 auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand(0));
2614 auto *SVI1B = dyn_cast<Instruction>(Op1->getOperand(1));
2617 if (!
I ||
I->getOperand(0)->getType() != VT)
2620 return U != Op0 && U != Op1 &&
2621 !(isa<ShuffleVectorInst>(U) &&
2622 (InputShuffles.contains(cast<Instruction>(U)) ||
2623 isInstructionTriviallyDead(cast<Instruction>(U))));
2626 if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
2627 checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
2635 for (
auto *U :
I->users()) {
2636 auto *SV = dyn_cast<ShuffleVectorInst>(U);
2637 if (!SV || SV->getType() != VT)
2639 if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
2640 (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
2647 if (!collectShuffles(Op0) || !collectShuffles(Op1))
2651 if (FromReduction && Shuffles.
size() > 1)
2656 if (!FromReduction) {
2658 for (
auto *U : SV->users()) {
2661 Shuffles.push_back(SSV);
2673 int MaxV1Elt = 0, MaxV2Elt = 0;
2674 unsigned NumElts = VT->getNumElements();
2677 SVN->getShuffleMask(Mask);
2681 Value *SVOp0 = SVN->getOperand(0);
2682 Value *SVOp1 = SVN->getOperand(1);
2683 if (isa<UndefValue>(SVOp1)) {
2684 auto *SSV = cast<ShuffleVectorInst>(SVOp0);
2687 for (
unsigned I = 0, E =
Mask.size();
I != E;
I++) {
2693 if (SVOp0 == Op1 && SVOp1 == Op0) {
2697 if (SVOp0 != Op0 || SVOp1 != Op1)
2704 for (
unsigned I = 0;
I <
Mask.size();
I++) {
2707 }
else if (Mask[
I] <
static_cast<int>(NumElts)) {
2708 MaxV1Elt = std::max(MaxV1Elt, Mask[
I]);
2709 auto It =
find_if(V1, [&](
const std::pair<int, int> &
A) {
2710 return Mask[
I] ==
A.first;
2719 MaxV2Elt = std::max<int>(MaxV2Elt, Mask[
I] - NumElts);
2720 auto It =
find_if(V2, [&](
const std::pair<int, int> &
A) {
2721 return Mask[
I] -
static_cast<int>(NumElts) ==
A.first;
2724 ReconstructMask.
push_back(NumElts + It -
V2.begin());
2727 V2.emplace_back(Mask[
I] - NumElts, NumElts +
V2.size());
2735 sort(ReconstructMask);
2736 OrigReconstructMasks.
push_back(std::move(ReconstructMask));
2743 if (V1.
empty() ||
V2.empty() ||
2744 (MaxV1Elt ==
static_cast<int>(V1.
size()) - 1 &&
2745 MaxV2Elt ==
static_cast<int>(
V2.size()) - 1))
2752 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
2755 if (isa<UndefValue>(SV->getOperand(1)))
2756 if (
auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
2757 if (InputShuffles.contains(SSV))
2759 return SV->getMaskValue(M);
2767 std::pair<int, int>
Y) {
2768 int MXA = GetBaseMaskValue(
A,
X.first);
2769 int MYA = GetBaseMaskValue(
A,
Y.first);
2772 stable_sort(V1, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
2773 return SortBase(SVI0A,
A,
B);
2775 stable_sort(V2, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
2776 return SortBase(SVI1A,
A,
B);
2781 for (
const auto &Mask : OrigReconstructMasks) {
2783 for (
int M : Mask) {
2785 auto It =
find_if(V, [M](
auto A) {
return A.second ==
M; });
2786 assert(It !=
V.end() &&
"Expected all entries in Mask");
2787 return std::distance(
V.begin(), It);
2791 else if (M <
static_cast<int>(NumElts)) {
2792 ReconstructMask.
push_back(FindIndex(V1, M));
2794 ReconstructMask.
push_back(NumElts + FindIndex(V2, M));
2797 ReconstructMasks.push_back(std::move(ReconstructMask));
2803 for (
unsigned I = 0;
I < V1.
size();
I++) {
2804 V1A.
push_back(GetBaseMaskValue(SVI0A, V1[
I].first));
2805 V1B.
push_back(GetBaseMaskValue(SVI0B, V1[
I].first));
2807 for (
unsigned I = 0;
I <
V2.size();
I++) {
2808 V2A.
push_back(GetBaseMaskValue(SVI1A, V2[
I].first));
2809 V2B.
push_back(GetBaseMaskValue(SVI1B, V2[
I].first));
2811 while (V1A.
size() < NumElts) {
2815 while (V2A.
size() < NumElts) {
2821 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
2827 VT, SV->getShuffleMask(),
CostKind);
2838 CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
2840 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
2852 CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
2854 std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
2856 std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
2859 LLVM_DEBUG(
dbgs() <<
"Found a binop select shuffle pattern: " <<
I <<
"\n");
2861 <<
" vs CostAfter: " << CostAfter <<
"\n");
2862 if (CostBefore <= CostAfter)
2867 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
2870 if (isa<UndefValue>(SV->getOperand(1)))
2871 if (
auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
2872 if (InputShuffles.contains(SSV))
2874 return SV->getOperand(
Op);
2878 GetShuffleOperand(SVI0A, 1), V1A);
2881 GetShuffleOperand(SVI0B, 1), V1B);
2884 GetShuffleOperand(SVI1A, 1), V2A);
2887 GetShuffleOperand(SVI1B, 1), V2B);
2891 if (
auto *
I = dyn_cast<Instruction>(NOp0))
2892 I->copyIRFlags(Op0,
true);
2896 if (
auto *
I = dyn_cast<Instruction>(NOp1))
2897 I->copyIRFlags(Op1,
true);
2899 for (
int S = 0, E = ReconstructMasks.size(); S != E; S++) {
2902 replaceValue(*Shuffles[S], *NSV);
2905 Worklist.pushValue(NSV0A);
2906 Worklist.pushValue(NSV0B);
2907 Worklist.pushValue(NSV1A);
2908 Worklist.pushValue(NSV1B);
2909 for (
auto *S : Shuffles)
2921 Value *ZExted, *OtherOperand;
2927 Value *ZExtOperand =
I.getOperand(
I.getOperand(0) == OtherOperand ? 1 : 0);
2929 auto *BigTy = cast<FixedVectorType>(
I.getType());
2930 auto *SmallTy = cast<FixedVectorType>(ZExted->
getType());
2931 unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();
2933 if (
I.getOpcode() == Instruction::LShr) {
2950 Instruction::ZExt, BigTy, SmallTy,
2951 TargetTransformInfo::CastContextHint::None,
CostKind);
2957 auto *UI = cast<Instruction>(U);
2963 ShrinkCost += ZExtCost;
2978 ShrinkCost += ZExtCost;
2983 if (!isa<Constant>(OtherOperand))
2985 Instruction::Trunc, SmallTy, BigTy,
2986 TargetTransformInfo::CastContextHint::None,
CostKind);
2991 if (ShrinkCost > CurrentCost)
2995 Value *Op0 = ZExted;
2998 if (
I.getOperand(0) == OtherOperand)
3002 cast<Instruction>(NewBinOp)->copyIRFlags(&
I);
3003 cast<Instruction>(NewBinOp)->copyMetadata(
I);
3005 replaceValue(
I, *NewZExtr);
3011bool VectorCombine::foldInsExtVectorToShuffle(
Instruction &
I) {
3012 Value *DstVec, *SrcVec;
3020 auto *VecTy = dyn_cast<FixedVectorType>(
I.getType());
3021 if (!VecTy || SrcVec->
getType() != VecTy)
3024 unsigned NumElts = VecTy->getNumElements();
3025 if (ExtIdx >= NumElts || InsIdx >= NumElts)
3031 if (isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
3033 Mask[InsIdx] = ExtIdx;
3037 std::iota(
Mask.begin(),
Mask.end(), 0);
3038 Mask[InsIdx] = ExtIdx + NumElts;
3042 auto *
Ins = cast<InsertElementInst>(&
I);
3043 auto *
Ext = cast<ExtractElementInst>(
I.getOperand(1));
3051 nullptr, {DstVec, SrcVec});
3052 if (!
Ext->hasOneUse())
3055 LLVM_DEBUG(
dbgs() <<
"Found a insert/extract shuffle-like pair : " <<
I
3056 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3059 if (OldCost < NewCost)
3063 if (isa<UndefValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
3069 replaceValue(
I, *Shuf);
3076bool VectorCombine::run() {
3086 bool MadeChange =
false;
3089 bool IsVectorType = isa<VectorType>(
I.getType());
3090 bool IsFixedVectorType = isa<FixedVectorType>(
I.getType());
3091 auto Opcode =
I.getOpcode();
3099 if (IsFixedVectorType) {
3101 case Instruction::InsertElement:
3102 MadeChange |= vectorizeLoadInsert(
I);
3104 case Instruction::ShuffleVector:
3105 MadeChange |= widenSubvectorLoad(
I);
3115 MadeChange |= scalarizeBinopOrCmp(
I);
3116 MadeChange |= scalarizeLoadExtract(
I);
3117 MadeChange |= scalarizeVPIntrinsic(
I);
3120 if (Opcode == Instruction::Store)
3121 MadeChange |= foldSingleElementStore(
I);
3124 if (TryEarlyFoldsOnly)
3131 if (IsFixedVectorType) {
3133 case Instruction::InsertElement:
3134 MadeChange |= foldInsExtFNeg(
I);
3135 MadeChange |= foldInsExtVectorToShuffle(
I);
3137 case Instruction::ShuffleVector:
3138 MadeChange |= foldPermuteOfBinops(
I);
3139 MadeChange |= foldShuffleOfBinops(
I);
3140 MadeChange |= foldShuffleOfCastops(
I);
3141 MadeChange |= foldShuffleOfShuffles(
I);
3142 MadeChange |= foldShuffleOfIntrinsics(
I);
3143 MadeChange |= foldSelectShuffle(
I);
3144 MadeChange |= foldShuffleToIdentity(
I);
3146 case Instruction::BitCast:
3147 MadeChange |= foldBitcastShuffle(
I);
3150 MadeChange |= shrinkType(
I);
3155 case Instruction::Call:
3156 MadeChange |= foldShuffleFromReductions(
I);
3157 MadeChange |= foldCastFromReductions(
I);
3159 case Instruction::ICmp:
3160 case Instruction::FCmp:
3161 MadeChange |= foldExtractExtract(
I);
3163 case Instruction::Or:
3164 MadeChange |= foldConcatOfBoolMasks(
I);
3168 MadeChange |= foldExtractExtract(
I);
3169 MadeChange |= foldExtractedCmps(
I);
3182 if (
I.isDebugOrPseudoInst())
3188 while (!Worklist.isEmpty()) {
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
std::optional< std::vector< StOtherPiece > > Other
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This is the interface for a simple mod/ref and alias analysis over globals.
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
FunctionAnalysisManager FAM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static bool isFreeConcat(ArrayRef< InstLane > Item, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI)
Detect concat of multiple values into a vector.
static SmallVector< InstLane > generateInstLaneVectorFromOperand(ArrayRef< InstLane > Item, int Op)
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilder<> &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
static Value * peekThroughBitcasts(Value *V)
Return the source operand of a potentially bitcasted value.
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, Instruction *CtxI, AssumptionCache &AC, const DominatorTree &DT)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static InstLane lookThroughShuffles(Use *U, int Lane)
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
static const unsigned InvalidIndex
static Value * generateNewInstTree(ArrayRef< InstLane > Item, FixedVectorType *Ty, const SmallPtrSet< Use *, 4 > &IdentityLeafs, const SmallPtrSet< Use *, 4 > &SplatLeafs, const SmallPtrSet< Use *, 4 > &ConcatLeafs, IRBuilder<> &Builder, const TargetTransformInfo *TTI)
std::pair< Use *, int > InstLane
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
static ExtractElementInst * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilder<> &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift th...
static constexpr int Concat[]
A manager for alias analyses.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
LLVM Basic Block Representation.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of...
ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this ra...
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
This class represents a cast from floating point to signed integer.
This class represents a cast from floating point to unsigned integer.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateFNegFMF(Value *V, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateUnOp(Instruction::UnaryOps Opc, Value *V, const Twine &Name="", MDNode *FPMathTag=nullptr)
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr)
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstructionWorklist - This is the worklist management logic for InstCombine and other simplification ...
void pushUsersToWorkList(Instruction &I)
When an instruction is simplified, add all users of the instruction to the work lists because they mi...
void push(Instruction *I)
Push the instruction onto the worklist stack.
void remove(Instruction *I)
Remove I from the worklist if it exists.
void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
This class represents a sign extension of integer types.
This class represents a cast from signed integer to floating point.
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
This class represents a cast unsigned integer to floating point.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static bool isVPBinOp(Intrinsic::ID ID)
This is the common base class for vector predication intrinsics.
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
StringRef getName() const
Return a constant reference to the value's name.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
This class represents zero extension of integer types.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
OneUse_match< T > m_OneUse(const T &SubPattern)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations in either order.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
DWARFExpression::Operation Op
constexpr unsigned BitWidth
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.