36#define DEBUG_TYPE "vector-combine"
42STATISTIC(NumVecLoad,
"Number of vector loads formed");
43STATISTIC(NumVecCmp,
"Number of vector compares formed");
44STATISTIC(NumVecBO,
"Number of vector binops formed");
45STATISTIC(NumVecCmpBO,
"Number of vector compare + binop formed");
46STATISTIC(NumShufOfBitcast,
"Number of shuffles moved after bitcast");
47STATISTIC(NumScalarBO,
"Number of scalar binops formed");
48STATISTIC(NumScalarCmp,
"Number of scalar compares formed");
52 cl::desc(
"Disable all vector combine transforms"));
56 cl::desc(
"Disable binop extract to shuffle transforms"));
60 cl::desc(
"Max number of instructions to scan for vector combining."));
62static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
70 :
F(
F), Builder(
F.getContext()),
TTI(
TTI), DT(DT), AA(AA), AC(AC),
DL(
DL),
71 TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
86 bool TryEarlyFoldsOnly;
97 unsigned PreferredExtractIndex)
const;
101 unsigned PreferredExtractIndex);
119 bool foldSelectShuffle(
Instruction &
I,
bool FromReduction =
false);
123 if (
auto *NewI = dyn_cast<Instruction>(&New)) {
143 while (
auto *BitCast = dyn_cast<BitCastInst>(V))
144 V = BitCast->getOperand(0);
152 if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
153 Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
159 Type *ScalarTy = Load->getType()->getScalarType();
162 if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
169bool VectorCombine::vectorizeLoadInsert(
Instruction &
I) {
183 auto *
Load = dyn_cast<LoadInst>(
X);
195 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
196 assert(isa<PointerType>(SrcPtr->
getType()) &&
"Expected a pointer type");
198 unsigned MinVecNumElts = MinVectorSize / ScalarSize;
199 auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts,
false);
200 unsigned OffsetEltIndex = 0;
208 unsigned OffsetBitWidth =
DL->getIndexTypeSizeInBits(SrcPtr->
getType());
219 uint64_t ScalarSizeInBytes = ScalarSize / 8;
220 if (
Offset.urem(ScalarSizeInBytes) != 0)
224 OffsetEltIndex =
Offset.udiv(ScalarSizeInBytes).getZExtValue();
225 if (OffsetEltIndex >= MinVecNumElts)
242 unsigned AS =
Load->getPointerAddressSpace();
261 auto *Ty = cast<FixedVectorType>(
I.getType());
262 unsigned OutputNumElts = Ty->getNumElements();
264 assert(OffsetEltIndex < MinVecNumElts &&
"Address offset too big");
265 Mask[0] = OffsetEltIndex;
271 if (OldCost < NewCost || !NewCost.
isValid())
282 replaceValue(
I, *VecLd);
292 auto *Shuf = cast<ShuffleVectorInst>(&
I);
293 if (!Shuf->isIdentityWithPadding())
298 cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
299 unsigned OpIndex =
any_of(Shuf->getShuffleMask(), [&NumOpElts](
int M) {
300 return M >= (int)(NumOpElts);
303 auto *
Load = dyn_cast<LoadInst>(Shuf->getOperand(
OpIndex));
310 auto *Ty = cast<FixedVectorType>(
I.getType());
311 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
312 assert(isa<PointerType>(SrcPtr->
getType()) &&
"Expected a pointer type");
319 unsigned AS =
Load->getPointerAddressSpace();
334 if (OldCost < NewCost || !NewCost.
isValid())
341 replaceValue(
I, *VecLd);
353 assert(Index0C && Index1C &&
"Expected constant extract indexes");
355 unsigned Index0 = Index0C->getZExtValue();
356 unsigned Index1 = Index1C->getZExtValue();
359 if (Index0 == Index1)
384 if (PreferredExtractIndex == Index0)
386 if (PreferredExtractIndex == Index1)
390 return Index0 > Index1 ? Ext0 : Ext1;
402 unsigned PreferredExtractIndex) {
403 auto *Ext0IndexC = dyn_cast<ConstantInt>(Ext0->
getOperand(1));
404 auto *Ext1IndexC = dyn_cast<ConstantInt>(Ext1->
getOperand(1));
405 assert(Ext0IndexC && Ext1IndexC &&
"Expected constant extract indexes");
407 unsigned Opcode =
I.getOpcode();
418 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
419 "Expected a compare");
429 unsigned Ext0Index = Ext0IndexC->getZExtValue();
430 unsigned Ext1Index = Ext1IndexC->getZExtValue();
445 InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
455 bool HasUseTax = Ext0 == Ext1 ? !Ext0->
hasNUses(2)
457 OldCost = CheapExtractCost + ScalarOpCost;
458 NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
462 OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
463 NewCost = VectorOpCost + CheapExtractCost +
468 ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
469 if (ConvertToShuffle) {
487 return OldCost < NewCost;
497 auto *VecTy = cast<FixedVectorType>(Vec->
getType());
499 ShufMask[NewIndex] = OldIndex;
518 assert(isa<ConstantInt>(
C) &&
"Expected a constant index operand");
519 if (isa<Constant>(
X))
532 assert(isa<CmpInst>(&
I) &&
"Expected a compare");
535 "Expected matching constant extract indexes");
543 replaceValue(
I, *NewExt);
551 assert(isa<BinaryOperator>(&
I) &&
"Expected a binary operator");
554 "Expected matching constant extract indexes");
564 if (
auto *VecBOInst = dyn_cast<Instruction>(VecBO))
565 VecBOInst->copyIRFlags(&
I);
568 replaceValue(
I, *NewExt);
596 auto *Ext0 = cast<ExtractElementInst>(I0);
597 auto *Ext1 = cast<ExtractElementInst>(I1);
604 if (isExtractExtractCheap(Ext0, Ext1,
I, ExtractToChange, InsertIndex))
607 if (ExtractToChange) {
608 unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
613 if (ExtractToChange == Ext0)
620 foldExtExtCmp(Ext0, Ext1,
I);
622 foldExtExtBinop(Ext0, Ext1,
I);
649 auto *VecTy = cast<FixedVectorType>(
I.getType());
650 if (SrcVec->
getType() != VecTy)
654 unsigned NumElts = VecTy->getNumElements();
655 if (
Index >= NumElts)
662 std::iota(
Mask.begin(),
Mask.end(), 0);
681 if (NewCost > OldCost)
688 replaceValue(
I, *Shuf);
707 auto *DestTy = dyn_cast<FixedVectorType>(
I.getType());
708 auto *SrcTy = dyn_cast<FixedVectorType>(V0->
getType());
709 if (!DestTy || !SrcTy)
712 unsigned DestEltSize = DestTy->getScalarSizeInBits();
713 unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
714 if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
717 bool IsUnary = isa<UndefValue>(V1);
724 if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
725 !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
730 if (DestEltSize <= SrcEltSize) {
733 assert(SrcEltSize % DestEltSize == 0 &&
"Unexpected shuffle mask");
734 unsigned ScaleFactor = SrcEltSize / DestEltSize;
739 assert(DestEltSize % SrcEltSize == 0 &&
"Unexpected shuffle mask");
740 unsigned ScaleFactor = DestEltSize / SrcEltSize;
747 unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
752 unsigned NumOps = IsUnary ? 1 : 2;
764 TargetTransformInfo::CastContextHint::None,
769 TargetTransformInfo::CastContextHint::None, CK);
770 if (DestCost > SrcCost || !DestCost.
isValid())
778 replaceValue(
I, *Shuf);
785bool VectorCombine::scalarizeVPIntrinsic(
Instruction &
I) {
786 if (!isa<VPIntrinsic>(
I))
799 if (!ScalarOp0 || !ScalarOp1)
807 auto IsAllTrueMask = [](
Value *MaskVal) {
809 if (
auto *ConstValue = dyn_cast<Constant>(SplattedVal))
810 return ConstValue->isAllOnesValue();
826 if (
auto *FVTy = dyn_cast<FixedVectorType>(VecTy))
827 Mask.resize(FVTy->getNumElements(), 0);
835 Args.push_back(
V->getType());
841 std::optional<unsigned> FunctionalOpcode =
843 std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
844 if (!FunctionalOpcode) {
868 <<
", Cost of scalarizing:" << NewCost <<
"\n");
871 if (OldCost < NewCost || !NewCost.
isValid())
882 bool SafeToSpeculate;
885 .
hasFnAttr(Attribute::AttrKind::Speculatable);
888 *FunctionalOpcode, &VPI,
nullptr, &AC, &DT);
889 if (!SafeToSpeculate &&
896 {ScalarOp0, ScalarOp1})
898 ScalarOp0, ScalarOp1);
906bool VectorCombine::scalarizeBinopOrCmp(
Instruction &
I) {
917 bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
919 for (
User *U :
I.users())
929 Constant *VecC0 =
nullptr, *VecC1 =
nullptr;
930 Value *V0 =
nullptr, *V1 =
nullptr;
943 if (IsConst0 && IsConst1)
945 if (!IsConst0 && !IsConst1 && Index0 != Index1)
950 auto *I0 = dyn_cast_or_null<Instruction>(V0);
951 auto *
I1 = dyn_cast_or_null<Instruction>(V1);
952 if ((IsConst0 && I1 &&
I1->mayReadFromMemory()) ||
958 Type *VecTy =
I.getType();
963 "Unexpected types for insert element into binop or cmp");
965 unsigned Opcode =
I.getOpcode();
984 (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
986 (IsConst0 ? 0 : !Ins0->
hasOneUse() * InsertCost) +
987 (IsConst1 ? 0 : !Ins1->
hasOneUse() * InsertCost);
990 if (OldCost < NewCost || !NewCost.
isValid())
1010 Scalar->setName(
I.getName() +
".scalar");
1014 if (
auto *ScalarInst = dyn_cast<Instruction>(Scalar))
1015 ScalarInst->copyIRFlags(&
I);
1019 IsCmp ? Builder.
CreateCmp(Pred, VecC0, VecC1)
1022 replaceValue(
I, *Insert);
1032 if (!
I.isBinaryOp() || !
I.getType()->isIntegerTy(1))
1038 Value *B0 =
I.getOperand(0), *B1 =
I.getOperand(1);
1056 auto *Ext0 = cast<ExtractElementInst>(I0);
1057 auto *Ext1 = cast<ExtractElementInst>(I1);
1066 : Instruction::ICmp;
1067 auto *VecTy = dyn_cast<FixedVectorType>(
X->getType());
1084 int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
1085 int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
1090 ShufMask[CheapIndex] = ExpensiveIndex;
1099 if (OldCost < NewCost || !NewCost.
isValid())
1113 replaceValue(
I, *NewExt);
1122 unsigned NumScanned = 0;
1132class ScalarizationResult {
1133 enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
1138 ScalarizationResult(StatusTy
Status,
Value *ToFreeze =
nullptr)
1142 ScalarizationResult(
const ScalarizationResult &
Other) =
default;
1143 ~ScalarizationResult() {
1144 assert(!ToFreeze &&
"freeze() not called with ToFreeze being set");
1147 static ScalarizationResult unsafe() {
return {StatusTy::Unsafe}; }
1148 static ScalarizationResult safe() {
return {StatusTy::Safe}; }
1149 static ScalarizationResult safeWithFreeze(
Value *ToFreeze) {
1150 return {StatusTy::SafeWithFreeze, ToFreeze};
1154 bool isSafe()
const {
return Status == StatusTy::Safe; }
1156 bool isUnsafe()
const {
return Status == StatusTy::Unsafe; }
1159 bool isSafeWithFreeze()
const {
return Status == StatusTy::SafeWithFreeze; }
1164 Status = StatusTy::Unsafe;
1169 assert(isSafeWithFreeze() &&
1170 "should only be used when freezing is required");
1172 "UserI must be a user of ToFreeze");
1178 if (
U.get() == ToFreeze)
1195 uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
1197 if (
auto *
C = dyn_cast<ConstantInt>(
Idx)) {
1198 if (
C->getValue().ult(NumElements))
1199 return ScalarizationResult::safe();
1200 return ScalarizationResult::unsafe();
1203 unsigned IntWidth =
Idx->getType()->getScalarSizeInBits();
1204 APInt Zero(IntWidth, 0);
1205 APInt MaxElts(IntWidth, NumElements);
1211 true, &AC, CtxI, &DT)))
1212 return ScalarizationResult::safe();
1213 return ScalarizationResult::unsafe();
1226 if (ValidIndices.
contains(IdxRange))
1227 return ScalarizationResult::safeWithFreeze(IdxBase);
1228 return ScalarizationResult::unsafe();
1238 if (
auto *
C = dyn_cast<ConstantInt>(
Idx))
1240 C->getZExtValue() *
DL.getTypeStoreSize(ScalarType));
1252bool VectorCombine::foldSingleElementStore(
Instruction &
I) {
1253 auto *
SI = cast<StoreInst>(&
I);
1254 if (!
SI->isSimple() || !isa<VectorType>(
SI->getValueOperand()->getType()))
1262 if (!
match(
SI->getValueOperand(),
1267 if (
auto *Load = dyn_cast<LoadInst>(Source)) {
1268 auto VecTy = cast<VectorType>(
SI->getValueOperand()->getType());
1269 Value *SrcAddr =
Load->getPointerOperand()->stripPointerCasts();
1272 if (!
Load->isSimple() ||
Load->getParent() !=
SI->getParent() ||
1273 !
DL->typeSizeEqualsStoreSize(
Load->getType()->getScalarType()) ||
1274 SrcAddr !=
SI->getPointerOperand()->stripPointerCasts())
1278 if (ScalarizableIdx.isUnsafe() ||
1283 if (ScalarizableIdx.isSafeWithFreeze())
1284 ScalarizableIdx.freeze(Builder, *cast<Instruction>(
Idx));
1286 SI->getValueOperand()->getType(),
SI->getPointerOperand(),
1287 {ConstantInt::get(Idx->getType(), 0), Idx});
1294 replaceValue(
I, *NSI);
1303bool VectorCombine::scalarizeLoadExtract(
Instruction &
I) {
1308 auto *VecTy = cast<VectorType>(
I.getType());
1309 auto *LI = cast<LoadInst>(&
I);
1310 if (LI->isVolatile() || !
DL->typeSizeEqualsStoreSize(VecTy->
getScalarType()))
1315 LI->getPointerAddressSpace());
1319 unsigned NumInstChecked = 0;
1323 for (
auto &Pair : NeedFreeze)
1324 Pair.second.discard();
1331 auto *UI = dyn_cast<ExtractElementInst>(U);
1332 if (!UI || UI->getParent() != LI->getParent())
1339 make_range(std::next(LI->getIterator()), UI->getIterator())) {
1346 LastCheckedInst = UI;
1350 if (ScalarIdx.isUnsafe())
1352 if (ScalarIdx.isSafeWithFreeze()) {
1354 ScalarIdx.discard();
1357 auto *
Index = dyn_cast<ConstantInt>(UI->getOperand(1));
1364 Align(1), LI->getPointerAddressSpace());
1368 if (ScalarizedCost >= OriginalCost)
1373 auto *EI = cast<ExtractElementInst>(U);
1377 auto It = NeedFreeze.
find(EI);
1378 if (It != NeedFreeze.
end())
1379 It->second.freeze(Builder, *cast<Instruction>(
Idx));
1384 auto *NewLoad = cast<LoadInst>(Builder.
CreateLoad(
1385 VecTy->getElementType(),
GEP, EI->getName() +
".scalar"));
1388 LI->getAlign(), VecTy->getElementType(),
Idx, *
DL);
1389 NewLoad->setAlignment(ScalarOpAlignment);
1391 replaceValue(*EI, *NewLoad);
1394 FailureGuard.release();
1399bool VectorCombine::foldShuffleOfBinops(
Instruction &
I) {
1416 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1417 auto *BinOpTy = dyn_cast<FixedVectorType>(B0->
getType());
1418 if (!ShuffleDstTy || !BinOpTy)
1421 unsigned NumSrcElts = BinOpTy->getNumElements();
1426 if (BinaryOperator::isCommutative(Opcode) &&
X != Z &&
Y != W &&
1430 auto ConvertToUnary = [NumSrcElts](
int &
M) {
1431 if (M >= (
int)NumSrcElts)
1458 OldMask,
CostKind, 0,
nullptr, {B0, B1}, &
I);
1466 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1468 if (NewCost >= OldCost)
1476 if (
auto *NewInst = dyn_cast<Instruction>(NewBO)) {
1477 NewInst->copyIRFlags(B0);
1478 NewInst->andIRFlags(B1);
1483 replaceValue(
I, *NewBO);
1489bool VectorCombine::foldShuffleOfCastops(
Instruction &
I) {
1496 auto *C0 = dyn_cast<CastInst>(V0);
1497 auto *C1 = dyn_cast<CastInst>(V1);
1502 if (C0->getSrcTy() != C1->getSrcTy())
1506 if (Opcode != C1->getOpcode()) {
1508 Opcode = Instruction::SExt;
1513 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1514 auto *CastDstTy = dyn_cast<FixedVectorType>(C0->getDestTy());
1515 auto *CastSrcTy = dyn_cast<FixedVectorType>(C0->getSrcTy());
1516 if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
1519 unsigned NumSrcElts = CastSrcTy->getNumElements();
1520 unsigned NumDstElts = CastDstTy->getNumElements();
1521 assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
1522 "Only bitcasts expected to alter src/dst element counts");
1526 if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
1527 (NumDstElts % NumSrcElts) != 0)
1531 if (NumSrcElts >= NumDstElts) {
1534 assert(NumSrcElts % NumDstElts == 0 &&
"Unexpected shuffle mask");
1535 unsigned ScaleFactor = NumSrcElts / NumDstElts;
1540 assert(NumDstElts % NumSrcElts == 0 &&
"Unexpected shuffle mask");
1541 unsigned ScaleFactor = NumDstElts / NumSrcElts;
1546 auto *NewShuffleDstTy =
1559 OldMask,
CostKind, 0,
nullptr, std::nullopt, &
I);
1567 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1569 if (NewCost > OldCost)
1577 if (
auto *NewInst = dyn_cast<Instruction>(Cast)) {
1578 NewInst->copyIRFlags(C0);
1579 NewInst->andIRFlags(C1);
1583 replaceValue(
I, *Cast);
1589bool VectorCombine::foldShuffleOfShuffles(
Instruction &
I) {
1600 auto *ShufI0 = dyn_cast<Instruction>(
I.getOperand(0));
1601 auto *ShufI1 = dyn_cast<Instruction>(
I.getOperand(1));
1602 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1603 auto *ShuffleSrcTy = dyn_cast<FixedVectorType>(V0->
getType());
1604 auto *ShuffleImmTy = dyn_cast<FixedVectorType>(
I.getOperand(0)->getType());
1605 if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
1609 unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
1610 unsigned NumImmElts = ShuffleImmTy->getNumElements();
1613 if ((!isa<PoisonValue>(U0) &&
1614 any_of(InnerMask0, [&](
int M) {
return M >= (int)NumSrcElts; })) ||
1615 (!isa<PoisonValue>(U1) &&
1616 any_of(InnerMask1, [&](
int M) {
return M >= (int)NumSrcElts; })))
1621 for (
int &M : NewMask) {
1622 if (0 <= M && M < (
int)NumImmElts) {
1624 }
else if (M >= (
int)NumImmElts) {
1625 if (InnerMask1[M - NumImmElts] >= (
int)NumSrcElts)
1628 M = InnerMask1[
M - NumImmElts] + (V0 == V1 ? 0 : NumSrcElts);
1634 replaceValue(
I, *V0);
1643 InnerMask0,
CostKind, 0,
nullptr, {V0, U0}, ShufI0) +
1645 InnerMask1,
CostKind, 0,
nullptr, {V1, U1}, ShufI1) +
1647 OuterMask,
CostKind, 0,
nullptr, {ShufI0, ShufI1}, &
I);
1651 NewMask,
CostKind, 0,
nullptr, {V0, V1});
1654 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1656 if (NewCost > OldCost)
1660 if (
none_of(NewMask, [&](
int M) {
return 0 <=
M &&
M < (int)NumSrcElts; }))
1662 if (
none_of(NewMask, [&](
int M) {
return (
int)NumSrcElts <= M; }))
1666 replaceValue(
I, *Shuf);
1673bool VectorCombine::foldShuffleFromReductions(
Instruction &
I) {
1674 auto *II = dyn_cast<IntrinsicInst>(&
I);
1677 switch (II->getIntrinsicID()) {
1678 case Intrinsic::vector_reduce_add:
1679 case Intrinsic::vector_reduce_mul:
1680 case Intrinsic::vector_reduce_and:
1681 case Intrinsic::vector_reduce_or:
1682 case Intrinsic::vector_reduce_xor:
1683 case Intrinsic::vector_reduce_smin:
1684 case Intrinsic::vector_reduce_smax:
1685 case Intrinsic::vector_reduce_umin:
1686 case Intrinsic::vector_reduce_umax:
1695 std::queue<Value *> Worklist;
1698 if (
auto *
Op = dyn_cast<Instruction>(
I.getOperand(0)))
1701 while (!Worklist.empty()) {
1702 Value *CV = Worklist.front();
1713 if (
auto *CI = dyn_cast<Instruction>(CV)) {
1714 if (CI->isBinaryOp()) {
1715 for (
auto *
Op : CI->operand_values())
1718 }
else if (
auto *SV = dyn_cast<ShuffleVectorInst>(CI)) {
1719 if (Shuffle && Shuffle != SV)
1736 for (
auto *V : Visited)
1737 for (
auto *U :
V->users())
1738 if (!Visited.contains(U) && U != &
I)
1742 dyn_cast<FixedVectorType>(II->getOperand(0)->getType());
1747 if (!ShuffleInputType)
1755 sort(ConcatMask, [](
int X,
int Y) {
return (
unsigned)
X < (
unsigned)
Y; });
1759 bool IsTruncatingShuffle =
VecType->getNumElements() < NumInputElts;
1760 bool UsesSecondVec =
1761 any_of(ConcatMask, [&](
int M) {
return M >= (int)NumInputElts; });
1764 (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType;
1770 VecTyForCost, ConcatMask);
1772 LLVM_DEBUG(
dbgs() <<
"Found a reduction feeding from a shuffle: " << *Shuffle
1774 LLVM_DEBUG(
dbgs() <<
" OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1776 if (NewCost < OldCost) {
1780 LLVM_DEBUG(
dbgs() <<
"Created new shuffle: " << *NewShuffle <<
"\n");
1781 replaceValue(*Shuffle, *NewShuffle);
1786 return foldSelectShuffle(*Shuffle,
true);
1791bool VectorCombine::foldTruncFromReductions(
Instruction &
I) {
1792 auto *II = dyn_cast<IntrinsicInst>(&
I);
1798 case Intrinsic::vector_reduce_add:
1799 case Intrinsic::vector_reduce_mul:
1800 case Intrinsic::vector_reduce_and:
1801 case Intrinsic::vector_reduce_or:
1802 case Intrinsic::vector_reduce_xor:
1809 Value *ReductionSrc =
I.getOperand(0);
1815 auto *Trunc = cast<CastInst>(ReductionSrc);
1816 auto *TruncSrcTy = cast<VectorType>(TruncSrc->
getType());
1817 auto *ReductionSrcTy = cast<VectorType>(ReductionSrc->
getType());
1818 Type *ResultTy =
I.getType();
1830 ReductionSrcTy->getScalarType(),
1833 if (OldCost <= NewCost || !NewCost.
isValid())
1837 TruncSrcTy->getScalarType(), II->getIntrinsicID(), {TruncSrc});
1839 replaceValue(
I, *NewTruncation);
1853bool VectorCombine::foldSelectShuffle(
Instruction &
I,
bool FromReduction) {
1854 auto *SVI = cast<ShuffleVectorInst>(&
I);
1855 auto *VT = cast<FixedVectorType>(
I.getType());
1856 auto *Op0 = dyn_cast<Instruction>(SVI->getOperand(0));
1857 auto *Op1 = dyn_cast<Instruction>(SVI->getOperand(1));
1858 if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
1862 auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand(0));
1863 auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand(1));
1864 auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand(0));
1865 auto *SVI1B = dyn_cast<Instruction>(Op1->getOperand(1));
1868 if (!
I ||
I->getOperand(0)->getType() != VT)
1871 return U != Op0 && U != Op1 &&
1872 !(isa<ShuffleVectorInst>(U) &&
1873 (InputShuffles.contains(cast<Instruction>(U)) ||
1874 isInstructionTriviallyDead(cast<Instruction>(U))));
1877 if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
1878 checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
1886 for (
auto *U :
I->users()) {
1887 auto *SV = dyn_cast<ShuffleVectorInst>(U);
1888 if (!SV || SV->getType() != VT)
1890 if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
1891 (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
1898 if (!collectShuffles(Op0) || !collectShuffles(Op1))
1902 if (FromReduction && Shuffles.
size() > 1)
1907 if (!FromReduction) {
1909 for (
auto *U : SV->users()) {
1912 Shuffles.push_back(SSV);
1924 int MaxV1Elt = 0, MaxV2Elt = 0;
1925 unsigned NumElts = VT->getNumElements();
1928 SVN->getShuffleMask(Mask);
1932 Value *SVOp0 = SVN->getOperand(0);
1933 Value *SVOp1 = SVN->getOperand(1);
1934 if (isa<UndefValue>(SVOp1)) {
1935 auto *SSV = cast<ShuffleVectorInst>(SVOp0);
1938 for (
unsigned I = 0, E =
Mask.size();
I != E;
I++) {
1944 if (SVOp0 == Op1 && SVOp1 == Op0) {
1948 if (SVOp0 != Op0 || SVOp1 != Op1)
1955 for (
unsigned I = 0;
I <
Mask.size();
I++) {
1958 }
else if (Mask[
I] <
static_cast<int>(NumElts)) {
1959 MaxV1Elt = std::max(MaxV1Elt, Mask[
I]);
1960 auto It =
find_if(V1, [&](
const std::pair<int, int> &
A) {
1961 return Mask[
I] ==
A.first;
1970 MaxV2Elt = std::max<int>(MaxV2Elt, Mask[
I] - NumElts);
1971 auto It =
find_if(V2, [&](
const std::pair<int, int> &
A) {
1972 return Mask[
I] -
static_cast<int>(NumElts) ==
A.first;
1975 ReconstructMask.
push_back(NumElts + It -
V2.begin());
1978 V2.emplace_back(Mask[
I] - NumElts, NumElts +
V2.size());
1986 sort(ReconstructMask);
1987 OrigReconstructMasks.
push_back(std::move(ReconstructMask));
1994 if (V1.
empty() ||
V2.empty() ||
1995 (MaxV1Elt ==
static_cast<int>(V1.
size()) - 1 &&
1996 MaxV2Elt ==
static_cast<int>(
V2.size()) - 1))
2003 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
2006 if (isa<UndefValue>(SV->getOperand(1)))
2007 if (
auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
2008 if (InputShuffles.contains(SSV))
2010 return SV->getMaskValue(M);
2018 std::pair<int, int>
Y) {
2019 int MXA = GetBaseMaskValue(
A,
X.first);
2020 int MYA = GetBaseMaskValue(
A,
Y.first);
2023 stable_sort(V1, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
2024 return SortBase(SVI0A,
A,
B);
2026 stable_sort(V2, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
2027 return SortBase(SVI1A,
A,
B);
2032 for (
const auto &Mask : OrigReconstructMasks) {
2034 for (
int M : Mask) {
2036 auto It =
find_if(V, [M](
auto A) {
return A.second ==
M; });
2037 assert(It !=
V.end() &&
"Expected all entries in Mask");
2038 return std::distance(
V.begin(), It);
2042 else if (M <
static_cast<int>(NumElts)) {
2043 ReconstructMask.
push_back(FindIndex(V1, M));
2045 ReconstructMask.
push_back(NumElts + FindIndex(V2, M));
2048 ReconstructMasks.push_back(std::move(ReconstructMask));
2054 for (
unsigned I = 0;
I < V1.
size();
I++) {
2055 V1A.
push_back(GetBaseMaskValue(SVI0A, V1[
I].first));
2056 V1B.
push_back(GetBaseMaskValue(SVI0B, V1[
I].first));
2058 for (
unsigned I = 0;
I <
V2.size();
I++) {
2059 V2A.
push_back(GetBaseMaskValue(SVI1A, V2[
I].first));
2060 V2B.
push_back(GetBaseMaskValue(SVI1B, V2[
I].first));
2062 while (V1A.
size() < NumElts) {
2066 while (V2A.
size() < NumElts) {
2072 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
2078 VT, SV->getShuffleMask());
2089 CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
2091 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
2103 CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
2105 std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
2107 std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
2110 LLVM_DEBUG(
dbgs() <<
"Found a binop select shuffle pattern: " <<
I <<
"\n");
2112 <<
" vs CostAfter: " << CostAfter <<
"\n");
2113 if (CostBefore <= CostAfter)
2118 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
2121 if (isa<UndefValue>(SV->getOperand(1)))
2122 if (
auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
2123 if (InputShuffles.contains(SSV))
2125 return SV->getOperand(
Op);
2129 GetShuffleOperand(SVI0A, 1), V1A);
2132 GetShuffleOperand(SVI0B, 1), V1B);
2135 GetShuffleOperand(SVI1A, 1), V2A);
2138 GetShuffleOperand(SVI1B, 1), V2B);
2142 if (
auto *
I = dyn_cast<Instruction>(NOp0))
2143 I->copyIRFlags(Op0,
true);
2147 if (
auto *
I = dyn_cast<Instruction>(NOp1))
2148 I->copyIRFlags(Op1,
true);
2150 for (
int S = 0, E = ReconstructMasks.size(); S != E; S++) {
2153 replaceValue(*Shuffles[S], *NSV);
2156 Worklist.pushValue(NSV0A);
2157 Worklist.pushValue(NSV0B);
2158 Worklist.pushValue(NSV1A);
2159 Worklist.pushValue(NSV1B);
2160 for (
auto *S : Shuffles)
2167bool VectorCombine::run() {
2175 bool MadeChange =
false;
2178 bool IsFixedVectorType = isa<FixedVectorType>(
I.getType());
2179 auto Opcode =
I.getOpcode();
2185 if (IsFixedVectorType) {
2187 case Instruction::InsertElement:
2188 MadeChange |= vectorizeLoadInsert(
I);
2190 case Instruction::ShuffleVector:
2191 MadeChange |= widenSubvectorLoad(
I);
2200 if (isa<VectorType>(
I.getType())) {
2201 MadeChange |= scalarizeBinopOrCmp(
I);
2202 MadeChange |= scalarizeLoadExtract(
I);
2203 MadeChange |= scalarizeVPIntrinsic(
I);
2206 if (Opcode == Instruction::Store)
2207 MadeChange |= foldSingleElementStore(
I);
2210 if (TryEarlyFoldsOnly)
2217 if (IsFixedVectorType) {
2219 case Instruction::InsertElement:
2220 MadeChange |= foldInsExtFNeg(
I);
2222 case Instruction::ShuffleVector:
2223 MadeChange |= foldShuffleOfBinops(
I);
2224 MadeChange |= foldShuffleOfCastops(
I);
2225 MadeChange |= foldShuffleOfShuffles(
I);
2226 MadeChange |= foldSelectShuffle(
I);
2228 case Instruction::BitCast:
2229 MadeChange |= foldBitcastShuffle(
I);
2234 case Instruction::Call:
2235 MadeChange |= foldShuffleFromReductions(
I);
2236 MadeChange |= foldTruncFromReductions(
I);
2238 case Instruction::ICmp:
2239 case Instruction::FCmp:
2240 MadeChange |= foldExtractExtract(
I);
2244 MadeChange |= foldExtractExtract(
I);
2245 MadeChange |= foldExtractedCmps(
I);
2258 if (
I.isDebugOrPseudoInst())
2264 while (!Worklist.isEmpty()) {
2287 VectorCombine
Combiner(
F,
TTI, DT, AA, AC,
DL, TryEarlyFoldsOnly);
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
std::optional< std::vector< StOtherPiece > > Other
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This is the interface for a simple mod/ref and alias analysis over globals.
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
FunctionAnalysisManager FAM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilder<> &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
static Value * peekThroughBitcasts(Value *V)
Return the source operand of a potentially bitcasted value.
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, Instruction *CtxI, AssumptionCache &AC, const DominatorTree &DT)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
static const unsigned InvalidIndex
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
static ExtractElementInst * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilder<> &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift th...
A manager for alias analyses.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
LLVM Basic Block Representation.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
static Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of...
ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this ra...
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
This class represents an Operation in the Expression.
A parsed version of the target data layout string, together with methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcast to NumElts elements.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateFNegFMF(Value *V, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Value * CreateFreeze(Value *V, const Twine &Name="")
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstructionWorklist - This is the worklist management logic for InstCombine and other simplification ...
void pushUsersToWorkList(Instruction &I)
When an instruction is simplified, add all users of the instruction to the work lists because they mi...
void push(Instruction *I)
Push the instruction onto the worklist stack.
void remove(Instruction *I)
Remove I from the worklist if it exists.
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
'undef' values are things that do not have specified contents.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static bool isVPBinOp(Intrinsic::ID ID)
This is the common base class for vector predication intrinsics.
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
StringRef getName() const
Return a constant reference to the value's name.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
CastOperator_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
OneUse_match< T > m_OneUse(const T &SubPattern)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
class_match< UndefValue > m_UndefValue()
Match an arbitrary UndefValue constant.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignores it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
auto m_Undef()
Match an arbitrary undef constant.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
DWARFExpression::Operation Op
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool isSafeToLoadUnconditionally(Value *V, Align Alignment, APInt &Size, const DataLayout &DL, Instruction *ScanFrom=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.