34 #define DEBUG_TYPE "vector-combine"
40 STATISTIC(NumVecLoad, "Number of vector loads formed");
41 STATISTIC(NumVecCmp, "Number of vector compares formed");
42 STATISTIC(NumVecBO, "Number of vector binops formed");
43 STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
44 STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
45 STATISTIC(NumScalarBO, "Number of scalar binops formed");
46 STATISTIC(NumScalarCmp, "Number of scalar compares formed");
50 cl::desc("Disable all vector combine transforms"));
54 cl::desc("Disable binop extract to shuffle transforms"));
58 cl::desc("Max number of instructions to scan for vector combining."));
67 bool ScalarizationOnly)
69 ScalarizationOnly(ScalarizationOnly) {}
83 bool ScalarizationOnly;
90 unsigned PreferredExtractIndex) const;
94 unsigned PreferredExtractIndex);
107 bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
111 if (auto *NewI = dyn_cast<Instruction>(&New)) {
128 bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
131 auto *Ty = dyn_cast<FixedVectorType>(I.getType());
147 auto *Load = dyn_cast<LoadInst>(X);
149 Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
154 Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
155 assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
157 unsigned AS = Load->getPointerAddressSpace();
164 if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
172 unsigned MinVecNumElts = MinVectorSize / ScalarSize;
174 unsigned OffsetEltIndex = 0;
181 unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(SrcPtr->getType());
192 uint64_t ScalarSizeInBytes = ScalarSize / 8;
193 if (Offset.urem(ScalarSizeInBytes) != 0)
197 OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
198 if (OffsetEltIndex >= MinVecNumElts)
230 unsigned OutputNumElts = Ty->getNumElements();
232 assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
233 Mask[0] = OffsetEltIndex;
239 if (OldCost < NewCost || !NewCost.isValid())
245 Value *CastedPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
246     SrcPtr, MinVecTy->getPointerTo(AS));
247 Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
250 replaceValue(I, *VecLd);
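// --- Illustrative sketch (not part of VectorCombine.cpp) ---
// vectorizeLoadInsert widens a scalar load into a load of the smallest legal
// vector type and then shuffles the loaded element into lane 0 of the
// requested type. The helper below is a minimal standalone model of the mask
// construction only (plain std::vector instead of LLVM types), assuming the
// pointer offset has already been accumulated in bytes.
#include <cassert>
#include <cstdint>
#include <vector>

// Build the shuffle mask that selects the element at the byte offset as lane 0
// of the output and leaves all other output lanes undef (-1).
std::vector<int> makeLoadInsertMask(uint64_t OffsetBytes, uint64_t ScalarSizeBytes,
                                    unsigned MinVecNumElts, unsigned OutputNumElts) {
  assert(OffsetBytes % ScalarSizeBytes == 0 && "offset must be element-aligned");
  uint64_t OffsetEltIndex = OffsetBytes / ScalarSizeBytes;
  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
  std::vector<int> Mask(OutputNumElts, -1); // -1 models an undef mask lane
  Mask[0] = static_cast<int>(OffsetEltIndex);
  return Mask;
}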
262 assert(Index0C && Index1C && "Expected constant extract indexes");
264 unsigned Index0 = Index0C->getZExtValue();
265 unsigned Index1 = Index1C->getZExtValue();
268 if (Index0 == Index1)
292 if (PreferredExtractIndex == Index0)
294 if (PreferredExtractIndex == Index1)
298 return Index0 > Index1 ? Ext0 : Ext1;
310 unsigned PreferredExtractIndex) {
311 auto *Ext0IndexC = dyn_cast<ConstantInt>(Ext0->getOperand(1));
312 auto *Ext1IndexC = dyn_cast<ConstantInt>(Ext1->getOperand(1));
313 assert(Ext0IndexC && Ext1IndexC && "Expected constant extract indexes");
315 unsigned Opcode = I.getOpcode();
326 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
327        "Expected a compare");
337 unsigned Ext0Index = Ext0IndexC->getZExtValue();
338 unsigned Ext1Index = Ext1IndexC->getZExtValue();
362 bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
364 OldCost = CheapExtractCost + ScalarOpCost;
365 NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
369 OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
370 NewCost = VectorOpCost + CheapExtractCost +
375 ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
376 if (ConvertToShuffle) {
394 return OldCost < NewCost;
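// --- Illustrative sketch (not part of VectorCombine.cpp) ---
// isExtractExtractCheap weighs two scalar extracts plus a scalar op against
// one vector op followed by a single extract of the result. The function below
// is a standalone model of that comparison using plain integers in place of
// LLVM's InstructionCost; the "use tax" keeps charging for the original
// extracts when they have other users and must stay alive.
#include <cstdint>

bool shouldFormVectorOp(int64_t Extract0Cost, int64_t Extract1Cost,
                        int64_t CheapExtractCost, int64_t ScalarOpCost,
                        int64_t VectorOpCost, bool ExtractsHaveOtherUses) {
  int64_t OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
  int64_t NewCost = VectorOpCost + CheapExtractCost +
                    (ExtractsHaveOtherUses ? Extract0Cost + Extract1Cost : 0);
  // The transform is only taken when the new sequence is not more expensive.
  return NewCost <= OldCost;
}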
404 auto *VecTy = cast<FixedVectorType>(Vec->getType());
406 ShufMask[NewIndex] = OldIndex;
407 return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
421 assert(isa<ConstantInt>(C) && "Expected a constant index operand");
422 if (isa<Constant>(X))
427 return cast<ExtractElementInst>(Builder.CreateExtractElement(Shuf, NewIndex));
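// --- Illustrative sketch (not part of VectorCombine.cpp) ---
// createShiftShuffle builds an otherwise-undef mask that moves one element
// from OldIndex to NewIndex, so translateExtract can re-extract the value at
// the cheaper index. A minimal standalone model of that mask:
#include <vector>

std::vector<int> makeShiftMask(unsigned NumElts, unsigned OldIndex, unsigned NewIndex) {
  std::vector<int> ShufMask(NumElts, -1);           // every lane undef...
  ShufMask[NewIndex] = static_cast<int>(OldIndex);  // ...except the target lane
  return ShufMask;
}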
435 assert(isa<CmpInst>(&I) && "Expected a compare");
438        "Expected matching constant extract indexes");
446 replaceValue(I, *NewExt);
454 assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
457        "Expected matching constant extract indexes");
467 if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
468   VecBOInst->copyIRFlags(&I);
471 replaceValue(I, *NewExt);
475 bool VectorCombine::foldExtractExtract(Instruction &I) {
499 auto *Ext0 = cast<ExtractElementInst>(I0);
500 auto *Ext1 = cast<ExtractElementInst>(I1);
507 if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
510 if (ExtractToChange) {
511 unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
516 if (ExtractToChange == Ext0)
523 foldExtExtCmp(Ext0, Ext1, I);
525 foldExtExtBinop(Ext0, Ext1, I);
547 auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
548 auto *SrcTy = dyn_cast<FixedVectorType>(V->getType());
549 if (!SrcTy || !DestTy || I.getOperand(0)->getType() != SrcTy)
552 unsigned DestNumElts = DestTy->getNumElements();
553 unsigned SrcNumElts = SrcTy->getNumElements();
555 if (SrcNumElts <= DestNumElts) {
558 assert(DestNumElts % SrcNumElts == 0 && "Unexpected shuffle mask");
559 unsigned ScaleFactor = DestNumElts / SrcNumElts;
564 assert(SrcNumElts % DestNumElts == 0 && "Unexpected shuffle mask");
565 unsigned ScaleFactor = SrcNumElts / DestNumElts;
576 if (DestCost > SrcCost || !DestCost.isValid())
582 Value *Shuf = Builder.CreateShuffleVector(CastV, NewMask);
583 replaceValue(I, *Shuf);
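// --- Illustrative sketch (not part of VectorCombine.cpp) ---
// foldBitcastShuf turns "bitcast (shuffle V, Mask)" into "shuffle (bitcast V),
// Mask'", which requires rescaling the mask when the bitcast changes the
// element count. This standalone helper models the narrowing direction
// (DestNumElts > SrcNumElts), where every source mask entry expands into
// ScaleFactor consecutive entries; LLVM's narrowShuffleMaskElts /
// widenShuffleMaskElts do the real work in the pass.
#include <vector>

std::vector<int> narrowMask(const std::vector<int> &Mask, unsigned ScaleFactor) {
  std::vector<int> Scaled;
  for (int M : Mask)
    for (unsigned I = 0; I != ScaleFactor; ++I)
      Scaled.push_back(M < 0 ? -1
                             : M * static_cast<int>(ScaleFactor) + static_cast<int>(I));
  return Scaled;
}
// Example: Mask = {1, 0} over <2 x i64> bitcast to <4 x i32> gives
// ScaleFactor = 2 and Scaled = {2, 3, 0, 1}.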
589 bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
600 bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
602 for (User *U : I.users())
612 Constant *VecC0 = nullptr, *VecC1 = nullptr;
613 Value *V0 = nullptr, *V1 = nullptr;
626 if (IsConst0 && IsConst1)
628 if (!IsConst0 && !IsConst1 && Index0 != Index1)
633 auto *I0 = dyn_cast_or_null<Instruction>(V0);
634 auto *I1 = dyn_cast_or_null<Instruction>(V1);
635 if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
641 Type *VecTy = I.getType();
646 "Unexpected types for insert element into binop or cmp");
648 unsigned Opcode = I.getOpcode();
666 (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
668 (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
669 (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);
672 if (OldCost < NewCost || !NewCost.isValid())
689 IsCmp ? Builder.CreateCmp(Pred, V0, V1)
692 Scalar->setName(I.getName() + ".scalar");
696 if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
697   ScalarInst->copyIRFlags(&I);
703 replaceValue(I, *Insert);
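// --- Illustrative sketch (not part of VectorCombine.cpp) ---
// scalarizeBinopOrCmp rewrites "binop (inselt VecC0, V0, Index), (inselt
// VecC1, V1, Index)" into "inselt (binop VecC0, VecC1), (binop V0, V1),
// Index": the constant lanes are folded once and only the variable lane is
// computed at run time. A standalone model over plain arrays, assuming an
// integer add as the binop:
#include <array>
#include <cstddef>

template <std::size_t N>
std::array<int, N> scalarizedAdd(std::array<int, N> FoldedConstLanes, // binop VecC0, VecC1
                                 int V0, int V1, std::size_t Index) {
  FoldedConstLanes[Index] = V0 + V1; // one scalar op plus one insert...
  return FoldedConstLanes;           // ...instead of an N-lane vector add
}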
713 if (!I.isBinaryOp() || !I.getType()->isIntegerTy(1))
719 Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
737 auto *Ext0 = cast<ExtractElementInst>(I0);
738 auto *Ext1 = cast<ExtractElementInst>(I1);
748 auto *VecTy = dyn_cast<FixedVectorType>(X->getType());
764 int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
765 int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
770 ShufMask[CheapIndex] = ExpensiveIndex;
779 if (OldCost < NewCost || !NewCost.isValid())
792 Value *NewExt = Builder.CreateExtractElement(VecLogic, CheapIndex);
793 replaceValue(I, *NewExt);
802 unsigned NumScanned = 0;
804 return isModSet(AA.getModRefInfo(&Instr, Loc)) ||
812 enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
823 assert(!ToFreeze && "freeze() not called with ToFreeze being set");
829 return {StatusTy::SafeWithFreeze, ToFreeze};
843 Status = StatusTy::Unsafe;
848 assert(isSafeWithFreeze() &&
849        "should only be used when freezing is required");
851 "UserI must be a user of ToFreeze");
853 Builder.SetInsertPoint(cast<Instruction>(&UserI));
857 if (U.get() == ToFreeze)
870 if (auto *C = dyn_cast<ConstantInt>(Idx)) {
877 APInt Zero(IntWidth, 0);
884 true, &AC, CtxI, &DT)))
899 if (ValidIndices.contains(IdxRange))
911 if (auto *C = dyn_cast<ConstantInt>(Idx))
913     C->getZExtValue() * DL.getTypeStoreSize(ScalarType));
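// --- Illustrative sketch (not part of VectorCombine.cpp) ---
// canScalarizeAccess only allows a vector memory access to be scalarized when
// the lane index is provably in bounds: a constant index is checked against
// the element count directly, and a variable index is accepted when its value
// range fits in [0, NumElts). This standalone model uses a simple unsigned
// half-open range in place of LLVM's ConstantRange and ignores the
// freeze-for-poison case handled by ScalarizationResult.
#include <cstdint>
#include <optional>

struct URange { uint64_t Lo, Hi; }; // half-open [Lo, Hi)

bool canScalarizeIndex(std::optional<uint64_t> ConstIdx,
                       std::optional<URange> KnownRange, uint64_t NumElts) {
  if (ConstIdx)
    return *ConstIdx < NumElts;               // constant index: direct check
  if (KnownRange)
    return KnownRange->Lo < KnownRange->Hi &&  // non-empty range entirely
           KnownRange->Hi <= NumElts;          // inside [0, NumElts)
  return false;                                // unknown index: unsafe
}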
925 bool VectorCombine::foldSingleElementStore(Instruction &I) {
927 if (!SI || !SI->isSimple() ||
928     !isa<FixedVectorType>(SI->getValueOperand()->getType()))
936 if (!match(SI->getValueOperand(),
941 if (auto *Load = dyn_cast<LoadInst>(Source)) {
942 auto VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
944 Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
947 if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
948     !DL.typeSizeEqualsStoreSize(Load->getType()) ||
949     SrcAddr != SI->getPointerOperand()->stripPointerCasts())
953 if (ScalarizableIdx.isUnsafe() ||
958 if (ScalarizableIdx.isSafeWithFreeze())
959   ScalarizableIdx.freeze(Builder, *cast<Instruction>(Idx));
961     SI->getValueOperand()->getType(), SI->getPointerOperand(),
962     {ConstantInt::get(Idx->getType(), 0), Idx});
969 replaceValue(I, *NSI);
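// --- Illustrative sketch (not part of VectorCombine.cpp) ---
// foldSingleElementStore turns "store (insertelt (load %p), %v, %idx), %p"
// into a scalar store of the single updated element, provided the index is in
// bounds and nothing between the load and the store touches that memory. In
// plain C++ terms the rewrite looks roughly like this (hypothetical buffer
// and index names, not taken from the pass):
#include <algorithm>
#include <cstddef>
#include <vector>

void beforeFold(int *P, std::size_t N, int V, std::size_t Idx) {
  std::vector<int> Tmp(P, P + N);       // load the whole vector
  Tmp[Idx] = V;                         // insertelement
  std::copy(Tmp.begin(), Tmp.end(), P); // store the whole vector back
}

void afterFold(int *P, int V, std::size_t Idx) {
  P[Idx] = V; // single scalar store of the updated lane
}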
978 bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
983 auto *LI = cast<LoadInst>(&I);
985 if (LI->isVolatile() || !DL.typeSizeEqualsStoreSize(LI->getType()))
988 auto *FixedVT = dyn_cast<FixedVectorType>(LI->getType());
994 LI->getPointerAddressSpace());
998 unsigned NumInstChecked = 0;
1003 auto *UI = dyn_cast<ExtractElementInst>(U);
1004 if (!UI || UI->getParent() != LI->getParent())
1014 make_range(std::next(LI->getIterator()), UI->getIterator())) {
1021 LastCheckedInst = UI;
1025 if (!ScalarIdx.isSafe()) {
1027 ScalarIdx.discard();
1031 auto *Index = dyn_cast<ConstantInt>(UI->getOperand(1));
1034 Index ? Index->getZExtValue() : -1);
1037 Align(1), LI->getPointerAddressSpace());
1041 if (ScalarizedCost >= OriginalCost)
1046 auto *EI = cast<ExtractElementInst>(U);
1049 Value *Idx = EI->getOperand(1);
1051 Builder.CreateInBoundsGEP(FixedVT, Ptr, {Builder.getInt32(0), Idx});
1052 auto *NewLoad = cast<LoadInst>(Builder.CreateLoad(
1053     FixedVT->getElementType(), GEP, EI->getName() + ".scalar"));
1056 LI->getAlign(), FixedVT->getElementType(), Idx, DL);
1057 NewLoad->setAlignment(ScalarOpAlignment);
1059 replaceValue(*EI, *NewLoad);
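// --- Illustrative sketch (not part of VectorCombine.cpp) ---
// When a vector load feeding extractelement is scalarized, the new scalar
// load's alignment is derived from the vector alignment and the byte offset
// of the extracted lane: the result is the largest power of two that divides
// both. A standalone model of that computation for a constant index (LLVM's
// commonAlignment does the real work in computeAlignmentAfterScalarization):
#include <cstdint>

uint64_t alignmentAfterScalarization(uint64_t VectorAlign, uint64_t EltSizeBytes,
                                     uint64_t ConstIdx) {
  uint64_t Offset = EltSizeBytes * ConstIdx;
  if (Offset == 0)
    return VectorAlign;                 // lane 0 keeps the full vector alignment
  uint64_t OffsetAlign = Offset & -Offset; // lowest set bit = alignment of offset
  return VectorAlign < OffsetAlign ? VectorAlign : OffsetAlign;
}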
1067 bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
1068 auto *VecTy = dyn_cast<FixedVectorType>(I.getType());
1087 if (ShufCost > BinopCost)
1092 Value *Z = B1->getOperand(0), *W = B1->getOperand(1);
1096 Value *Shuf0, *Shuf1;
1099 Shuf0 = Builder.CreateShuffleVector(X, UnaryMask);
1101 } else if (Y == W) {
1104 Shuf1 = Builder.CreateShuffleVector(Y, UnaryMask);
1109 Value *NewBO = Builder.CreateBinOp(Opcode, Shuf0, Shuf1);
1111 if (auto *NewInst = dyn_cast<Instruction>(NewBO)) {
1112 NewInst->copyIRFlags(B0);
1113 NewInst->andIRFlags(B1);
1115 replaceValue(I, *NewBO);
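// --- Illustrative sketch (not part of VectorCombine.cpp) ---
// foldShuffleOfBinops relies on the lane-wise identity
// "shuffle (op X, Y), (op Z, W), Mask == op (shuffle X, Z, Mask),
// (shuffle Y, W, Mask)", applying it when one operand is shared so that only a
// single vector binop remains. The standalone function below demonstrates the
// general identity lane by lane, assuming integer add as the binop:
#include <vector>

std::vector<int> shuffleOfAdds(const std::vector<int> &X, const std::vector<int> &Y,
                               const std::vector<int> &Z, const std::vector<int> &W,
                               const std::vector<int> &Mask) {
  auto pick = [&](const std::vector<int> &A, const std::vector<int> &B, int M) {
    return M < (int)A.size() ? A[M] : B[M - (int)A.size()];
  };
  std::vector<int> Out;
  for (int M : Mask)
    Out.push_back(pick(X, Z, M) + pick(Y, W, M)); // == shuffle(X+Y, Z+W, Mask)
  return Out;
}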
1122 bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
1123 auto *II = dyn_cast<IntrinsicInst>(&I);
1126 switch (II->getIntrinsicID()) {
1127 case Intrinsic::vector_reduce_add:
1128 case Intrinsic::vector_reduce_mul:
1129 case Intrinsic::vector_reduce_and:
1130 case Intrinsic::vector_reduce_or:
1131 case Intrinsic::vector_reduce_xor:
1132 case Intrinsic::vector_reduce_smin:
1133 case Intrinsic::vector_reduce_smax:
1134 case Intrinsic::vector_reduce_umin:
1135 case Intrinsic::vector_reduce_umax:
1144 std::queue<Value *> Worklist;
1147 if (auto *Op = dyn_cast<Instruction>(I.getOperand(0)))
1150 while (!Worklist.empty()) {
1151 Value *CV = Worklist.front();
1162 if (auto *CI = dyn_cast<Instruction>(CV)) {
1163 if (CI->isBinaryOp()) {
1164 for (auto *Op : CI->operand_values())
1167 } else if (auto *SV = dyn_cast<ShuffleVectorInst>(CI)) {
1168 if (Shuffle && Shuffle != SV)
1185 for (auto *V : Visited)
1186   for (auto *U : V->users())
1187     if (!Visited.contains(U) && U != &I)
1191 dyn_cast<FixedVectorType>(II->getOperand(0)->getType());
1196 if (!ShuffleInputType)
1204 sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
1205 bool UsesSecondVec =
1206     any_of(ConcatMask, [&](int M) { return M >= NumInputElts; });
1214 LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
1216 LLVM_DEBUG(dbgs() << " OldCost: " << OldCost << " vs NewCost: " << NewCost
1218 if (NewCost < OldCost) {
1219 Builder.SetInsertPoint(Shuffle);
1222 LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n");
1223 replaceValue(*Shuffle, *NewShuffle);
1228 return foldSelectShuffle(*Shuffle, true);
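// --- Illustrative sketch (not part of VectorCombine.cpp) ---
// Because the reductions listed above are insensitive to lane order, a shuffle
// feeding the reduction only needs to preserve the multiset of lanes, not
// their positions. The pass sorts the concatenated mask (undef entries sort
// last via the unsigned compare) and asks TTI whether the sorted, single-source
// shuffle is cheaper. This standalone check models just the special case where
// the sorted mask is the identity and the shuffle is redundant for the
// reduction:
#include <algorithm>
#include <vector>

bool shuffleIsRedundantForReduction(std::vector<int> Mask, unsigned NumInputElts) {
  std::sort(Mask.begin(), Mask.end(),
            [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
  bool UsesSecondVec = std::any_of(Mask.begin(), Mask.end(),
                                   [&](int M) { return M >= (int)NumInputElts; });
  if (UsesSecondVec)
    return false;
  for (unsigned I = 0; I != Mask.size(); ++I)
    if (Mask[I] != (int)I) // sorted mask must be 0, 1, 2, ... to be a no-op
      return false;
  return true;
}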
1241 bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
1242 auto *SVI = dyn_cast<ShuffleVectorInst>(&I);
1243 auto *VT = dyn_cast<FixedVectorType>(I.getType());
1246 auto *Op0 = dyn_cast<Instruction>(SVI->getOperand(0));
1247 auto *Op1 = dyn_cast<Instruction>(SVI->getOperand(1));
1248 if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
1249     VT != Op0->getType())
1251 auto *SVI0A = dyn_cast<ShuffleVectorInst>(Op0->getOperand(0));
1252 auto *SVI0B = dyn_cast<ShuffleVectorInst>(Op0->getOperand(1));
1253 auto *SVI1A = dyn_cast<ShuffleVectorInst>(Op1->getOperand(0));
1254 auto *SVI1B = dyn_cast<ShuffleVectorInst>(Op1->getOperand(1));
1256 if (!I || I->getOperand(0)->getType() != VT)
1258 return any_of(I->users(), [&](User *U) { return U != Op0 && U != Op1; });
1260 if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
1261     checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
1269 for (auto *U : I->users()) {
1270 auto *SV = dyn_cast<ShuffleVectorInst>(U);
1271 if (!SV || SV->getType() != VT)
1273 if (find(Shuffles, SV) == Shuffles.end())
1274   Shuffles.push_back(SV);
1278 if (!collectShuffles(Op0) || !collectShuffles(Op1))
1282 if (FromReduction && Shuffles.size() > 1)
1292 int MaxV1Elt = 0, MaxV2Elt = 0;
1293 unsigned NumElts = VT->getNumElements();
1296 SVN->getShuffleMask(Mask);
1300 Value *SVOp0 = SVN->getOperand(0);
1301 Value *SVOp1 = SVN->getOperand(1);
1302 if (SVOp0 == Op1 && SVOp1 == Op0) {
1306 if (SVOp0 != Op0 || SVOp1 != Op1)
1313 for (unsigned I = 0; I < Mask.size(); I++) {
1315 ReconstructMask.push_back(-1);
1316 } else if (Mask[I] < static_cast<int>(NumElts)) {
1320 ReconstructMask.push_back(It - V1.begin());
1322 ReconstructMask.push_back(V1.size());
1323 V1.push_back(Mask[I]);
1326 MaxV2Elt = std::max<int>(MaxV2Elt, Mask[I] - NumElts);
1329 ReconstructMask.push_back(NumElts + It - V2.begin());
1331 ReconstructMask.push_back(NumElts + V2.size());
1332 V2.push_back(Mask[I] - NumElts);
1340 sort(ReconstructMask);
1341 ReconstructMasks.push_back(ReconstructMask);
1348 if (V1.empty() || V2.empty() ||
1349     (MaxV1Elt == static_cast<int>(V1.size()) - 1 &&
1350      MaxV2Elt == static_cast<int>(V2.size()) - 1))
1356 for (unsigned I = 0; I < V1.size(); I++) {
1357 V1A.push_back(SVI0A->getMaskValue(V1[I]));
1358 V1B.push_back(SVI0B->getMaskValue(V1[I]));
1360 for (unsigned I = 0; I < V2.size(); I++) {
1361 V2A.push_back(SVI1A->getMaskValue(V2[I]));
1362 V2B.push_back(SVI1B->getMaskValue(V2[I]));
1364 while (V1A.size() < NumElts) {
1368 while (V2A.size() < NumElts) {
1386 CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
1390 {SVI0A, SVI0B, SVI1A, SVI1B});
1391 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
1403 CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
1405 std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
1407 std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
1410 if (CostBefore <= CostAfter)
1414 Builder.SetInsertPoint(SVI0A);
1415 Value *NSV0A = Builder.CreateShuffleVector(SVI0A->getOperand(0),
1416                                            SVI0A->getOperand(1), V1A);
1417 Builder.SetInsertPoint(SVI0B);
1418 Value *NSV0B = Builder.CreateShuffleVector(SVI0B->getOperand(0),
1419                                            SVI0B->getOperand(1), V1B);
1420 Builder.SetInsertPoint(SVI1A);
1421 Value *NSV1A = Builder.CreateShuffleVector(SVI1A->getOperand(0),
1422                                            SVI1A->getOperand(1), V2A);
1423 Builder.SetInsertPoint(SVI1B);
1424 Value *NSV1B = Builder.CreateShuffleVector(SVI1B->getOperand(0),
1425                                            SVI1B->getOperand(1), V2B);
1429 if (auto *I = dyn_cast<Instruction>(NOp0))
1430   I->copyIRFlags(Op0, true);
1434 if (auto *I = dyn_cast<Instruction>(NOp1))
1435   I->copyIRFlags(Op1, true);
1437 for (int S = 0, E = ReconstructMasks.size(); S != E; S++) {
1438 Builder.SetInsertPoint(Shuffles[S]);
1439 Value *NSV = Builder.CreateShuffleVector(NOp0, NOp1, ReconstructMasks[S]);
1440 replaceValue(*Shuffles[S], *NSV);
1443 Worklist.pushValue(NSV0A);
1444 Worklist.pushValue(NSV0B);
1445 Worklist.pushValue(NSV1A);
1446 Worklist.pushValue(NSV1B);
1447 for (auto *S : Shuffles)
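// --- Illustrative sketch (not part of VectorCombine.cpp) ---
// foldSelectShuffle packs the lanes that the output shuffles actually demand
// to the bottom of each binop operand and then remaps the output masks to the
// packed positions, mirroring the loop above that builds V1/V2 and
// ReconstructMask. The standalone helper below models the remapping for a
// single source: each newly demanded lane is assigned the next packed slot.
#include <algorithm>
#include <vector>

std::vector<int> packDemandedLanes(const std::vector<int> &Mask,
                                   std::vector<int> &PackedLanes) {
  std::vector<int> Remapped;
  for (int M : Mask) {
    if (M < 0) {                   // undef lane stays undef
      Remapped.push_back(-1);
      continue;
    }
    auto It = std::find(PackedLanes.begin(), PackedLanes.end(), M);
    if (It != PackedLanes.end()) { // lane already packed: reuse its slot
      Remapped.push_back(static_cast<int>(It - PackedLanes.begin()));
    } else {                       // assign the next packed slot
      Remapped.push_back(static_cast<int>(PackedLanes.size()));
      PackedLanes.push_back(M);
    }
  }
  return Remapped;
}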
1462 bool MadeChange = false;
1465 if (!ScalarizationOnly) {
1466 MadeChange |= vectorizeLoadInsert(I);
1467 MadeChange |= foldExtractExtract(I);
1468 MadeChange |= foldBitcastShuf(I);
1469 MadeChange |= foldExtractedCmps(I);
1470 MadeChange |= foldShuffleOfBinops(I);
1471 MadeChange |= foldShuffleFromReductions(I);
1472 MadeChange |= foldSelectShuffle(I);
1474 MadeChange |= scalarizeBinopOrCmp(I);
1475 MadeChange |= scalarizeLoadExtract(I);
1476 MadeChange |= foldSingleElementStore(I);
1484 if (I.isDebugOrPseudoInst())
1490 while (!Worklist.isEmpty()) {
1530 if (skipFunction(F))
1532 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1533 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1534 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1535 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1544 "Optimize scalar/vector ops", false,
1551 return new VectorCombineLegacyPass();