#define DEBUG_TYPE "vectorutils"

/// Maximum factor for an interleaved memory access.
static cl::opt<unsigned> MaxInterleaveGroupFactor(
    "max-interleave-group-factor", cl::Hidden,
    cl::desc("Maximum factor for an interleaved access group (default = 8)"),
    cl::init(8));
/// Identify if the intrinsic is trivially vectorizable.
/// Note: a few case labels are elided in this excerpt.
bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
  switch (ID) {
  case Intrinsic::bswap:
  case Intrinsic::bitreverse:
  case Intrinsic::ctpop:
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::smul_fix:
  case Intrinsic::smul_fix_sat:
  case Intrinsic::umul_fix:
  case Intrinsic::umul_fix_sat:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::umul_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::atan2:
  case Intrinsic::sincos:
  case Intrinsic::sincospi:
  case Intrinsic::exp10:
  case Intrinsic::frexp:
  case Intrinsic::ldexp:
  case Intrinsic::log10:
  case Intrinsic::minnum:
  case Intrinsic::maxnum:
  case Intrinsic::minimum:
  case Intrinsic::maximum:
  case Intrinsic::minimumnum:
  case Intrinsic::maximumnum:
  case Intrinsic::modf:
  case Intrinsic::copysign:
  case Intrinsic::floor:
  case Intrinsic::ceil:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::roundeven:
  case Intrinsic::fmuladd:
  case Intrinsic::is_fpclass:
  case Intrinsic::powi:
  case Intrinsic::canonicalize:
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat:
  case Intrinsic::lround:
  case Intrinsic::llround:
  case Intrinsic::lrint:
  case Intrinsic::llrint:
  case Intrinsic::ucmp:
  case Intrinsic::scmp:
  case Intrinsic::clmul:
    return true;
  default:
    return false;
  }
}
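// --- Editorial example (not part of VectorUtils.cpp): a sketch of how a
// vectorizer's legality step can use the predicate above. `CI` and `TLI` are
// assumed to be supplied by the surrounding pass.
static bool canWidenIntrinsicCall(CallInst *CI, const TargetLibraryInfo *TLI) {
  Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
  // E.g. llvm.ctpop.i32 widens to llvm.ctpop.v4i32 with no further checks,
  // while an arbitrary call must be left scalar.
  return ID != Intrinsic::not_intrinsic && isTriviallyVectorizable(ID);
}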
/// Identifies if the vector form of the intrinsic has a scalar operand.
bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
                                              unsigned ScalarOpdIdx,
                                              const TargetTransformInfo *TTI) {
  if (TTI && Intrinsic::isTargetIntrinsic(ID))
    return TTI->isTargetIntrinsicWithScalarOpAtArg(ID, ScalarOpdIdx);

  // Vector predication intrinsics have the EVL as a scalar operand.
  if (VPIntrinsic::getVectorLengthParamPos(ID) == ScalarOpdIdx)
    return true;

  switch (ID) {
  // ...
  case Intrinsic::vp_abs:
  case Intrinsic::ctlz:
  case Intrinsic::vp_ctlz:
  case Intrinsic::cttz:
  case Intrinsic::vp_cttz:
  case Intrinsic::is_fpclass:
  case Intrinsic::vp_is_fpclass:
  case Intrinsic::powi:
  case Intrinsic::vector_extract:
    return (ScalarOpdIdx == 1);
  case Intrinsic::smul_fix:
  case Intrinsic::smul_fix_sat:
  case Intrinsic::umul_fix:
  case Intrinsic::umul_fix_sat:
    return (ScalarOpdIdx == 2);
  case Intrinsic::experimental_vp_splice:
    return ScalarOpdIdx == 2 || ScalarOpdIdx == 4;
  default:
    return false;
  }
}
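// --- Editorial example (not part of VectorUtils.cpp): building the argument
// list of a widened call. `ID`, `VF`, `CI` and `Builder` are assumed to come
// from the surrounding pass.
SmallVector<Value *, 4> Args;
for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
  Value *Arg = CI->getArgOperand(I);
  if (isVectorIntrinsicWithScalarOpAtArg(ID, I, /*TTI=*/nullptr))
    Args.push_back(Arg); // e.g. the i32 exponent of llvm.powi stays scalar
  else
    Args.push_back(Builder.CreateVectorSplat(VF, Arg)); // broadcast operand
}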
/// Identifies if the vector form of the intrinsic is overloaded on the type
/// of the operand at index OpdIdx, or on the return type if OpdIdx is -1.
bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(
    Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI) {
  if (TTI && Intrinsic::isTargetIntrinsic(ID))
    return TTI->isTargetIntrinsicWithOverloadTypeAtArg(ID, OpdIdx);

  if (VPCastIntrinsic::isVPCast(ID))
    return OpdIdx == -1 || OpdIdx == 0;

  switch (ID) {
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat:
  case Intrinsic::lround:
  case Intrinsic::llround:
  case Intrinsic::lrint:
  case Intrinsic::llrint:
  case Intrinsic::vp_lrint:
  case Intrinsic::vp_llrint:
  case Intrinsic::ucmp:
  case Intrinsic::scmp:
  case Intrinsic::vector_extract:
    return OpdIdx == -1 || OpdIdx == 0;
  case Intrinsic::modf:
  case Intrinsic::sincos:
  case Intrinsic::sincospi:
  case Intrinsic::is_fpclass:
  case Intrinsic::vp_is_fpclass:
    return OpdIdx == 0;
  case Intrinsic::powi:
  case Intrinsic::ldexp:
    return OpdIdx == -1 || OpdIdx == 1;
  default:
    return OpdIdx == -1;
  }
}
/// Identifies if the vector form of the intrinsic that returns a struct is
/// overloaded at the struct element index RetIdx.
bool llvm::isVectorIntrinsicWithStructReturnOverloadAtField(
    Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI) {
  if (TTI && Intrinsic::isTargetIntrinsic(ID))
    return TTI->isTargetIntrinsicWithStructReturnOverloadAtField(ID, RetIdx);

  switch (ID) {
  case Intrinsic::frexp:
    return RetIdx == 0 || RetIdx == 1;
  default:
    return RetIdx == 0;
  }
}
/// Returns the intrinsic ID for the call, or not_intrinsic if the call does
/// not map to a vectorizable intrinsic.
Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
                                                const TargetLibraryInfo *TLI) {
  Intrinsic::ID ID = getIntrinsicForCallSite(*CI, TLI);
  if (ID == Intrinsic::not_intrinsic)
    return Intrinsic::not_intrinsic;

  if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
      ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
      ID == Intrinsic::experimental_noalias_scope_decl ||
      ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)
    return ID;
  return Intrinsic::not_intrinsic;
}
/// Returns the corresponding factor of llvm.vector.interleaveN intrinsics.
unsigned llvm::getInterleaveIntrinsicFactor(Intrinsic::ID ID) {
  switch (ID) {
  case Intrinsic::vector_interleave2:
    return 2;
  case Intrinsic::vector_interleave3:
    return 3;
  case Intrinsic::vector_interleave4:
    return 4;
  case Intrinsic::vector_interleave5:
    return 5;
  case Intrinsic::vector_interleave6:
    return 6;
  case Intrinsic::vector_interleave7:
    return 7;
  case Intrinsic::vector_interleave8:
    return 8;
  default:
    return 0;
  }
}

/// Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
unsigned llvm::getDeinterleaveIntrinsicFactor(Intrinsic::ID ID) {
  switch (ID) {
  case Intrinsic::vector_deinterleave2:
    return 2;
  case Intrinsic::vector_deinterleave3:
    return 3;
  case Intrinsic::vector_deinterleave4:
    return 4;
  case Intrinsic::vector_deinterleave5:
    return 5;
  case Intrinsic::vector_deinterleave6:
    return 6;
  case Intrinsic::vector_deinterleave7:
    return 7;
  case Intrinsic::vector_deinterleave8:
    return 8;
  default:
    return 0;
  }
}
/// Given a deinterleaveN intrinsic, return the (narrow) vector type of each
/// factor.
VectorType *llvm::getDeinterleavedVectorType(IntrinsicInst *DI) {
  [[maybe_unused]] unsigned Factor =
      getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
  ArrayRef<Type *> DISubtypes = DI->getType()->subtypes();
  assert(Factor && Factor == DISubtypes.size() &&
         "unexpected deinterleave factor or result type");
  return cast<VectorType>(DISubtypes[0]);
}
/// Given a vector and an element number, see if the scalar value is already
/// around as a register, e.g. if it was inserted then extracted.
Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
  VectorType *VTy = cast<VectorType>(V->getType());
  // For a fixed-length vector, out-of-range access yields poison.
  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    unsigned Width = FVTy->getNumElements();
    if (EltNo >= Width)
      return PoisonValue::get(FVTy->getElementType());
  }

  if (Constant *C = dyn_cast<Constant>(V))
    return C->getAggregateElement(EltNo);

  if (auto *III = dyn_cast<InsertElementInst>(V)) {
    // An insert to the element we are looking for returns the inserted
    // value; an insert to a non-constant index tells us nothing.
    auto *IIElt = dyn_cast<ConstantInt>(III->getOperand(2));
    if (!IIElt)
      return nullptr;
    if (EltNo == IIElt->getZExtValue())
      return III->getOperand(1);
    // Guard against infinite loop on malformed, unreachable IR.
    if (III == III->getOperand(0))
      return nullptr;
    // Otherwise recurse into the vector the element was inserted into.
    return findScalarElement(III->getOperand(0), EltNo);
  }

  // Follow a shufflevector mask element into the operand it selects from
  // (fixed-length vectors only).
  auto *SVI = dyn_cast<ShuffleVectorInst>(V);
  if (SVI && isa<FixedVectorType>(SVI->getType())) {
    unsigned LHSWidth =
        cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
    int InEl = SVI->getMaskValue(EltNo);
    if (InEl < 0)
      return PoisonValue::get(VTy->getElementType());
    if (InEl < (int)LHSWidth)
      return findScalarElement(SVI->getOperand(0), InEl);
    return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
  }

  // Extract a value from a vector add operation with a constant zero.
  Value *Val;
  Constant *C;
  if (match(V, m_Add(m_Value(Val), m_Constant(C))))
    if (Constant *Elt = C->getAggregateElement(EltNo))
      if (Elt->isNullValue())
        return findScalarElement(Val, EltNo);

  // A splat trivially gives the scalar for any in-range index.
  if (Value *Splat = getSplatValue(V))
    if (EltNo < VTy->getElementCount().getKnownMinValue())
      return Splat;

  // Otherwise, we don't know.
  return nullptr;
}
/// If all non-negative Mask elements are the same value, return that value.
int llvm::getSplatIndex(ArrayRef<int> Mask) {
  int SplatIndex = -1;
  for (int M : Mask) {
    // Ignore invalid (undefined) mask elements.
    if (M < 0)
      continue;
    // There can be only 1 non-negative unique value.
    if (SplatIndex != -1 && SplatIndex != M)
      return -1;
    SplatIndex = M;
  }
  assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");
  return SplatIndex;
}

/// Get splat value if the input is a splat vector or return nullptr.
Value *llvm::getSplatValue(const Value *V) {
  if (isa<VectorType>(V->getType()))
    if (auto *C = dyn_cast<Constant>(V))
      return C->getSplatValue();

  // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>
  Value *Splat;
  if (match(V,
            m_Shuffle(m_InsertElt(m_Value(), m_Value(Splat), m_ZeroInt()),
                      m_Value(), m_ZeroMask())))
    return Splat;

  return nullptr;
}

bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) {
  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");

  if (isa<VectorType>(V->getType())) {
    if (isa<UndefValue>(V))
      return true;
    if (auto *C = dyn_cast<Constant>(V))
      return C->getSplatValue() != nullptr;
  }

  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
    // Every defined mask element must pick the same lane.
    if (!all_equal(Shuf->getShuffleMask()))
      return false;
    // Match any index.
    if (Index == -1)
      return true;
    // Match a specific element: the mask must be defined at Index and select
    // exactly that lane.
    return Shuf->getMaskValue(Index) == Index;
  }

  // ... (recursive binop/select cases elided in this excerpt)
  return false;
}
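// --- Editorial example (not part of VectorUtils.cpp): the canonical splat
// idiom that getSplatValue recognizes is
//   %ins   = insertelement <4 x float> poison, float %x, i64 0
//   %splat = shufflevector <4 x float> %ins, <4 x float> poison,
//                          <4 x i32> zeroinitializer
// for which it returns %x. With `V` assumed to be that %splat:
Value *Scalar = getSplatValue(V); // non-null iff every lane of V is %x
bool Splat = isSplatValue(V);     // also true for non-constant splat shuffles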
/// Transform a shuffle mask's output demanded element mask into demanded
/// element masks for the two operands. Returns false if the mask isn't valid.
bool llvm::getShuffleDemandedElts(int SrcWidth, ArrayRef<int> Mask,
                                  const APInt &DemandedElts, APInt &DemandedLHS,
                                  APInt &DemandedRHS, bool AllowUndefElts) {
  DemandedLHS = DemandedRHS = APInt::getZero(SrcWidth);

  // Early out if we don't demand any elements.
  if (DemandedElts.isZero())
    return true;

  for (unsigned I = 0, E = Mask.size(); I != E; ++I) {
    int M = Mask[I];
    assert((-1 <= M) && (M < (SrcWidth * 2)) &&
           "Invalid shuffle mask constant");

    if (!DemandedElts[I] || (AllowUndefElts && (M < 0)))
      continue;

    // For undef elements we don't know anything about the common state of
    // the shuffle result.
    if (M < 0)
      return false;

    if (M < SrcWidth)
      DemandedLHS.setBit(M);
    else
      DemandedRHS.setBit(M - SrcWidth);
  }

  return true;
}
/// Does this shuffle mask represent either one slide shuffle or a pair of
/// two slide shuffles, combined with a select?
bool llvm::isMaskedSlidePair(ArrayRef<int> Mask, int NumElts,
                             std::array<std::pair<int, int>, 2> &SrcInfo) {
  const int SignalValue = NumElts * 2;
  SrcInfo[0] = {-1, SignalValue};
  SrcInfo[1] = {-1, SignalValue};
  for (auto [i, M] : enumerate(Mask)) {
    if (M < 0)
      continue;
    // Which source does this element come from, and at what slide offset?
    int Src = M >= NumElts;
    int Diff = (int)i - (M % NumElts);
    bool Match = false;
    for (int j = 0; j < 2; j++) {
      auto &[SrcE, DiffE] = SrcInfo[j];
      // First time we see this source/offset pair: record it.
      if (SrcE == -1) {
        assert(DiffE == SignalValue);
        SrcE = Src;
        DiffE = Diff;
      }
      if (SrcE == Src && DiffE == Diff) {
        Match = true;
        break;
      }
    }
    if (!Match)
      return false;
  }
  // Avoid all-undef masks.
  return SrcInfo[0].first != -1;
}
/// Replace each shuffle mask index with the scaled sequential indices for an
/// equivalent mask of narrowed elements.
void llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                                 SmallVectorImpl<int> &ScaledMask) {
  assert(Scale > 0 && "Unexpected scaling factor");

  // Fast-path: if no scaling, then it is just a copy.
  if (Scale == 1) {
    ScaledMask.assign(Mask.begin(), Mask.end());
    return;
  }

  ScaledMask.clear();
  for (int MaskElt : Mask) {
    if (MaskElt >= 0)
      assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX &&
             "Overflowed 32-bits");
    for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
      ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
  }
}
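// --- Editorial example (not part of VectorUtils.cpp): re-expressing the
// v2i64 swap mask <1, 0> on i32 elements (Scale = 2) yields <2, 3, 0, 1>,
// i.e. each wide lane becomes two consecutive narrow lanes.
SmallVector<int, 8> Narrow;
narrowShuffleMaskElts(2, {1, 0}, Narrow); // Narrow == {2, 3, 0, 1}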
/// Try to transform a shuffle mask by replacing elements with the scaled
/// index for an equivalent mask of widened elements.
bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                                SmallVectorImpl<int> &ScaledMask) {
  assert(Scale > 0 && "Unexpected scaling factor");

  // Fast-path: if no scaling, then it is just a copy.
  if (Scale == 1) {
    ScaledMask.assign(Mask.begin(), Mask.end());
    return true;
  }

  // The original elements must map down evenly to a type with fewer elements.
  int NumElts = Mask.size();
  if (NumElts % Scale != 0)
    return false;

  ScaledMask.clear();
  ScaledMask.reserve(NumElts / Scale);

  // Step through the input mask by splitting into Scale-sized slices.
  do {
    ArrayRef<int> MaskSlice = Mask.take_front(Scale);
    assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");

    // The first element of the slice determines how we evaluate it.
    int SliceFront = MaskSlice.front();
    if (SliceFront < 0) {
      // Negative (undef/sentinel) values must be equal across the slice.
      if (!all_equal(MaskSlice))
        return false;
      ScaledMask.push_back(SliceFront);
    } else {
      // A positive mask element must be cleanly divisible.
      if (SliceFront % Scale != 0)
        return false;
      // Elements of the slice must be consecutive.
      for (int i = 1; i < Scale; ++i)
        if (MaskSlice[i] != SliceFront + i)
          return false;
      ScaledMask.push_back(SliceFront / Scale);
    }
    Mask = Mask.drop_front(Scale);
  } while (!Mask.empty());

  assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");

  // All elements of the original mask can be scaled down to map to the
  // elements of a mask with wider elements.
  return true;
}
/// Attempt to widen a shuffle mask by a factor of exactly 2, treating each
/// pair of elements as one wider element where possible.
bool llvm::widenShuffleMaskElts(ArrayRef<int> M,
                                SmallVectorImpl<int> &NewMask) {
  unsigned NumElts = M.size();
  if (NumElts % 2 != 0)
    return false;

  NewMask.clear();
  for (unsigned i = 0; i < NumElts; i += 2) {
    int M0 = M[i];
    int M1 = M[i + 1];

    // If both elements are undef, the new mask element is undef too.
    if (M0 == -1 && M1 == -1) {
      NewMask.push_back(-1);
      continue;
    }

    if (M0 == -1 && M1 != -1 && (M1 % 2) == 1) {
      NewMask.push_back(M1 / 2);
      continue;
    }

    if (M0 != -1 && (M0 % 2) == 0 && ((M0 + 1) == M1 || M1 == -1)) {
      NewMask.push_back(M0 / 2);
      continue;
    }

    NewMask.clear();
    return false;
  }

  assert(NewMask.size() == NumElts / 2 && "Incorrect size for mask!");
  return true;
}
/// Attempt to narrow/widen the Mask shuffle mask to the NumDstElts target
/// width.
bool llvm::scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef<int> Mask,
                                SmallVectorImpl<int> &ScaledMask) {
  unsigned NumSrcElts = Mask.size();
  assert(NumSrcElts > 0 && NumDstElts > 0 && "Unexpected scaling factor");

  // Fast-path: if no scaling, then it is just a copy.
  if (NumSrcElts == NumDstElts) {
    ScaledMask.assign(Mask.begin(), Mask.end());
    return true;
  }

  // Ensure we can find a whole scale factor.
  assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&
         "Unexpected scaling factor");

  if (NumSrcElts > NumDstElts) {
    int Scale = NumSrcElts / NumDstElts;
    return widenShuffleMaskElts(Scale, Mask, ScaledMask);
  }

  int Scale = NumDstElts / NumSrcElts;
  narrowShuffleMaskElts(Scale, Mask, ScaledMask);
  return true;
}
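// --- Editorial example (not part of VectorUtils.cpp): <2, 3, 0, 1> over 4
// elements can be retargeted to 2 wider elements as <1, 0>; narrowing that
// result restores the original mask.
SmallVector<int, 8> Wide;
bool Ok = scaleShuffleMaskElts(/*NumDstElts=*/2, {2, 3, 0, 1}, Wide);
// Ok == true, Wide == {1, 0}. A mask such as <0, 2, 1, 3> fails instead,
// because its element pairs are not contiguous.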
/// Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to
/// get the shuffle mask with the widest possible elements.
void llvm::getShuffleMaskWithWidestElts(ArrayRef<int> Mask,
                                        SmallVectorImpl<int> &ScaledMask) {
  std::array<SmallVector<int, 16>, 2> TmpMasks;
  SmallVectorImpl<int> *Output = &TmpMasks[0], *Tmp = &TmpMasks[1];
  ArrayRef<int> InputMask = Mask;
  for (unsigned Scale = 2; Scale <= InputMask.size(); ++Scale) {
    while (widenShuffleMaskElts(Scale, InputMask, *Output)) {
      InputMask = *Output;
      std::swap(Output, Tmp);
    }
  }
  ScaledMask.assign(InputMask.begin(), InputMask.end());
}
/// Splits and processes a shuffle mask depending on the number of input and
/// output registers.
void llvm::processShuffleMasks(
    ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
    unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
    function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
    function_ref<void(ArrayRef<int>, unsigned, unsigned, bool)>
        ManyInputsAction) {
  SmallVector<SmallVector<SmallVector<int>>> Res(NumOfDestRegs);
  // 1. Split the source/destination vectors into real registers.
  // 2. Analyze the mask to identify which real registers are permuted.
  int Sz = Mask.size();
  unsigned SzDest = Sz / NumOfDestRegs;
  unsigned SzSrc = Sz / NumOfSrcRegs;
  for (unsigned I = 0; I < NumOfDestRegs; ++I) {
    auto &RegMasks = Res[I];
    RegMasks.assign(2 * NumOfSrcRegs, {});
    // Record which source register each element of this dest register
    // comes from.
    for (unsigned K = 0; K < SzDest; ++K) {
      int Idx = I * SzDest + K;
      // ... (out-of-range and poison mask elements are skipped)
      int MaskIdx = Mask[Idx] % Sz;
      int SrcRegIdx = MaskIdx / SzSrc + (Mask[Idx] >= Sz ? NumOfSrcRegs : 0);
      if (RegMasks[SrcRegIdx].empty())
        RegMasks[SrcRegIdx].assign(SzDest, PoisonMaskElem);
      RegMasks[SrcRegIdx][K] = MaskIdx % SzSrc;
    }
  }
  // Process the per-destination split masks.
  for (unsigned I : seq<unsigned>(NumOfUsedRegs)) {
    auto &Dest = Res[I];
    int NumSrcRegs =
        count_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
    switch (NumSrcRegs) {
    case 0:
      // No input vectors were used!
      NoInputAction();
      break;
    case 1: {
      // Find the only non-empty source mask.
      auto *It =
          find_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
      unsigned SrcReg = std::distance(Dest.begin(), It);
      SingleInputAction(*It, SrcReg, I);
      break;
    }
    default: {
      // Two or more source registers: combine the per-register masks
      // pairwise until a single mask remains.
      auto &&CombineMasks = [](MutableArrayRef<int> FirstMask,
                               ArrayRef<int> SecondMask) {
        for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) {
          if (SecondMask[Idx] != PoisonMaskElem) {
            assert(FirstMask[Idx] == PoisonMaskElem &&
                   "Expected undefined mask element.");
            FirstMask[Idx] = SecondMask[Idx] + VF;
          }
        }
      };
      auto &&NormalizeMask = [](MutableArrayRef<int> Mask) {
        for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
          if (Mask[Idx] != PoisonMaskElem)
            Mask[Idx] = Idx;
        }
      };
      int SecondIdx;
      bool NewReg = true;
      do {
        int FirstIdx = -1;
        SecondIdx = -1;
        MutableArrayRef<int> FirstMask, SecondMask;
        for (unsigned J : seq<unsigned>(2 * NumOfSrcRegs)) {
          SmallVectorImpl<int> &RegMask = Dest[J];
          if (RegMask.empty())
            continue;
          if (FirstIdx == SecondIdx) {
            FirstIdx = J;
            FirstMask = RegMask;
            continue;
          }
          SecondIdx = J;
          SecondMask = RegMask;
          CombineMasks(FirstMask, SecondMask);
          ManyInputsAction(FirstMask, FirstIdx, SecondIdx, NewReg);
          NewReg = false;
          NormalizeMask(FirstMask);
          RegMask.clear();
          SecondMask = FirstMask;
          SecondIdx = FirstIdx;
        }
        if (FirstIdx != SecondIdx && SecondIdx >= 0) {
          CombineMasks(SecondMask, FirstMask);
          ManyInputsAction(SecondMask, SecondIdx, FirstIdx, NewReg);
          NewReg = false;
          Dest[FirstIdx].clear();
          NormalizeMask(SecondMask);
        }
      } while (SecondIdx >= 0);
      break;
    }
    }
  }
}
/// Compute the demanded elements mask of horizontal binary operations.
void llvm::getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth,
                                               const APInt &DemandedElts,
                                               APInt &DemandedLHS,
                                               APInt &DemandedRHS) {
  assert(VectorBitWidth >= 128 && "Vectors smaller than 128 bit not supported");
  int NumLanes = VectorBitWidth / 128;
  int NumElts = DemandedElts.getBitWidth();
  int NumEltsPerLane = NumElts / NumLanes;
  int HalfEltsPerLane = NumEltsPerLane / 2;

  DemandedLHS = APInt::getZero(NumElts);
  DemandedRHS = APInt::getZero(NumElts);

  // Map DemandedElts to the horizontal operands.
  for (int Idx = 0; Idx != NumElts; ++Idx) {
    if (!DemandedElts[Idx])
      continue;
    int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane;
    int LocalIdx = Idx % NumEltsPerLane;
    if (LocalIdx < HalfEltsPerLane) {
      DemandedLHS.setBit(LaneIdx + 2 * LocalIdx);
    } else {
      LocalIdx -= HalfEltsPerLane;
      DemandedRHS.setBit(LaneIdx + 2 * LocalIdx);
    }
  }
}
/// Compute a map of integer instructions to their minimum legal type size.
MapVector<Instruction *, uint64_t>
llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                               const TargetTransformInfo *TTI) {
  // DemandedBits gives every value's live-out bits, but to avoid inserting
  // extra casts, every DAG of connected values must share one minimum
  // bitwidth; values are therefore grouped into equivalence classes.
  EquivalenceClasses<Value *> ECs;
  SmallVector<Value *, 16> Worklist;
  SmallPtrSet<Value *, 4> Roots;
  SmallPtrSet<Instruction *, 16> InstructionSet;
  DenseMap<Value *, uint64_t> DBits;
  SmallPtrSet<Value *, 16> Visited;

  // Determine the roots: truncs and icmps of non-vector integers <= 64 bits.
  bool SeenExtFromIllegalType = false;
  for (auto *BB : Blocks)
    for (auto &I : *BB) {
      InstructionSet.insert(&I);

      if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
          !TTI->isTypeLegal(I.getOperand(0)->getType()))
        SeenExtFromIllegalType = true;

      if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
          !I.getType()->isVectorTy() &&
          I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
        Worklist.push_back(&I);
        Roots.insert(&I);
      }
    }
  // Early exit.
  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
    return MapVector<Instruction *, uint64_t>();

  // Proceed breadth-first, unioning values together.
  while (!Worklist.empty()) {
    Value *Val = Worklist.pop_back_val();
    Value *Leader = ECs.getOrInsertLeaderValue(Val);

    if (!Visited.insert(Val).second)
      continue;

    // Non-instructions terminate a chain successfully.
    if (!isa<Instruction>(Val))
      continue;
    Instruction *I = cast<Instruction>(Val);

    // Demanded bits wider than 64 can't be represented; bail out.
    if (DB.getDemandedBits(I).getBitWidth() > 64)
      return MapVector<Instruction *, uint64_t>();

    uint64_t V = DB.getDemandedBits(I).getZExtValue();
    DBits[Leader] |= V;
    DBits[I] = V;

    // Casts, loads and instructions outside our range terminate a chain.
    if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
        !InstructionSet.count(I))
      continue;

    // Unsafe casts and non-integer types poison the whole class.
    if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
        !I->getType()->isIntegerTy()) {
      DBits[Leader] |= ~0ULL;
      continue;
    }

    // Nothing left to explore once the class demands all bits.
    if (DBits[Leader] == ~0ULL)
      continue;

    for (Value *O : I->operands()) {
      ECs.unionSets(Leader, O);
      Worklist.push_back(O);
    }
  }

  // Any user we didn't visit forces its chain back to full width.
  for (auto &I : DBits)
    for (auto *U : I.first->users())
      if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
        DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;

  MapVector<Instruction *, uint64_t> MinBWs;
  for (const auto &E : ECs) {
    if (!E->isLeader())
      continue;
    uint64_t LeaderDemandedBits = 0;
    for (Value *M : ECs.members(*E))
      LeaderDemandedBits |= DBits[M];

    // Round the minimum bitwidth up to a power of 2.
    uint64_t MinBW = bit_ceil((uint64_t)bit_width(LeaderDemandedBits));

    // ... (equivalence classes containing PHIs that would need shrinking
    // are abandoned here)

    for (Value *M : ECs.members(*E)) {
      auto *MI = dyn_cast<Instruction>(M);
      if (!MI)
        continue;
      Type *Ty = M->getType();
      if (Roots.count(M))
        Ty = MI->getOperand(0)->getType();

      if (MinBW >= Ty->getScalarSizeInBits())
        continue;

      // A constant shift amount >= MinBW would become poison after
      // narrowing; skip instructions whose operands demand more bits.
      if (any_of(MI->operands(), [&DB, MinBW](Use &U) {
            auto *CI = dyn_cast<ConstantInt>(U);
            if (CI &&
                isa<ShlOperator, LShrOperator, AShrOperator>(U.getUser()) &&
                U.getOperandNo() == 1)
              return CI->uge(MinBW);
            uint64_t BW = bit_width(DB.getDemandedBits(&U).getZExtValue());
            return bit_ceil(BW) > MinBW;
          }))
        continue;

      MinBWs[MI] = MinBW;
    }
  }
  return MinBWs;
}
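// --- Editorial example (not part of VectorUtils.cpp): a consumer can walk
// the returned map to cost or narrow operations. `Blocks`, `DB` and `TTI`
// are assumed to be the vectorizer's analyses.
MapVector<Instruction *, uint64_t> MinBWs =
    computeMinimumValueSizes(Blocks, DB, TTI);
for (const auto &[I, BW] : MinBWs)
  LLVM_DEBUG(dbgs() << "can evaluate " << *I << " in " << BW << " bits\n");
// E.g. a chain zext'ed from i8 and trunc'ed back to i8 gets BW == 8.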
/// Add all access groups in AccGroups to List.
template <typename ListT>
static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
  // Interpret an access group as a list containing itself.
  if (AccGroups->getNumOperands() == 0) {
    assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
    List.insert(AccGroups);
    return;
  }

  for (const auto &AccGroupListOp : AccGroups->operands()) {
    auto *Item = cast<MDNode>(AccGroupListOp.get());
    assert(isValidAsAccessGroup(Item) && "List item must be an access group");
    List.insert(Item);
  }
}

/// Compute the union of two access-group lists.
MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
  if (!AccGroups1)
    return AccGroups2;
  if (!AccGroups2)
    return AccGroups1;
  if (AccGroups1 == AccGroups2)
    return AccGroups1;

  SmallSetVector<Metadata *, 4> Union;
  addToAccessGroupList(Union, AccGroups1);
  addToAccessGroupList(Union, AccGroups2);

  if (Union.size() == 0)
    return nullptr;
  if (Union.size() == 1)
    return cast<MDNode>(Union.front());

  LLVMContext &Ctx = AccGroups1->getContext();
  return MDNode::get(Ctx, Union.getArrayRef());
}
/// Compute the access-group list of access groups that Inst1 and Inst2 are
/// both in.
MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
                                    const Instruction *Inst2) {
  bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
  bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
  if (!MayAccessMem1 && !MayAccessMem2)
    return nullptr;
  if (!MayAccessMem1)
    return Inst2->getMetadata(LLVMContext::MD_access_group);
  if (!MayAccessMem2)
    return Inst1->getMetadata(LLVMContext::MD_access_group);

  MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
  MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
  if (!MD1 || !MD2)
    return nullptr;
  if (MD1 == MD2)
    return MD1;

  // Use a set for a scalable 'contains' check.
  SmallPtrSet<Metadata *, 4> AccGroupSet2;
  addToAccessGroupList(AccGroupSet2, MD2);

  SmallVector<Metadata *, 4> Intersection;
  if (MD1->getNumOperands() == 0) {
    assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
    if (AccGroupSet2.count(MD1))
      Intersection.push_back(MD1);
  } else {
    for (const MDOperand &Node : MD1->operands()) {
      auto *Item = cast<MDNode>(Node.get());
      assert(isValidAsAccessGroup(Item) && "List item must be an access group");
      if (AccGroupSet2.count(Item))
        Intersection.push_back(Item);
    }
  }

  if (Intersection.size() == 0)
    return nullptr;
  if (Intersection.size() == 1)
    return cast<MDNode>(Intersection.front());

  LLVMContext &Ctx = Inst1->getContext();
  return MDNode::get(Ctx, Intersection);
}
/// Add metadata from Inst to Metadata, if it can be preserved after
/// vectorization.
void llvm::getMetadataToPropagate(
    Instruction *Inst,
    SmallVectorImpl<std::pair<unsigned, MDNode *>> &Metadata) {
  Inst->getAllMetadataOtherThanDebugLoc(Metadata);
  static const unsigned SupportedIDs[] = {
      LLVMContext::MD_tbaa,         LLVMContext::MD_alias_scope,
      LLVMContext::MD_noalias,      LLVMContext::MD_fpmath,
      LLVMContext::MD_nontemporal,  LLVMContext::MD_invariant_load,
      LLVMContext::MD_access_group, LLVMContext::MD_mmra};

  // Remove any unsupported metadata kinds from Metadata.
  for (unsigned Idx = 0; Idx != Metadata.size();) {
    if (is_contained(SupportedIDs, Metadata[Idx].first)) {
      ++Idx;
    } else {
      // Swap the element to the end and remove it.
      std::swap(Metadata[Idx], Metadata.back());
      Metadata.pop_back();
    }
  }
}

/// Propagate metadata that all values in VL agree on to Inst.
Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
  Instruction *I0 = cast<Instruction>(VL[0]);
  SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
  getMetadataToPropagate(I0, Metadata);

  for (auto &[Kind, MD] : Metadata) {
    for (int J = 1, E = VL.size(); MD && J != E; ++J) {
      const Instruction *IJ = cast<Instruction>(VL[J]);
      MDNode *IMD = IJ->getMetadata(Kind);

      switch (Kind) {
      case LLVMContext::MD_mmra: {
        MD = MMRAMetadata::combine(Inst->getContext(), MD, IMD);
        break;
      }
      case LLVMContext::MD_tbaa:
        MD = MDNode::getMostGenericTBAA(MD, IMD);
        break;
      case LLVMContext::MD_alias_scope:
        MD = MDNode::getMostGenericAliasScope(MD, IMD);
        break;
      case LLVMContext::MD_fpmath:
        MD = MDNode::getMostGenericFPMath(MD, IMD);
        break;
      case LLVMContext::MD_noalias:
      case LLVMContext::MD_nontemporal:
      case LLVMContext::MD_invariant_load:
        MD = MDNode::intersect(MD, IMD);
        break;
      case LLVMContext::MD_access_group:
        MD = intersectAccessGroups(Inst, IJ);
        break;
      default:
        llvm_unreachable("unhandled metadata");
      }
    }
    Inst->setMetadata(Kind, MD);
  }
  return Inst;
}
/// Create a mask that filters the members of an interleave group where there
/// are gaps.
Constant *
llvm::createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
                           const InterleaveGroup<Instruction> &Group) {
  // An all-ones mask is not needed.
  if (Group.isFull())
    return nullptr;

  SmallVector<Constant *, 16> Mask;
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < Group.getFactor(); ++j) {
      unsigned HasMember = Group.getMember(j) ? 1 : 0;
      Mask.push_back(Builder.getInt1(HasMember));
    }

  return ConstantVector::get(Mask);
}
/// Create a mask with replicated elements.
llvm::SmallVector<int, 16>
llvm::createReplicatedMask(unsigned ReplicationFactor, unsigned VF) {
  SmallVector<int, 16> MaskVec;
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < ReplicationFactor; j++)
      MaskVec.push_back(i);
  return MaskVec;
}

/// Create an interleave shuffle mask.
llvm::SmallVector<int, 16> llvm::createInterleaveMask(unsigned VF,
                                                      unsigned NumVecs) {
  SmallVector<int, 16> Mask;
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < NumVecs; j++)
      Mask.push_back(j * VF + i);
  return Mask;
}

/// Create a stride shuffle mask.
llvm::SmallVector<int, 16>
llvm::createStrideMask(unsigned Start, unsigned Stride, unsigned VF) {
  SmallVector<int, 16> Mask;
  for (unsigned i = 0; i < VF; i++)
    Mask.push_back(Start + i * Stride);
  return Mask;
}
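// --- Editorial example (not part of VectorUtils.cpp): worked mask values
// for VF = 4.
//   createReplicatedMask(3, 4) -> <0,0,0, 1,1,1, 2,2,2, 3,3,3>
//   createInterleaveMask(4, 2) -> <0,4, 1,5, 2,6, 3,7>
//   createStrideMask(0, 2, 4)  -> <0, 2, 4, 6>
SmallVector<int, 16> M = createInterleaveMask(4, 2);
// Lane order alternates between the two source vectors, which is exactly
// the memory order of a factor-2 interleaved store.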
/// Create a sequential shuffle mask.
llvm::SmallVector<int, 16>
llvm::createSequentialMask(unsigned Start, unsigned NumInts,
                           unsigned NumUndefs) {
  SmallVector<int, 16> Mask;
  for (unsigned i = 0; i < NumInts; i++)
    Mask.push_back(Start + i);
  for (unsigned i = 0; i < NumUndefs; i++)
    Mask.push_back(-1);
  return Mask;
}
/// Given a shuffle mask for a binary shuffle, create the equivalent shuffle
/// mask assuming both operands are the same.
llvm::SmallVector<int, 16> llvm::createUnaryMask(ArrayRef<int> Mask,
                                                 unsigned NumElts) {
  // Avoid casts in the loop and make sure we have a reasonable number.
  int NumEltsSigned = NumElts;
  assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count");

  // If the mask chooses an element from operand 1, reduce it to choose from
  // the corresponding element of operand 0.
  SmallVector<int, 16> UnaryMask;
  for (int MaskElt : Mask) {
    assert((MaskElt < NumEltsSigned * 2) && "Expected valid shuffle mask");
    int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;
    UnaryMask.push_back(UnaryElt);
  }
  return UnaryMask;
}
/// A helper function for concatenating vectors.
static Value *concatenateTwoVectors(IRBuilderBase &Builder, Value *V1,
                                    Value *V2) {
  VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
  VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
  assert(VecTy1 && VecTy2 &&
         VecTy1->getScalarType() == VecTy2->getScalarType() &&
         "Expect two vectors with the same element type");

  unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();
  unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();
  assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");

  if (NumElts1 > NumElts2) {
    // Extend the shorter vector with undefs.
    V2 = Builder.CreateShuffleVector(
        V2, createSequentialMask(0, NumElts2, NumElts1 - NumElts2));
  }

  return Builder.CreateShuffleVector(
      V1, V2, createSequentialMask(0, NumElts1 + NumElts2, 0));
}

/// Concatenate a list of vectors.
Value *llvm::concatenateVectors(IRBuilderBase &Builder,
                                ArrayRef<Value *> Vecs) {
  unsigned NumVecs = Vecs.size();
  assert(NumVecs > 1 && "Should be at least two vectors");

  SmallVector<Value *, 8> ResList;
  ResList.append(Vecs.begin(), Vecs.end());
  do {
    SmallVector<Value *, 8> TmpList;
    for (unsigned i = 0; i < NumVecs - 1; i += 2) {
      Value *V0 = ResList[i], *V1 = ResList[i + 1];
      assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
             "Only the last vector may have a different type");

      TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
    }

    // Push the last vector if the total number of vectors is odd.
    if (NumVecs % 2 != 0)
      TmpList.push_back(ResList[NumVecs - 1]);

    ResList = TmpList;
    NumVecs = ResList.size();
  } while (NumVecs > 1);

  return ResList[0];
}
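// --- Editorial example (not part of VectorUtils.cpp): concatenating four
// <4 x i32> parts pairs them up tree-wise, (V0++V1) and (V2++V3) first, then
// one final shuffle producing <16 x i32>. `Builder` and V0..V3 are assumed.
Value *Wide = concatenateVectors(Builder, {V0, V1, V2, V3});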
1257 "Mask must be a vector of i1");
1270 if (
auto *MaskElt = ConstMask->getAggregateElement(
I))
1283 "Mask must be a vector of i1");
1296 if (
auto *MaskElt = ConstMask->getAggregateElement(
I))
1309 "Mask must be a vector of i1");
1322 if (
auto *MaskElt = ConstMask->getAggregateElement(
I))
1336 "Mask must be a fixed width vector of i1");
1338 const unsigned VWidth =
1342 for (
unsigned i = 0; i < VWidth; i++)
1343 if (CV->getAggregateElement(i)->isNullValue())
1345 return DemandedElts;
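// --- Editorial example (not part of VectorUtils.cpp): these predicates let
// a combiner fold masked memory ops on constant masks. Sketch, with `Mask`,
// `VecTy`, `Ptr`, `Alignment`, `Passthru` and `Builder` assumed:
if (maskIsAllZeroOrUndef(Mask))
  return Passthru;                       // masked.load reads nothing
if (maskIsAllOneOrUndef(Mask))
  return Builder.CreateAlignedLoad(VecTy, Ptr, Alignment); // plain load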
bool InterleavedAccessInfo::isStrided(int Stride) {
  unsigned Factor = std::abs(Stride);
  return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
}

void InterleavedAccessInfo::collectConstStrideAccesses(
    MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
    const DenseMap<Value *, const SCEV *> &Strides) {
  auto &DL = TheLoop->getHeader()->getDataLayout();

  // Visit the blocks in reverse postorder so that the load/store
  // instructions are kept in program order for the interleaved access
  // analysis.
  LoopBlocksDFS DFS(TheLoop);
  DFS.perform(LI);
  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
    for (auto &I : *BB) {
      Value *Ptr = getLoadStorePointerOperand(&I);
      if (!Ptr)
        continue;
      Type *ElementTy = getLoadStoreType(&I);

      // Currently, codegen doesn't support cases where the type size doesn't
      // match the alloc size. Skip them.
      uint64_t Size = DL.getTypeAllocSize(ElementTy);
      if (Size * 8 != DL.getTypeSizeInBits(ElementTy))
        continue;

      int64_t Stride =
          getPtrStride(PSE, ElementTy, Ptr, TheLoop, *DT, Strides,
                       /*Assume=*/true, /*ShouldCheckWrap=*/false)
              .value_or(0);

      const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
      AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
                                              getLoadStoreAlignment(&I));
    }
}
/// Analyze the interleaved accesses and collect them in interleave groups.
void InterleavedAccessInfo::analyzeInterleaving(
    bool EnablePredicatedInterleavedMemAccesses) {
  LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
  const auto &Strides = LAI->getSymbolicStrides();

  // Holds all accesses with a constant stride.
  MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
  collectConstStrideAccesses(AccessStrideInfo, Strides);

  if (AccessStrideInfo.empty())
    return;

  // Collect the dependences in the loop.
  collectDependences();

  // Holds all interleaved store/load groups temporarily.
  SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
  SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;
  // Groups added to this set cannot have new members added.
  SmallPtrSet<InterleaveGroup<Instruction> *, 4> CompletedLoadGroups;

  // Search in bottom-up program order for pairs of accesses (A and B) that
  // can form an interleaved group.
  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
       BI != E; ++BI) {
    Instruction *B = BI->first;
    StrideDescriptor DesB = BI->second;

    // Initialize a group for B if it has an allowable stride. Even if we
    // don't create a group for B, we continue with the bottom-up algorithm
    // to ensure we don't break any of B's dependences.
    InterleaveGroup<Instruction> *GroupB = nullptr;
    if (isStrided(DesB.Stride) &&
        (!isPredicated(B->getParent()) ||
         EnablePredicatedInterleavedMemAccesses)) {
      GroupB = getInterleaveGroup(B);
      if (!GroupB) {
        GroupB = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
        if (B->mayWriteToMemory())
          StoreGroups.insert(GroupB);
        else
          LoadGroups.insert(GroupB);
      }
    }
    for (auto AI = std::next(BI); AI != E; ++AI) {
      Instruction *A = AI->first;
      StrideDescriptor DesA = AI->second;

      // Our code-motion strategy will move all accesses in a group to B's
      // position, so A must be movable past every member of B's group.
      auto DependentMember = [&](InterleaveGroup<Instruction> *Group,
                                 StrideEntry *A) -> Instruction * {
        for (uint32_t Index = 0; Index < Group->getFactor(); ++Index) {
          Instruction *MemberOfGroupB = Group->getMember(Index);
          if (MemberOfGroupB &&
              !canReorderMemAccessesForInterleavedGroups(
                  A, &*AccessStrideInfo.find(MemberOfGroupB)))
            return MemberOfGroupB;
        }
        return nullptr;
      };

      auto GroupA = getInterleaveGroup(A);
      if (A->mayWriteToMemory() && GroupA != GroupB) {
        Instruction *DependentInst = nullptr;
        // If GroupB is a load group, compare A against all of its members.
        if (GroupB && LoadGroups.contains(GroupB))
          DependentInst = DependentMember(GroupB, &*AI);
        else if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI))
          DependentInst = B;

        if (DependentInst) {
          // A store dependence means A's store group can no longer be sunk
          // to B's position: release it.
          if (GroupA && StoreGroups.contains(GroupA)) {
            LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
                                 "dependence between "
                              << *A << " and " << *DependentInst << '\n');
            StoreGroups.remove(GroupA);
            releaseGroup(GroupA);
          }
          // A load group that B belongs to may keep its members but must
          // not grow past the store: mark it complete.
          if (GroupB && LoadGroups.contains(GroupB)) {
            LLVM_DEBUG(dbgs() << "LV: Marking interleave group for " << *B
                              << " as complete.\n");
            CompletedLoadGroups.insert(GroupB);
          }
        }
      }
      if (CompletedLoadGroups.contains(GroupB)) {
        // Skip trying to add A to B; keep looking for conflicting accesses.
        continue;
      }
      // At this point we've checked for illegal code motion; if either A or
      // B isn't strided there is nothing left to do.
      if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
        continue;

      // Ignore A if it's already in a group or isn't the same kind of
      // memory operation as B.
      if (isInterleaved(A) ||
          (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
          (A->mayWriteToMemory() != B->mayWriteToMemory()))
        continue;

      // Ignore A if its stride or size differs from B's, or if the two
      // accesses are in different address spaces.
      if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
        continue;
      if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
        continue;

      // Calculate the distance from A to B and require it to be a constant
      // multiple of the access size.
      const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
          PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
      if (!DistToB)
        continue;
      int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
      if (DistanceToB % static_cast<int64_t>(DesB.Size))
        continue;

      // All members of a predicated interleave group must be in the same
      // block, and only when predicated interleaving is enabled.
      BasicBlock *BlockA = A->getParent();
      BasicBlock *BlockB = B->getParent();
      if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
          (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
        continue;

      // The index of A is the index of B plus A's distance to B in multiples
      // of the size.
      int IndexA =
          GroupB->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);

      // Try to insert A into B's group.
      if (GroupB->insertMember(A, IndexA, DesA.Alignment)) {
        LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
                          << "    into the interleave group with" << *B
                          << '\n');
        InterleaveGroupMap[A] = GroupB;

        // Set the first load in program order as the insert position.
        if (A->mayReadFromMemory())
          GroupB->setInsertPos(A);
      }
    } // Iteration over A accesses.
  } // Iteration over B accesses.
  auto InvalidateGroupIfMemberMayWrap =
      [&](InterleaveGroup<Instruction> *Group, int Index,
          const char *FirstOrLast) -> bool {
    Instruction *Member = Group->getMember(Index);
    assert(Member && "Group member does not exist");
    Value *MemberPtr = getLoadStorePointerOperand(Member);
    Type *AccessTy = getLoadStoreType(Member);
    if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, *DT, Strides,
                     /*Assume=*/false, /*ShouldCheckWrap=*/true)
            .value_or(0))
      return false;
    LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
                      << FirstOrLast
                      << " group member potentially pointer-wrapping.\n");
    releaseGroup(Group);
    return true;
  };

  // Remove interleaved load groups with gaps whose memory accesses may wrap.
  for (auto *Group : LoadGroups) {
    // A full group needs no pointer-wrapping checks.
    if (Group->isFull())
      continue;

    // If the first and last members don't wrap, the accesses in between
    // can't either.
    if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))
      continue;
    if (Group->getMember(Group->getFactor() - 1))
      InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1, "last");
    else {
      // A group with a gap at the end: reversed accesses are rejected;
      // other groups must not access the last (possibly out-of-bounds)
      // element and therefore need a scalar epilogue iteration.
      if (Group->isReverse()) {
        LLVM_DEBUG(
            dbgs() << "LV: Invalidate candidate interleaved group due to "
                      "a reverse access with gaps.\n");
        releaseGroup(Group);
        continue;
      }
      LLVM_DEBUG(
          dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
      RequiresScalarEpilogue = true;
    }
  }
  for (auto *Group : StoreGroups) {
    // A full group needs no checks.
    if (Group->isFull())
      continue;

    // Interleaved store groups with gaps are implemented with masked wide
    // stores; drop them when masked interleaving is not enabled.
    if (!EnablePredicatedInterleavedMemAccesses) {
      LLVM_DEBUG(
          dbgs() << "LV: Invalidate candidate interleaved store group due "
                    "to gaps.\n");
      releaseGroup(Group);
      continue;
    }

    // Otherwise check the first and the last existing member for wrapping.
    if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))
      continue;
    for (int Index = Group->getFactor() - 1; Index > 0; Index--)
      if (Group->getMember(Index)) {
        InvalidateGroupIfMemberMayWrap(Group, Index, "last");
        break;
      }
  }
}
/// Invalidate groups that require a scalar epilogue (due to gaps).
void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
  // If no group triggered the requirement for a scalar epilogue, there is
  // nothing to do.
  if (!requiresScalarEpilogue())
    return;

  // Release groups requiring scalar epilogues; this also removes them from
  // the InterleaveGroups set.
  bool ReleasedGroup = InterleaveGroups.remove_if([&](auto *Group) {
    if (!Group->requiresScalarEpilogue())
      return false;
    LLVM_DEBUG(
        dbgs()
        << "LV: Invalidate candidate interleaved group due to gaps that "
           "require a scalar epilogue (not allowed under optsize) and cannot "
           "be masked (not enabled). \n");
    releaseGroupWithoutRemovingFromSet(Group);
    return true;
  });
  assert(ReleasedGroup && "At least one group must be invalidated, as a "
                          "scalar epilogue was required");
  (void)ReleasedGroup;
  RequiresScalarEpilogue = false;
}
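// --- Editorial example (not part of VectorUtils.cpp): for the loop body
//   %x = load i32, ptr %p.even   ; stride 2, index 0
//   %y = load i32, ptr %p.odd    ; stride 2, index 1
// the analysis forms one load group with factor 2, and a vectorizer (VF = 4)
// can emit a single wide load plus two strided shuffles:
//   %wide = load <8 x i32>, ptr %p.even
//   %xs = shufflevector %wide, poison, <0, 2, 4, 6>  ; createStrideMask(0,2,4)
//   %ys = shufflevector %wide, poison, <1, 3, 5, 7>  ; createStrideMask(1,2,4)
// Querying the result, with `IAI` an InterleavedAccessInfo and `I` a member:
if (InterleaveGroup<Instruction> *G = IAI.getInterleaveGroup(&I)) {
  unsigned Factor = G->getFactor();                 // 2
  bool NeedsEpilogue = G->requiresScalarEpilogue(); // false: group is full
}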
template <typename InstT>
void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
  llvm_unreachable("addMetadata can only be called for Instruction");
}

namespace llvm {
template <>
void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
  SmallVector<Value *, 4> VL(make_second_range(Members));
  propagateMetadata(NewInst, VL);
}
} // namespace llvm