29 #define DEBUG_TYPE "vectorutils"
37     cl::desc("Maximum factor for an interleaved access group (default = 8)"),
47 case Intrinsic::bswap:
48 case Intrinsic::bitreverse:
49 case Intrinsic::ctpop:
58 case Intrinsic::sadd_sat:
59 case Intrinsic::ssub_sat:
60 case Intrinsic::uadd_sat:
61 case Intrinsic::usub_sat:
62 case Intrinsic::smul_fix:
63 case Intrinsic::smul_fix_sat:
64 case Intrinsic::umul_fix:
65 case Intrinsic::umul_fix_sat:
72 case Intrinsic::log10:
79 case Intrinsic::copysign:
84 case Intrinsic::nearbyint:
86 case Intrinsic::roundeven:
89 case Intrinsic::fmuladd:
91 case Intrinsic::canonicalize:
92 case Intrinsic::fptosi_sat:
93 case Intrinsic::fptoui_sat:
102 unsigned ScalarOpdIdx) {
105 case Intrinsic::ctlz:
106 case Intrinsic::cttz:
108 return (ScalarOpdIdx == 1);
109 case Intrinsic::smul_fix:
110 case Intrinsic::smul_fix_sat:
111 case Intrinsic::umul_fix:
112 case Intrinsic::umul_fix_sat:
113 return (ScalarOpdIdx == 2);
122 case Intrinsic::fptosi_sat:
123 case Intrinsic::fptoui_sat:
142       ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
143       ID == Intrinsic::experimental_noalias_scope_decl ||
144       ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)
161 std::advance(GEPTI, LastOperand - 2);
185   for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
186     if (i != InductionOperand &&
189   return GEP->getOperand(InductionOperand);
194   Value *UniqueCast = nullptr;
196     CastInst *CI = dyn_cast<CastInst>(U);
197     if (CI && CI->getType() == Ty) {
210   auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
211 if (!PtrTy || PtrTy->isAggregateType())
220 int64_t PtrAccessSize = 1;
234   V = S->getStepRecurrence(*SE);
240   if (OrigPtr == Ptr) {
245       const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();
252       if (PtrAccessSize != StepVal)
254       V = M->getOperand(1);
259   Type *StripedOffRecurrenceCast = nullptr;
261     StripedOffRecurrenceCast = C->getType();
276   if (StripedOffRecurrenceCast)
289   if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
290 unsigned Width = FVTy->getNumElements();
296 return C->getAggregateElement(EltNo);
300 if (!isa<ConstantInt>(III->getOperand(2)))
302 unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
307 return III->getOperand(1);
310 if (III == III->getOperand(0))
320   if (SVI && isa<FixedVectorType>(SVI->getType())) {
326     if (InEl < (int)LHSWidth)
336 if (Elt->isNullValue())
340 if (isa<ScalableVectorType>(VTy))
342 if (EltNo < VTy->getElementCount().getKnownMinValue())
357     if (SplatIndex != -1 && SplatIndex != M)
363   assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");
372   if (isa<VectorType>(V->getType()))
373     if (auto *C = dyn_cast<Constant>(V))
374       return C->getSplatValue();
389   if (isa<VectorType>(V->getType())) {
390     if (isa<UndefValue>(V))
394     if (auto *C = dyn_cast<Constant>(V))
395       return C->getSplatValue() != nullptr;
398   if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
433                                  const APInt &DemandedElts, APInt &DemandedLHS,
434                                  APInt &DemandedRHS, bool AllowUndefElts) {
438   if (DemandedElts.isZero())
442   if (all_of(Mask, [](int Elt) { return Elt == 0; })) {
447   for (unsigned I = 0, E = Mask.size(); I != E; ++I) {
449     assert((-1 <= M) && (M < (SrcWidth * 2)) &&
450            "Invalid shuffle mask constant");
452     if (!DemandedElts[I] || (AllowUndefElts && (M < 0)))
463       DemandedRHS.setBit(M - SrcWidth);
471   assert(Scale > 0 && "Unexpected scaling factor");
480   for (int MaskElt : Mask) {
483 "Overflowed 32-bits");
485     for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
486 ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
492   assert(Scale > 0 && "Unexpected scaling factor");
501   int NumElts = Mask.size();
502   if (NumElts % Scale != 0)
506   ScaledMask.reserve(NumElts / Scale);
511     assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");
514     int SliceFront = MaskSlice.front();
515 if (SliceFront < 0) {
520 ScaledMask.push_back(SliceFront);
523 if (SliceFront % Scale != 0)
526     for (int i = 1; i < Scale; ++i)
527       if (MaskSlice[i] != SliceFront + i)
529     ScaledMask.push_back(SliceFront / Scale);
532   } while (!Mask.empty());
534   assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");
543   std::array<SmallVector<int, 16>, 2> TmpMasks;
546   for (unsigned Scale = 2; Scale <= InputMask.size(); ++Scale) {
557                                unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
565   int Sz = Mask.size();
566   unsigned SzDest = Sz / NumOfDestRegs;
567   unsigned SzSrc = Sz / NumOfSrcRegs;
568   for (unsigned I = 0; I < NumOfDestRegs; ++I) {
569     auto &RegMasks = Res[I];
570     RegMasks.assign(NumOfSrcRegs, {});
573     for (unsigned K = 0; K < SzDest; ++K) {
574       int Idx = I * SzDest + K;
579       int SrcRegIdx = Mask[Idx] / SzSrc;
582       if (RegMasks[SrcRegIdx].empty())
584       RegMasks[SrcRegIdx][K] = Mask[Idx] % SzSrc;
588   for (unsigned I = 0; I < NumOfUsedRegs; ++I) {
592 switch (NumSrcRegs) {
601 unsigned SrcReg = std::distance(Dest.begin(), It);
602       SingleInputAction(*It, SrcReg, I);
614     for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) {
617              "Expected undefined mask element.");
618       FirstMask[Idx] = SecondMask[Idx] + VF;
623     for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
633   for (unsigned I = 0; I < NumOfDestRegs; ++I) {
638 if (FirstIdx == SecondIdx) {
644 SecondMask = RegMask;
645 CombineMasks(FirstMask, SecondMask);
646 ManyInputsAction(FirstMask, FirstIdx, SecondIdx);
647 NormalizeMask(FirstMask);
649 SecondMask = FirstMask;
650 SecondIdx = FirstIdx;
652 if (FirstIdx != SecondIdx && SecondIdx >= 0) {
653 CombineMasks(SecondMask, FirstMask);
654 ManyInputsAction(SecondMask, SecondIdx, FirstIdx);
655 Dest[FirstIdx].clear();
656 NormalizeMask(SecondMask);
658     } while (SecondIdx >= 0);
681   bool SeenExtFromIllegalType = false;
682   for (auto *BB : Blocks)
683     for (auto &I : *BB) {
686       if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
688         SeenExtFromIllegalType = true;
691       if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
692           !I.getType()->isVectorTy() &&
693           I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
699         Worklist.push_back(&I);
704   if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
708 while (!Worklist.empty()) {
712     if (!Visited.insert(Val).second)
716     if (!isa<Instruction>(Val))
722     if (DB.getDemandedBits(I).getBitWidth() > 64)
731     if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
738     if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
739         !I->getType()->isIntegerTy()) {
740 DBits[Leader] |= ~0ULL;
750 if (DBits[Leader] == ~0ULL)
754     for (Value *O : cast<User>(I)->operands()) {
756         Worklist.push_back(O);
763   for (auto &I : DBits)
764     for (auto *U : I.first->users())
765       if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
771       LeaderDemandedBits |= DBits[M];
773     uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
793       if (!isa<Instruction>(M))
795       Type *Ty = M->getType();
797         Ty = cast<Instruction>(M)->getOperand(0)->getType();
799       MinBWs[cast<Instruction>(M)] = MinBW;
807 template <typename ListT>
812     List.insert(AccGroups);
816   for (const auto &AccGroupListOp : AccGroups->operands()) {
817 auto *Item = cast<MDNode>(AccGroupListOp.get());
828 if (AccGroups1 == AccGroups2)
835 if (Union.size() == 0)
837 if (Union.size() == 1)
838 return cast<MDNode>(Union.front());
849 if (!MayAccessMem1 && !MayAccessMem2)
852     return Inst2->getMetadata(LLVMContext::MD_access_group);
854     return Inst1->getMetadata(LLVMContext::MD_access_group);
870     if (AccGroupSet2.count(MD1))
871       Intersection.push_back(MD1);
874       auto *Item = cast<MDNode>(Node.get());
876       if (AccGroupSet2.count(Item))
877 Intersection.push_back(Item);
881 if (Intersection.size() == 0)
883 if (Intersection.size() == 1)
884 return cast<MDNode>(Intersection.front());
898   for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
899                     LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
900                     LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
901                     LLVMContext::MD_access_group}) {
904     for (int J = 1, E = VL.size(); MD && J != E; ++J) {
908 case LLVMContext::MD_tbaa:
911 case LLVMContext::MD_alias_scope:
914 case LLVMContext::MD_fpmath:
917 case LLVMContext::MD_noalias:
918 case LLVMContext::MD_nontemporal:
919 case LLVMContext::MD_invariant_load:
922 case LLVMContext::MD_access_group:
947   for (unsigned i = 0; i < VF; i++)
949       unsigned HasMember = Group.getMember(j) ? 1 : 0;
959   for (unsigned i = 0; i < VF; i++)
960     for (unsigned j = 0; j < ReplicationFactor; j++)
961       MaskVec.push_back(i);
969   for (unsigned i = 0; i < VF; i++)
970     for (unsigned j = 0; j < NumVecs; j++)
971       Mask.push_back(j * VF + i);
979   for (unsigned i = 0; i < VF; i++)
980     Mask.push_back(Start + i * Stride);
987                                                  unsigned NumUndefs) {
989   for (unsigned i = 0; i < NumInts; i++)
990     Mask.push_back(Start + i);
992   for (unsigned i = 0; i < NumUndefs; i++)
1001 int NumEltsSigned = NumElts;
1002   assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count");
1007   for (int MaskElt : Mask) {
1008     assert((MaskElt < NumEltsSigned * 2) && "Expected valid shuffle mask");
1009 int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;
1010 UnaryMask.push_back(UnaryElt);
1021   VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
1022 assert(VecTy1 && VecTy2 &&
1024 "Expect two vectors with the same element type");
1026 unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();
1027 unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();
1028   assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");
1030 if (NumElts1 > NumElts2) {
1036 return Builder.CreateShuffleVector(
1042   unsigned NumVecs = Vecs.size();
1043   assert(NumVecs > 1 && "Should be at least two vectors");
1049     for (unsigned i = 0; i < NumVecs - 1; i += 2) {
1050       Value *V0 = ResList[i], *V1 = ResList[i + 1];
1052 "Only the last vector may have a different type");
1058 if (NumVecs % 2 != 0)
1059 TmpList.push_back(ResList[NumVecs - 1]);
1062 NumVecs = ResList.size();
1063   } while (NumVecs > 1);
1070          isa<IntegerType>(Mask->getType()->getScalarType()) &&
1071          cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1073          "Mask must be a vector of i1");
1075   auto *ConstMask = dyn_cast<Constant>(Mask);
1078   if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
1080   if (isa<ScalableVectorType>(ConstMask->getType()))
1084        E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1086     if (auto *MaskElt = ConstMask->getAggregateElement(I))
1087 if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
1096          isa<IntegerType>(Mask->getType()->getScalarType()) &&
1097          cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1099          "Mask must be a vector of i1");
1101   auto *ConstMask = dyn_cast<Constant>(Mask);
1104   if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1106   if (isa<ScalableVectorType>(ConstMask->getType()))
1110        E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1112     if (auto *MaskElt = ConstMask->getAggregateElement(I))
1113 if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1123   assert(isa<FixedVectorType>(Mask->getType()) &&
1124          isa<IntegerType>(Mask->getType()->getScalarType()) &&
1125          cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1127          "Mask must be a fixed width vector of i1");
1129   const unsigned VWidth =
1130       cast<FixedVectorType>(Mask->getType())->getNumElements();
1132   if (auto *CV = dyn_cast<ConstantVector>(Mask))
1133     for (unsigned i = 0; i < VWidth; i++)
1134       if (CV->getAggregateElement(i)->isNullValue())
1136 return DemandedElts;
1139 bool InterleavedAccessInfo::isStrided(int Stride) {
1140   unsigned Factor = std::abs(Stride);
1144 void InterleavedAccessInfo::collectConstStrideAccesses(
1147   auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
1158     for (auto &I : *BB) {
1167       if (Size * 8 != DL.getTypeSizeInBits(ElementTy))
1179                                     true, false).value_or(0);
1182       AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
1224 bool EnablePredicatedInterleavedMemAccesses) {
1230 collectConstStrideAccesses(AccessStrideInfo, Strides);
1232   if (AccessStrideInfo.empty())
1236   collectDependences();
1255   for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
1258     StrideDescriptor DesB = BI->second;
1264     if (isStrided(DesB.Stride) &&
1265         (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
1266       Group = getInterleaveGroup(B);
1270         Group = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
1272       if (B->mayWriteToMemory())
1273         StoreGroups.insert(Group);
1275         LoadGroups.insert(Group);
1278     for (auto AI = std::next(BI); AI != E; ++AI) {
1280 StrideDescriptor DesA = AI->second;
1301 if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
1307 if (isInterleaved(A)) {
1311 "dependence between " << *A <<
" and "<< *
B <<
'\n');
1313 StoreGroups.
remove(StoreGroup);
1314 releaseGroup(StoreGroup);
1327 if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
1336       if (isInterleaved(A) ||
1337           (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
1338           (A->mayWriteToMemory() != B->mayWriteToMemory()))
1343       if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
1353           PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
1360       if (DistanceToB % static_cast<int64_t>(DesB.Size))
1368           (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
1374           Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
1379                  << " into the interleave group with" << *B
1381 InterleaveGroupMap[A] = Group;
1384 if (A->mayReadFromMemory())
1392                                             std::string FirstOrLast) -> bool {
1394     assert(Member && "Group member does not exist");
1397     if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, Strides,
1398                      false, true).value_or(0))
1400     LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
1402                       << " group member potentially pointer-wrapping.\n");
1403 releaseGroup(Group);
1421   for (auto *Group : LoadGroups) {
1433     if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
1436     InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1,
1437                                    std::string("last"));
1446           dbgs() << "LV: Invalidate candidate interleaved group due to "
1447                     "a reverse access with gaps.\n");
1448       releaseGroup(Group);
1452         dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
1453     RequiresScalarEpilogue = true;
1457   for (auto *Group : StoreGroups) {
1467     if (!EnablePredicatedInterleavedMemAccesses) {
1469           dbgs() << "LV: Invalidate candidate interleaved store group due "
1471       releaseGroup(Group);
1481     if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
1485       InvalidateGroupIfMemberMayWrap(Group, Index, std::string("last"));
1494 if (!requiresScalarEpilogue())
1497   bool ReleasedGroup = false;
1501     if (!Group->requiresScalarEpilogue())
1505         << "LV: Invalidate candidate interleaved group due to gaps that "
1506            "require a scalar epilogue (not allowed under optsize) and cannot "
1507            "be masked (not enabled). \n");
1508     releaseGroup(Group);
1509     ReleasedGroup = true;
1511   assert(ReleasedGroup && "At least one group must be invalidated, as a "
1512                           "scalar epilogue was required");
1513   (void)ReleasedGroup;
1514   RequiresScalarEpilogue = false;
1517 template <typename InstT>
1526   std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
1527                  [](std::pair<int, Instruction *> p) { return p.second; });
1542   for (unsigned I = 0; I < numArgs; ++I)
1544   Out << "_" << ScalarName << "(" << VectorName << ")";
1545   return std::string(Out.str());
1555   S.split(ListAttr, ",");
1560     std::optional<VFInfo> Info =
1562     assert(Info && "Invalid name for a VFABI variant.");
1564            "Vector function is missing.");
1566     VariantMappings.push_back(std::string(S));
1571   for (unsigned Pos = 0, NumParams = Parameters.size(); Pos < NumParams;
1573     assert(Parameters[Pos].ParamPos == Pos && "Broken parameter list.");
1575     switch (Parameters[Pos].ParamKind) {
1583       if (Parameters[Pos].LinearStepOrPos == 0)
1592       if (Parameters[Pos].LinearStepOrPos >= int(NumParams))
1595       if (Parameters[Parameters[Pos].LinearStepOrPos].ParamKind !=
1599       if (Parameters[Pos].LinearStepOrPos == int(Pos))
1605       for (unsigned NextPos = Pos + 1; NextPos < NumParams; ++NextPos)
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
bool maskIsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, return true if all of the elements of this predicate mask are known to be ...
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
A parsed version of the target data layout string, and methods for querying it.
bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign)
Try to insert a new member Instr with index Index and alignment NewAlign.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
Represents a single loop in the control flow graph.
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep)
Find the operand of the GEP that should be checked for consecutive stores.
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
void getAllMetadataOtherThanDebugLoc(SmallVectorImpl< std::pair< unsigned, MDNode * >> &MDs) const
This does the same thing as getAllMetadata, except that it filters out the debug location.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
member_iterator member_end() const
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
int64_t getSExtValue() const
Get sign extended value.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
void getVectorVariantNames(const CallInst &CI, SmallVectorImpl< std::string > &VariantMappings)
Populates a set of strings representing the Vector Function ABI variants associated to the CallInst C...
static Value * concatenateTwoVectors(IRBuilderBase &Builder, Value *V1, Value *V2)
A helper function for concatenating vectors.
The main scalar evolution driver.
bool maskIsAllZeroOrUndef(Value *Mask)
Given a mask vector of i1, return true if all of the elements of this predicate mask are known to be ...
constexpr unsigned MaxAnalysisRecursionDepth
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock * > Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
Type * getLoadStoreType(Value *I)
A helper function that returns the type of a load or store instruction.
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
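A minimal sketch (not part of this file, and the helper function name is hypothetical) of what the narrowing produces for a sample mask, assuming the declarations from llvm/Analysis/VectorUtils.h:
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/VectorUtils.h"
  void narrowMaskExample() {
    // Each wide index i expands to Scale indices {Scale*i, ..., Scale*i + Scale-1}.
    int WideMask[] = {0, 2};
    llvm::SmallVector<int, 16> Narrow;
    llvm::narrowShuffleMaskElts(/*Scale=*/2, WideMask, Narrow);
    // Narrow == {0, 1, 4, 5}; negative (undef) elements are copied through unchanged.
  }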
The instances of the Type class are immutable: once they are created, they are never changed.
static cl::opt< unsigned > MaxInterleaveGroupFactor("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))
Maximum factor for an interleaved memory access.
unsigned getBitWidth() const
Return the number of bits in the APInt.
Value * getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty)
If a value has only one user that is a CastInst, return it.
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
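A sketch of the remapping (illustrative inputs only; the wrapper function name is an assumption), where NumElts is the element count of each original operand:
  #include "llvm/Analysis/VectorUtils.h"
  void unaryMaskExample() {
    // Indices >= NumElts, which referred to the second operand, are folded back into the first.
    llvm::SmallVector<int, 16> M = llvm::createUnaryMask({0, 5, 2, 7}, /*NumElts=*/4);
    // M == {0, 1, 2, 3}
  }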
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, unsigned OpdIdx)
Identifies if the vector form of the intrinsic has an operand that has an overloaded type.
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
This class implements a map that also provides access to all stored values in a deterministic order.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Type * getElementType() const
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
gep_type_iterator gep_type_begin(const User *GEP)
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
MDNode * uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2)
Compute the union of two access-group lists.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
The group of interleaved loads/stores sharing the same stride and close to each other.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
bool empty() const
empty - Check if the array is empty.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM Basic Block Representation.
unsigned getNumOperands() const
Return number of MDNode operands.
constexpr int UndefMaskElem
bool hasValidParameterList() const
Validation check on the Parameters in the VFShape.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
bool remove(const value_type &X)
Remove an item from the set vector.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName, unsigned numArgs, ElementCount VF)
This routine mangles the given VectorName according to the LangRef specification for vector-function-...
void invalidateGroupsRequiringScalarEpilogue()
Invalidate groups that require a scalar epilogue (due to gaps).
llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
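For illustration (a sketch under the assumption that the two input vectors are concatenated), the interleave mask for VF lanes drawn alternately from NumVecs vectors looks like this:
  #include "llvm/Analysis/VectorUtils.h"
  void interleaveMaskExample() {
    // VF = 4 lanes taken round-robin from NumVecs = 2 concatenated vectors.
    llvm::SmallVector<int, 16> M = llvm::createInterleaveMask(/*VF=*/4, /*NumVecs=*/2);
    // M == {0, 4, 1, 5, 2, 6, 3, 7}
  }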
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
bool mayReadOrWriteMemory() const
Return true if this instruction may read or write memory.
bool match(Val *V, const Pattern &P)
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
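A hedged example of the query (the wrapper function name is hypothetical); fabs is element-wise and appears in the switch above, while memcpy does not:
  #include "llvm/Analysis/VectorUtils.h"
  #include "llvm/IR/Intrinsics.h"
  void trivialVectorizationExample() {
    bool ElementWise = llvm::isTriviallyVectorizable(llvm::Intrinsic::fabs);      // true
    bool NotElementWise = llvm::isTriviallyVectorizable(llvm::Intrinsic::memcpy); // false
    (void)ElementWise; (void)NotElementWise;
  }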
Value * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
ArrayRef< MDOperand > operands() const
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
bool isVectorTy() const
True if this is an instance of VectorType.
This instruction inserts a single (scalar) element into a VectorType value.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
This node represents multiplication of some number of SCEVs.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
std::optional< VFInfo > tryDemangleForVFABI(StringRef MangledName, const Module &M)
Function to construct a VFInfo out of a mangled names in the following format:
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
StringRef getValueAsString() const
Return the attribute's value as a string.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
VectorType * getType() const
Overload to return most specific vector type.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
void setInsertPos(InstTy *Inst)
static constexpr const char * _LLVM_
LLVM Internal VFABI ISA token for vector functions.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
An efficient, type-erasing, non-owning reference to a callable.
Store the result of a depth first search within basic blocks contained by a single loop.
Base class of all SIMD vector types.
bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
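A sketch of the split for a small mask (sample values only; the wrapper function name is an assumption). Lanes with indices below SrcWidth demand elements of the first operand, the rest demand elements of the second:
  #include "llvm/ADT/APInt.h"
  #include "llvm/Analysis/VectorUtils.h"
  void demandedEltsExample() {
    // 4-wide sources; the mask picks lanes from LHS (0..3) and RHS (4..7).
    llvm::APInt DemandedElts = llvm::APInt::getAllOnes(4);
    llvm::APInt DemandedLHS, DemandedRHS; // overwritten by the call
    bool Ok = llvm::getShuffleDemandedElts(/*SrcWidth=*/4, {0, 5, 2, 7}, DemandedElts,
                                           DemandedLHS, DemandedRHS);
    // Ok == true; DemandedLHS == 0b0101 (elements 0 and 2 of the first operand),
    // DemandedRHS == 0b1010 (elements 1 and 3 of the second operand).
    (void)Ok;
  }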
uint32_t getIndex(const InstTy *Instr) const
Get the index for the given member.
This class represents an analyzed expression in the program.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
This is an important base class in LLVM.
static MDNode * intersect(MDNode *A, MDNode *B)
uint32_t getFactor() const
This is an important class for using LLVM in a threaded context.
static void addToAccessGroupList(ListT &List, MDNode *AccGroups)
Add all access groups in AccGroups to List.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
initializer< Ty > init(const Ty &Val)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Type * getResultElementType() const
This class represents a constant integer value.
static MDNode * getMostGenericTBAA(MDNode *A, MDNode *B)
constexpr ScalarTy getFixedValue() const
void analyzeInterleaving(bool EnableMaskedInterleavedGroup)
Analyze the interleaved accesses and collect them in interleave groups.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Type * getIndexedType() const
unsigned getLoadStoreAddressSpace(Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Class for arbitrary precision integers.
bool insert(const value_type &X)
Insert a new element into the SetVector.
const ElemTy & getOrInsertLeaderValue(const ElemTy &V)
getOrInsertLeaderValue - Return the leader for the specified value that is in the set.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
MDNode * intersectAccessGroups(const Instruction *Inst1, const Instruction *Inst2)
Compute the access-group list of access groups that Inst1 and Inst2 are both in.
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
StringRef - Represent a constant reference to a string, i.e.
member_iterator member_begin(iterator I) const
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Type * getType() const
All values are typed, get the type of this value.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
LLVMContext & getContext() const
All values hold a context through their type.
static MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
Common base class shared among various IRBuilders.
static Constant * get(ArrayRef< Constant * > V)
This is the base class for all instructions that perform data casts.
llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
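A short sketch of the replication (illustrative parameters; the wrapper function name is an assumption):
  #include "llvm/Analysis/VectorUtils.h"
  void replicatedMaskExample() {
    // Each of the VF = 2 source lanes is repeated ReplicationFactor = 3 times.
    llvm::SmallVector<int, 16> M = llvm::createReplicatedMask(/*ReplicationFactor=*/3, /*VF=*/2);
    // M == {0, 0, 0, 1, 1, 1}
  }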
const T & front() const
front - Get the first element.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
reverse_iterator rbegin()
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
LLVMContext & getContext() const
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
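A sketch of when widening succeeds and when it fails (sample masks only; the wrapper function name is hypothetical). Each Scale-sized slice must be a contiguous, Scale-aligned run (or entirely undef) for the transform to apply:
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/VectorUtils.h"
  void widenMaskExample() {
    llvm::SmallVector<int, 16> Wide;
    // {2, 3, 0, 1} splits into aligned runs, so it widens to {1, 0} and returns true.
    bool Widened = llvm::widenShuffleMaskElts(/*Scale=*/2, {2, 3, 0, 1}, Wide);
    // {1, 2, 3, 0} does not start a run on a Scale boundary, so this returns false.
    bool Failed = llvm::widenShuffleMaskElts(/*Scale=*/2, {1, 2, 3, 0}, Wide);
    (void)Widened; (void)Failed;
  }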
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
bool isValidAsAccessGroup(MDNode *AccGroup)
Return whether an MDNode might represent an access group.
void assign(size_type NumElts, ValueParamT Elt)
Intrinsic::ID getIntrinsicForCallSite(const CallBase &CB, const TargetLibraryInfo *TLI)
Map a call instruction to an intrinsic ID.
This node represents a polynomial recurrence on the trip count of the specified loop.
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
Provides information about what library functions are available for the current target.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
const APInt & getAPInt() const
void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
This instruction constructs a fixed permutation of two input vectors.
This is the base class for unary integral cast operator classes.
unsigned getNumOperands() const
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
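A brief sketch (sample masks; the wrapper function name is an assumption) of the splat-index query; undef (-1) lanes are ignored:
  #include "llvm/Analysis/VectorUtils.h"
  void splatIndexExample() {
    int Splat = llvm::getSplatIndex({3, -1, 3, 3});   // 3: every defined lane picks element 3
    int NotSplat = llvm::getSplatIndex({0, 1, 0, 1}); // -1: lanes disagree
    (void)Splat; (void)NotSplat;
  }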
unsigned countLeadingZeros(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
static MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
size_t size() const
size - Get the array size.
static constexpr const char * MappingsAttrName
Value * stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
If the argument is a GEP, then returns the operand identified by getGEPInductionOperand.
A raw_ostream that writes to an SmallVector or SmallString.
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
A SetVector that performs no allocations if smaller than a certain size.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
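A small sketch of the strided selection (illustrative parameters; the wrapper function name is an assumption):
  #include "llvm/Analysis/VectorUtils.h"
  void strideMaskExample() {
    // Select VF = 4 elements starting at index 1 with a stride of 2.
    llvm::SmallVector<int, 16> M = llvm::createStrideMask(/*Start=*/1, /*Stride=*/2, /*VF=*/4);
    // M == {1, 3, 5, 7}
  }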
This class represents a function call, abstracting a target machine's calling convention.
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
Value * getOperand(unsigned i) const
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
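A small sketch of the sequential mask (illustrative parameters; the wrapper function name is an assumption); trailing undef lanes are encoded as -1:
  #include "llvm/Analysis/VectorUtils.h"
  void sequentialMaskExample() {
    // NumInts consecutive indices starting at Start, padded with NumUndefs undef lanes.
    llvm::SmallVector<int, 16> M = llvm::createSequentialMask(/*Start=*/2, /*NumInts=*/3, /*NumUndefs=*/1);
    // M == {2, 3, 4, -1}
  }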
A vector that has set insertion semantics.
void reserve(size_type N)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
APFloat abs(APFloat X)
Returns the absolute value of the argument.
LLVM Value Representation.
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Align getLoadStoreAlignment(Value *I)
A helper function that returns the alignment of load or store instruction.
Tracking metadata reference owned by Metadata.
uint32_t getNumMembers() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.