57 if (!VPBB->getParent())
60 auto EndIter = Term ? Term->getIterator() : VPBB->end();
65 VPValue *VPV = Ingredient.getVPSingleValue();
82 *Load, Ingredient.getOperand(0),
nullptr ,
84 Ingredient.getDebugLoc());
87 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
88 nullptr ,
false ,
false , *VPI,
89 Ingredient.getDebugLoc());
92 Ingredient.getDebugLoc());
104 if (VectorID == Intrinsic::experimental_noalias_scope_decl)
109 if (VectorID == Intrinsic::assume ||
110 VectorID == Intrinsic::lifetime_end ||
111 VectorID == Intrinsic::lifetime_start ||
112 VectorID == Intrinsic::sideeffect ||
113 VectorID == Intrinsic::pseudoprobe) {
118 const bool IsSingleScalar = VectorID != Intrinsic::assume &&
119 VectorID != Intrinsic::pseudoprobe;
123 Ingredient.getDebugLoc());
126 *CI, VectorID,
drop_end(Ingredient.operands()), CI->getType(),
127 VPIRFlags(*CI), *VPI, CI->getDebugLoc());
131 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
135 *VPI, Ingredient.getDebugLoc());
139 "inductions must be created earlier");
148 "Only recpies with zero or one defined values expected");
149 Ingredient.eraseFromParent();
166 if (
A->getOpcode() != Instruction::Store ||
167 B->getOpcode() != Instruction::Store)
177 const APInt *Distance;
183 Type *TyA = TypeInfo.inferScalarType(
A->getOperand(0));
185 Type *TyB = TypeInfo.inferScalarType(
B->getOperand(0));
191 uint64_t MaxStoreSize = std::max(SizeA, SizeB);
193 auto VFs =
B->getParent()->getPlan()->vectorFactors();
197 return Distance->
abs().
uge(
205 : ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), PSE(PSE),
206 L(L), TypeInfo(TypeInfo) {}
213 return ExcludeRecipes.contains(&R) ||
214 (Store && isNoAliasViaDistance(Store, &GroupLeader));
227 std::optional<SinkStoreInfo> SinkInfo = {}) {
228 bool CheckReads = SinkInfo.has_value();
235 if (SinkInfo && SinkInfo->shouldSkip(R))
239 if (!
R.mayWriteToMemory() && !(CheckReads &&
R.mayReadFromMemory()))
257template <
unsigned Opcode>
262 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
263 "Only Load and Store opcodes supported");
264 constexpr bool IsLoad = (Opcode == Instruction::Load);
271 if (!RepR || RepR->getOpcode() != Opcode || !FilterFn(RepR))
275 VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
278 RecipesByAddress[AddrSCEV].push_back(RepR);
283 for (
auto &Group :
Groups) {
302 if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
307 return RepR && RepR->getOpcode() == Instruction::Alloca;
316 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
332 if (!ScalarVFOnly && RepR->isSingleScalar())
335 WorkList.
insert({SinkTo, Candidate});
347 for (
auto &Recipe : *VPBB)
349 InsertIfValidSinkCandidate(VPBB,
Op);
353 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
356 std::tie(SinkTo, SinkCandidate) = WorkList[
I];
361 auto UsersOutsideSinkTo =
363 return cast<VPRecipeBase>(U)->getParent() != SinkTo;
365 if (
any_of(UsersOutsideSinkTo, [SinkCandidate](
VPUser *U) {
366 return !U->usesFirstLaneOnly(SinkCandidate);
369 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
371 if (NeedsDuplicating) {
375 if (
auto *SinkCandidateRepR =
381 nullptr , *SinkCandidateRepR,
385 Clone = SinkCandidate->
clone();
395 InsertIfValidSinkCandidate(SinkTo,
Op);
405 if (!EntryBB || EntryBB->size() != 1 ||
415 if (EntryBB->getNumSuccessors() != 2)
420 if (!Succ0 || !Succ1)
423 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
425 if (Succ0->getSingleSuccessor() == Succ1)
427 if (Succ1->getSingleSuccessor() == Succ0)
444 if (!Region1->isReplicator())
446 auto *MiddleBasicBlock =
448 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
453 if (!Region2 || !Region2->isReplicator())
458 if (!Mask1 || Mask1 != Mask2)
461 assert(Mask1 && Mask2 &&
"both region must have conditions");
467 if (TransformedRegions.
contains(Region1))
474 if (!Then1 || !Then2)
494 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
500 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
501 Phi1ToMove.eraseFromParent();
504 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
518 TransformedRegions.
insert(Region1);
521 return !TransformedRegions.
empty();
528 std::string RegionName = (
Twine(
"pred.") + Instr->getOpcodeName()).str();
529 assert(Instr->getParent() &&
"Predicated instruction not in any basic block");
530 auto *BlockInMask = PredRecipe->
getMask();
549 RecipeWithoutMask->getDebugLoc());
573 if (RepR->isPredicated())
592 if (ParentRegion && ParentRegion->
getExiting() == CurrentBlock)
604 if (!VPBB->getParent())
608 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
617 R.moveBefore(*PredVPBB, PredVPBB->
end());
619 auto *ParentRegion = VPBB->getParent();
620 if (ParentRegion && ParentRegion->getExiting() == VPBB)
621 ParentRegion->setExiting(PredVPBB);
625 return !WorkList.
empty();
632 bool ShouldSimplify =
true;
633 while (ShouldSimplify) {
649 if (!
IV ||
IV->getTruncInst())
664 for (
auto *U : FindMyCast->
users()) {
666 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
667 FoundUserCast = UserCast;
671 FindMyCast = FoundUserCast;
696 if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
710 WidenOriginalIV->dropPoisonGeneratingFlags();
723 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
725 if (IsConditionalAssume)
728 if (R.mayHaveSideEffects())
732 return all_of(R.definedValues(),
733 [](
VPValue *V) { return V->getNumUsers() == 0; });
752 VPUser *PhiUser = PhiR->getSingleUser();
755 if (PhiUser !=
Incoming->getDefiningRecipe() ||
758 PhiR->replaceAllUsesWith(Start);
759 PhiR->eraseFromParent();
760 Incoming->getDefiningRecipe()->eraseFromParent();
775 Kind, FPBinOp, StartV, CanonicalIV, Step,
"offset.idx");
785 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy,
DL);
791 if (ResultTy != StepTy) {
798 Builder.setInsertPoint(VecPreheader);
799 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy,
DL);
801 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
807 for (
unsigned I = 0;
I !=
Users.size(); ++
I) {
812 Users.insert_range(V->users());
814 return Users.takeVector();
828 nullptr, StartV, StepV, PtrIV->
getDebugLoc(), Builder);
865 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
866 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
874 Def->operands(),
true,
876 Clone->insertAfter(Def);
877 Def->replaceAllUsesWith(Clone);
888 PtrIV->replaceAllUsesWith(PtrAdd);
895 if (HasOnlyVectorVFs &&
none_of(WideIV->users(), [WideIV](
VPUser *U) {
896 return U->usesScalars(WideIV);
902 Plan,
ID.getKind(),
ID.getInductionOpcode(),
904 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
905 WideIV->getDebugLoc(), Builder);
908 if (!HasOnlyVectorVFs) {
910 "plans containing a scalar VF cannot also include scalable VFs");
911 WideIV->replaceAllUsesWith(Steps);
914 WideIV->replaceUsesWithIf(Steps,
915 [WideIV, HasScalableVF](
VPUser &U,
unsigned) {
917 return U.usesFirstLaneOnly(WideIV);
918 return U.usesScalars(WideIV);
934 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
939 if (!Def || Def->getNumOperands() != 2)
947 auto IsWideIVInc = [&]() {
948 auto &
ID = WideIV->getInductionDescriptor();
951 VPValue *IVStep = WideIV->getStepValue();
952 switch (
ID.getInductionOpcode()) {
953 case Instruction::Add:
955 case Instruction::FAdd:
957 case Instruction::FSub:
960 case Instruction::Sub: {
980 return IsWideIVInc() ? WideIV :
nullptr;
1000 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1013 FirstActiveLane =
B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
1014 FirstActiveLaneType,
DL);
1015 VPValue *EndValue =
B.createAdd(CanonicalIV, FirstActiveLane,
DL);
1022 EndValue =
B.createAdd(EndValue, One,
DL);
1025 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
1027 VPIRValue *Start = WideIV->getStartValue();
1028 VPValue *Step = WideIV->getStepValue();
1029 EndValue =
B.createDerivedIV(
1031 Start, EndValue, Step);
1046 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1053 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
1056 Start, VectorTC, Step);
1085 assert(EndValue &&
"Must have computed the end value up front");
1101 auto *Zero = Plan.
getZero(StepTy);
1102 return B.createPtrAdd(EndValue,
B.createSub(Zero, Step),
1107 return B.createNaryOp(
1108 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
1110 : Instruction::FAdd,
1111 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
1123 VPBuilder VectorPHBuilder(VectorPH, VectorPH->begin());
1132 WideIV, VectorPHBuilder, TypeInfo, ResumeTC))
1133 EndValues[WideIV] = EndValue;
1143 R.getVPSingleValue()->replaceAllUsesWith(EndValue);
1144 R.eraseFromParent();
1153 for (
auto [Idx, PredVPBB] :
enumerate(ExitVPBB->getPredecessors())) {
1155 if (PredVPBB == MiddleVPBB)
1157 ExitIRI->getOperand(Idx),
1161 Plan, TypeInfo, PredVPBB, ExitIRI->getOperand(Idx), PSE);
1163 ExitIRI->setOperand(Idx, Escape);
1180 const auto &[V, Inserted] = SCEV2VPV.
try_emplace(ExpR->getSCEV(), ExpR);
1183 ExpR->replaceAllUsesWith(V->second);
1184 ExpR->eraseFromParent();
1193 while (!WorkList.
empty()) {
1195 if (!Seen.
insert(Cur).second)
1203 R->eraseFromParent();
1210static std::optional<std::pair<bool, unsigned>>
1213 std::optional<std::pair<bool, unsigned>>>(R)
1216 [](
auto *
I) {
return std::make_pair(
false,
I->getOpcode()); })
1218 return std::make_pair(
true,
I->getVectorIntrinsicID());
1220 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](
auto *
I) {
1224 return std::make_pair(
false,
1227 .
Default([](
auto *) {
return std::nullopt; });
1245 Value *V =
Op->getUnderlyingValue();
1251 auto FoldToIRValue = [&]() ->
Value * {
1253 if (OpcodeOrIID->first) {
1254 if (R.getNumOperands() != 2)
1256 unsigned ID = OpcodeOrIID->second;
1257 return Folder.FoldBinaryIntrinsic(
ID,
Ops[0],
Ops[1],
1260 unsigned Opcode = OpcodeOrIID->second;
1269 return Folder.FoldSelect(
Ops[0],
Ops[1],
1272 return Folder.FoldBinOp(Instruction::BinaryOps::Xor,
Ops[0],
1274 case Instruction::Select:
1275 return Folder.FoldSelect(
Ops[0],
Ops[1],
Ops[2]);
1276 case Instruction::ICmp:
1277 case Instruction::FCmp:
1280 case Instruction::GetElementPtr: {
1283 return Folder.FoldGEP(
GEP->getSourceElementType(),
Ops[0],
1293 case Instruction::ExtractElement:
1300 if (
Value *V = FoldToIRValue())
1301 return R.getParent()->getPlan()->getOrAddLiveIn(V);
1307 VPlan *Plan = Def->getParent()->getPlan();
1313 return Def->replaceAllUsesWith(V);
1319 PredPHI->replaceAllUsesWith(
Op);
1332 bool CanCreateNewRecipe =
1339 if (TruncTy == ATy) {
1340 Def->replaceAllUsesWith(
A);
1349 : Instruction::ZExt;
1352 if (
auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1354 Ext->setUnderlyingValue(UnderlyingExt);
1356 Def->replaceAllUsesWith(Ext);
1358 auto *Trunc = Builder.createWidenCast(Instruction::Trunc,
A, TruncTy);
1359 Def->replaceAllUsesWith(Trunc);
1367 for (
VPUser *U :
A->users()) {
1369 for (
VPValue *VPV : R->definedValues())
1383 Def->replaceAllUsesWith(
X);
1384 Def->eraseFromParent();
1390 return Def->replaceAllUsesWith(
1395 return Def->replaceAllUsesWith(
X);
1399 return Def->replaceAllUsesWith(
1404 return Def->replaceAllUsesWith(
1409 return Def->replaceAllUsesWith(
X);
1413 return Def->replaceAllUsesWith(Plan->
getFalse());
1417 return Def->replaceAllUsesWith(
X);
1420 if (CanCreateNewRecipe &&
1425 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1426 !Def->getOperand(1)->hasMoreThanOneUniqueUser()))
1427 return Def->replaceAllUsesWith(
1428 Builder.createLogicalAnd(
X, Builder.createOr(
Y, Z)));
1433 return Def->replaceAllUsesWith(Def->getOperand(1));
1438 return Def->replaceAllUsesWith(Builder.createLogicalAnd(
X,
Y));
1442 return Def->replaceAllUsesWith(Plan->
getFalse());
1445 return Def->replaceAllUsesWith(
X);
1449 if (CanCreateNewRecipe &&
1451 return Def->replaceAllUsesWith(Builder.createNot(
C));
1455 Def->setOperand(0,
C);
1456 Def->setOperand(1,
Y);
1457 Def->setOperand(2,
X);
1462 return Def->replaceAllUsesWith(
A);
1465 return Def->replaceAllUsesWith(
A);
1468 return Def->replaceAllUsesWith(
1475 return Def->replaceAllUsesWith(
1477 Def->getDebugLoc(),
"", NW));
1483 return Def->replaceAllUsesWith(Builder.createNaryOp(
1485 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1490 const VPRegionBlock *ParentRegion = Def->getParent()->getParent();
1491 bool IsInReplicateRegion = ParentRegion && ParentRegion->
isReplicator();
1492 if (CanCreateNewRecipe && !IsInReplicateRegion &&
1494 return Def->replaceAllUsesWith(Builder.createNaryOp(
1496 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1501 return Def->replaceAllUsesWith(
A);
1516 R->setOperand(1,
Y);
1517 R->setOperand(2,
X);
1521 R->replaceAllUsesWith(Cmp);
1526 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1527 Cmp->setDebugLoc(Def->getDebugLoc());
1539 if (
Op->getNumUsers() > 1 ||
1543 }
else if (!UnpairedCmp) {
1544 UnpairedCmp =
Op->getDefiningRecipe();
1548 UnpairedCmp =
nullptr;
1555 if (NewOps.
size() < Def->getNumOperands()) {
1557 return Def->replaceAllUsesWith(NewAnyOf);
1564 if (CanCreateNewRecipe &&
1570 return Def->replaceAllUsesWith(NewCmp);
1578 return Def->replaceAllUsesWith(Def->getOperand(1));
1584 X = Builder.createWidenCast(Instruction::Trunc,
X, WideStepTy);
1585 Def->replaceAllUsesWith(
X);
1595 Def->setOperand(1, Def->getOperand(0));
1596 Def->setOperand(0,
Y);
1603 return Def->replaceAllUsesWith(Def->getOperand(0));
1609 Def->replaceAllUsesWith(
1610 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1614 return Def->replaceAllUsesWith(
A);
1620 Def->replaceAllUsesWith(
1621 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1628 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1633 Def->replaceAllUsesWith(
1643 "broadcast operand must be single-scalar");
1644 Def->setOperand(0,
C);
1649 if (Def->getNumOperands() == 1) {
1650 Def->replaceAllUsesWith(Def->getOperand(0));
1655 Phi->replaceAllUsesWith(Phi->getOperand(0));
1661 if (Def->getNumOperands() == 1 &&
1663 return Def->replaceAllUsesWith(IRV);
1676 return Def->replaceAllUsesWith(
A);
1679 Def->replaceAllUsesWith(Builder.createNaryOp(
1680 Instruction::ExtractElement, {A, LaneToExtract}, Def->getDebugLoc()));
1694 auto *IVInc = Def->getOperand(0);
1695 if (IVInc->getNumUsers() == 2) {
1700 if (Phi->getNumUsers() == 1 || (Phi->getNumUsers() == 2 && Inc)) {
1701 Def->replaceAllUsesWith(IVInc);
1703 Inc->replaceAllUsesWith(Phi);
1704 Phi->setOperand(0,
Y);
1720 Steps->replaceAllUsesWith(Steps->getOperand(0));
1728 Def->replaceUsesWithIf(StartV, [](
const VPUser &U,
unsigned Idx) {
1730 return PhiR && PhiR->isInLoop();
1736 Def->replaceAllUsesWith(
A);
1745 [Def,
A](
VPUser *U) { return U->usesScalars(A) || Def == U; })) {
1746 return Def->replaceAllUsesWith(
A);
1750 return Def->replaceAllUsesWith(
A);
1777 while (!Worklist.
empty()) {
1786 R->replaceAllUsesWith(
1787 Builder.createLogicalAnd(HeaderMask, Builder.createLogicalAnd(
X,
Y)));
1806 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1815 !WidenStoreR->isConsecutive()) {
1816 assert(!WidenStoreR->isReverse() &&
1817 "Not consecutive memory recipes shouldn't be reversed");
1818 VPValue *Mask = WidenStoreR->getMask();
1827 {WidenStoreR->getOperand(1)});
1832 &WidenStoreR->getIngredient(), {Extract, WidenStoreR->getAddr()},
1833 true ,
nullptr , {},
1835 ScalarStore->insertBefore(WidenStoreR);
1836 WidenStoreR->eraseFromParent();
1844 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1845 true ,
nullptr , *RepR ,
1846 *RepR , RepR->getDebugLoc());
1847 Clone->insertBefore(RepOrWidenR);
1849 VPValue *ExtractOp = Clone->getOperand(0);
1855 Clone->setOperand(0, ExtractOp);
1856 RepR->eraseFromParent();
1865 auto IntroducesBCastOf = [](
const VPValue *
Op) {
1874 return !U->usesScalars(
Op);
1878 if (
any_of(RepOrWidenR->users(), IntroducesBCastOf(RepOrWidenR)) &&
1881 make_filter_range(Op->users(), not_equal_to(RepOrWidenR)),
1882 IntroducesBCastOf(Op)))
1886 auto *IRV = dyn_cast<VPIRValue>(Op);
1887 bool LiveInNeedsBroadcast = IRV && !isa<Constant>(IRV->getValue());
1888 auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
1889 return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
1894 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1895 true ,
nullptr, *RepOrWidenR);
1896 Clone->insertBefore(RepOrWidenR);
1897 RepOrWidenR->replaceAllUsesWith(Clone);
1899 RepOrWidenR->eraseFromParent();
1935 if (Blend->isNormalized() || !
match(Blend->getMask(0),
m_False()))
1936 UniqueValues.
insert(Blend->getIncomingValue(0));
1937 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
1939 UniqueValues.
insert(Blend->getIncomingValue(
I));
1941 if (UniqueValues.
size() == 1) {
1942 Blend->replaceAllUsesWith(*UniqueValues.
begin());
1943 Blend->eraseFromParent();
1947 if (Blend->isNormalized())
1953 unsigned StartIndex = 0;
1954 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
1959 if (Mask->getNumUsers() == 1 && !
match(Mask,
m_False())) {
1966 OperandsWithMask.
push_back(Blend->getIncomingValue(StartIndex));
1968 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
1969 if (
I == StartIndex)
1971 OperandsWithMask.
push_back(Blend->getIncomingValue(
I));
1972 OperandsWithMask.
push_back(Blend->getMask(
I));
1977 OperandsWithMask, *Blend, Blend->getDebugLoc());
1978 NewBlend->insertBefore(&R);
1980 VPValue *DeadMask = Blend->getMask(StartIndex);
1982 Blend->eraseFromParent();
1987 if (NewBlend->getNumOperands() == 3 &&
1989 VPValue *Inc0 = NewBlend->getOperand(0);
1990 VPValue *Inc1 = NewBlend->getOperand(1);
1991 VPValue *OldMask = NewBlend->getOperand(2);
1992 NewBlend->setOperand(0, Inc1);
1993 NewBlend->setOperand(1, Inc0);
1994 NewBlend->setOperand(2, NewMask);
2021 APInt MaxVal = AlignedTC - 1;
2024 unsigned NewBitWidth =
2030 bool MadeChange =
false;
2039 if (!WideIV || !WideIV->isCanonical() ||
2040 WideIV->hasMoreThanOneUniqueUser() ||
2041 NewIVTy == WideIV->getScalarType())
2046 VPUser *SingleUser = WideIV->getSingleUser();
2054 auto *NewStart = Plan.
getZero(NewIVTy);
2055 WideIV->setStartValue(NewStart);
2057 WideIV->setStepValue(NewStep);
2064 Cmp->setOperand(1, NewBTC);
2078 return any_of(
Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
2080 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, PSE);
2093 const SCEV *VectorTripCount =
2098 "Trip count SCEV must be computable");
2119 auto *Term = &ExitingVPBB->
back();
2132 for (
unsigned Part = 0; Part < UF; ++Part) {
2138 Extracts[Part] = Ext;
2150 match(Phi->getBackedgeValue(),
2152 assert(Index &&
"Expected index from ActiveLaneMask instruction");
2169 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
2176 "Expected incoming values of Phi to be ActiveLaneMasks");
2181 EntryALM->setOperand(2, ALMMultiplier);
2182 LoopALM->setOperand(2, ALMMultiplier);
2186 ExtractFromALM(EntryALM, EntryExtracts);
2191 ExtractFromALM(LoopALM, LoopExtracts);
2193 Not->setOperand(0, LoopExtracts[0]);
2196 for (
unsigned Part = 0; Part < UF; ++Part) {
2197 Phis[Part]->setStartValue(EntryExtracts[Part]);
2198 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
2211 auto *Term = &ExitingVPBB->
back();
2223 const SCEV *VectorTripCount =
2229 "Trip count SCEV must be computable");
2248 Term->setOperand(1, Plan.
getTrue());
2253 {}, Term->getDebugLoc());
2255 Term->eraseFromParent();
2290 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
2300 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
2301 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
2329 auto TryToPushSinkCandidate = [&](
VPRecipeBase *SinkCandidate) {
2332 if (SinkCandidate == Previous)
2336 !Seen.
insert(SinkCandidate).second ||
2349 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
2352 "only recipes with a single defined value expected");
2367 if (SinkCandidate == FOR)
2370 SinkCandidate->moveAfter(Previous);
2371 Previous = SinkCandidate;
2395 [&VPDT, HoistPoint](
VPUser *U) {
2396 auto *R = cast<VPRecipeBase>(U);
2397 return HoistPoint == R ||
2398 VPDT.properlyDominates(HoistPoint, R);
2400 "HoistPoint must dominate all users of FOR");
2402 auto NeedsHoisting = [HoistPoint, &VPDT,
2404 VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
2405 if (!HoistCandidate)
2410 HoistCandidate->
getRegion() == EnclosingLoopRegion) &&
2411 "CFG in VPlan should still be flat, without replicate regions");
2413 if (!Visited.
insert(HoistCandidate).second)
2425 return HoistCandidate;
2434 for (
unsigned I = 0;
I != HoistCandidates.
size(); ++
I) {
2437 "only recipes with a single defined value expected");
2449 if (
auto *R = NeedsHoisting(
Op)) {
2452 if (R->getNumDefinedValues() != 1)
2466 HoistCandidate->moveBefore(*HoistPoint->
getParent(),
2486 VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
2489 while (
auto *PrevPhi =
2491 assert(PrevPhi->getParent() == FOR->getParent());
2493 Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
2512 {FOR, FOR->getBackedgeValue()});
2517 RecurSplice->setOperand(0, FOR);
2523 for (
VPUser *U : RecurSplice->users()) {
2533 VPValue *PenultimateIndex =
B.createSub(LastActiveLane, One);
2534 VPValue *PenultimateLastIter =
2536 {PenultimateIndex, FOR->getBackedgeValue()});
2541 VPValue *Sel =
B.createSelect(Cmp, LastPrevIter, PenultimateLastIter);
2554 RecurKind RK = PhiR->getRecurrenceKind();
2561 RecWithFlags->dropPoisonGeneratingFlags();
2567struct VPCSEDenseMapInfo :
public DenseMapInfo<VPSingleDefRecipe *> {
2569 return Def == getEmptyKey() || Def == getTombstoneKey();
2580 return GEP->getSourceElementType();
2583 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2584 [](
auto *
I) {
return I->getSourceElementType(); })
2585 .
Default([](
auto *) {
return nullptr; });
2589 static bool canHandle(
const VPSingleDefRecipe *Def) {
2598 if (!
C || (!
C->first && (
C->second == Instruction::InsertValue ||
2599 C->second == Instruction::ExtractValue)))
2605 return !
Def->mayReadFromMemory();
2609 static unsigned getHashValue(
const VPSingleDefRecipe *Def) {
2610 const VPlan *Plan =
Def->getParent()->getPlan();
2611 VPTypeAnalysis TypeInfo(*Plan);
2614 getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
2617 if (RFlags->hasPredicate())
2623 static bool isEqual(
const VPSingleDefRecipe *L,
const VPSingleDefRecipe *R) {
2626 if (
L->getVPRecipeID() !=
R->getVPRecipeID() ||
2628 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2630 !
equal(
L->operands(),
R->operands()))
2633 "must have valid opcode info for both recipes");
2635 if (LFlags->hasPredicate() &&
2636 LFlags->getPredicate() !=
2642 const VPRegionBlock *RegionL =
L->getRegion();
2643 const VPRegionBlock *RegionR =
R->getRegion();
2646 L->getParent() !=
R->getParent())
2648 const VPlan *Plan =
L->getParent()->getPlan();
2649 VPTypeAnalysis TypeInfo(*Plan);
2650 return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
2666 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2670 if (!VPDT.
dominates(V->getParent(), VPBB))
2675 Def->replaceAllUsesWith(V);
2694 "Expected vector prehader's successor to be the vector loop region");
2701 return !Op->isDefinedOutsideLoopRegions();
2704 R.moveBefore(*Preheader, Preheader->
end());
2721 assert(!RepR->isPredicated() &&
2722 "Expected prior transformation of predicated replicates to "
2723 "replicate regions");
2728 if (!RepR->isSingleScalar())
2738 if (Def->getNumUsers() == 0)
2747 auto *UserR = cast<VPRecipeBase>(U);
2748 VPBasicBlock *Parent = UserR->getParent();
2751 if (UserR->isPhi() || Parent->getEnclosingLoopRegion())
2754 if (SinkBB && SinkBB != Parent)
2768 "Defining block must dominate sink block");
2794 VPValue *ResultVPV = R.getVPSingleValue();
2796 unsigned NewResSizeInBits = MinBWs.
lookup(UI);
2797 if (!NewResSizeInBits)
2810 (void)OldResSizeInBits;
2818 VPW->dropPoisonGeneratingFlags();
2820 if (OldResSizeInBits != NewResSizeInBits &&
2824 Instruction::ZExt, ResultVPV, OldResTy,
nullptr,
2826 Ext->insertAfter(&R);
2828 Ext->setOperand(0, ResultVPV);
2829 assert(OldResSizeInBits > NewResSizeInBits &&
"Nothing to shrink?");
2832 "Only ICmps should not need extending the result.");
2842 for (
unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
2843 auto *
Op = R.getOperand(Idx);
2844 unsigned OpSizeInBits =
2846 if (OpSizeInBits == NewResSizeInBits)
2848 assert(OpSizeInBits > NewResSizeInBits &&
"nothing to truncate");
2849 auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.
try_emplace(
Op);
2851 R.setOperand(Idx, ProcessedIter->second);
2859 Builder.setInsertPoint(&R);
2861 Builder.createWidenCast(Instruction::Trunc,
Op, NewResTy);
2862 ProcessedIter->second = NewOp;
2863 R.setOperand(Idx, NewOp);
2871 std::optional<VPDominatorTree> VPDT;
2885 assert(VPBB->getNumSuccessors() == 2 &&
2886 "Two successors expected for BranchOnCond");
2887 unsigned RemovedIdx;
2898 "There must be a single edge between VPBB and its successor");
2907 VPBB->back().eraseFromParent();
2961 VPValue *StartV = CanonicalIVPHI->getStartValue();
2963 auto *CanonicalIVIncrement =
2966 CanonicalIVIncrement->dropPoisonGeneratingFlags();
2967 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
2978 auto *EntryIncrement = Builder.createOverflowingOp(
2980 DL,
"index.part.next");
2986 {EntryIncrement, TC, ALMMultiplier},
DL,
2987 "active.lane.mask.entry");
2993 LaneMaskPhi->insertAfter(CanonicalIVPHI);
2998 Builder.setInsertPoint(OriginalTerminator);
2999 auto *InLoopIncrement = Builder.createOverflowingOp(
3001 {CanonicalIVIncrement, &Plan.
getVF()}, {
false,
false},
DL);
3003 {InLoopIncrement, TC, ALMMultiplier},
DL,
3004 "active.lane.mask.next");
3009 auto *NotMask = Builder.createNot(ALM,
DL);
3016 bool UseActiveLaneMaskForControlFlow) {
3018 auto *FoundWidenCanonicalIVUser =
find_if(
3020 assert(FoundWidenCanonicalIVUser &&
3021 "Must have widened canonical IV when tail folding!");
3023 auto *WideCanonicalIV =
3026 if (UseActiveLaneMaskForControlFlow) {
3035 nullptr,
"active.lane.mask");
3051 template <
typename OpTy>
bool match(OpTy *V)
const {
3062template <
typename Op0_t,
typename Op1_t>
3081 VPValue *Addr, *Mask, *EndPtr;
3084 auto AdjustEndPtr = [&CurRecipe, &EVL](
VPValue *EndPtr) {
3086 EVLEndPtr->insertBefore(&CurRecipe);
3087 EVLEndPtr->setOperand(1, &EVL);
3091 if (
match(&CurRecipe,
3105 LoadR->insertBefore(&CurRecipe);
3107 Intrinsic::experimental_vp_reverse, {LoadR, Plan->
getTrue(), &EVL},
3116 StoredVal, EVL, Mask);
3118 if (
match(&CurRecipe,
3124 Intrinsic::experimental_vp_reverse,
3125 {ReversedVal, Plan->
getTrue(), &EVL},
3129 AdjustEndPtr(EndPtr), NewReverse, EVL,
3134 if (Rdx->isConditional() &&
3139 if (Interleave->getMask() &&
3144 if (
match(&CurRecipe,
3153 Intrinsic::vp_merge, {Mask,
LHS,
RHS, &EVL},
3173 VPValue *HeaderMask =
nullptr, *EVL =
nullptr;
3178 HeaderMask = R.getVPSingleValue();
3190 NewR->insertBefore(R);
3191 for (
auto [Old, New] :
3192 zip_equal(R->definedValues(), NewR->definedValues()))
3193 Old->replaceAllUsesWith(New);
3207 Merge->insertBefore(LogicalAnd);
3208 LogicalAnd->replaceAllUsesWith(
Merge);
3216 R->eraseFromParent();
3233 "User of VF that we can't transform to EVL.");
3239 [&LoopRegion, &Plan](
VPUser *U) {
3241 m_c_Add(m_Specific(LoopRegion->getCanonicalIV()),
3242 m_Specific(&Plan.getVFxUF()))) ||
3243 isa<VPWidenPointerInductionRecipe>(U);
3245 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
3246 "increment of the canonical induction.");
3262 MaxEVL = Builder.createScalarZExtOrTrunc(
3266 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
3267 VPValue *PrevEVL = Builder.createScalarPhi(
3281 Intrinsic::experimental_vp_splice,
3282 {V1, V2, Imm, Plan.
getTrue(), PrevEVL, &EVL},
3286 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
3303 VPValue *EVLMask = Builder.createICmp(
3364 VPlan &Plan,
const std::optional<unsigned> &MaxSafeElements) {
3372 VPValue *StartV = CanonicalIVPHI->getStartValue();
3375 auto *CurrentIteration =
3377 CurrentIteration->insertAfter(CanonicalIVPHI);
3378 VPBuilder Builder(Header, Header->getFirstNonPhi());
3381 VPPhi *AVLPhi = Builder.createScalarPhi(
3385 if (MaxSafeElements) {
3395 auto *CanonicalIVIncrement =
3397 Builder.setInsertPoint(CanonicalIVIncrement);
3401 OpVPEVL = Builder.createScalarZExtOrTrunc(
3402 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
3404 auto *NextIter = Builder.createAdd(
3405 OpVPEVL, CurrentIteration, CanonicalIVIncrement->getDebugLoc(),
3406 "current.iteration.next", CanonicalIVIncrement->getNoWrapFlags());
3407 CurrentIteration->addOperand(NextIter);
3411 "avl.next", {
true,
false});
3419 CanonicalIVPHI->replaceAllUsesWith(CurrentIteration);
3420 CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
3434 assert(!CurrentIteration &&
3435 "Found multiple CurrentIteration. Only one expected");
3436 CurrentIteration = PhiR;
3440 if (!CurrentIteration)
3451 CurrentIteration->
getDebugLoc(),
"current.iteration.iv");
3457 VPValue *Backedge = CanonicalIV->getIncomingValue(1);
3460 "Unexpected canonical iv");
3466 CanonicalIV->eraseFromParent();
3486 if (!
match(EVLPhi->getBackedgeValue(),
3499 [[maybe_unused]]
bool FoundAVLNext =
3502 assert(FoundAVLNext &&
"Didn't find AVL backedge?");
3514 "Expected BranchOnCond with ICmp comparing CanIV increment with vector "
3519 LatchBr->setOperand(
3530 return R->getRegion() ||
3534 for (
const SCEV *Stride : StridesMap.
values()) {
3537 const APInt *StrideConst;
3560 RewriteMap[StrideV] = PSE.
getSCEV(StrideV);
3567 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3570 if (NewSCEV != ScevExpr) {
3572 ExpSCEV->replaceAllUsesWith(NewExp);
3581 const std::function<
bool(
BasicBlock *)> &BlockNeedsPredication) {
3585 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](
VPRecipeBase *Root) {
3590 while (!Worklist.
empty()) {
3593 if (!Visited.
insert(CurRec).second)
3615 RecWithFlags->isDisjoint()) {
3618 Builder.createAdd(
A,
B, RecWithFlags->getDebugLoc());
3619 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3620 RecWithFlags->replaceAllUsesWith(New);
3621 RecWithFlags->eraseFromParent();
3624 RecWithFlags->dropPoisonGeneratingFlags();
3629 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3630 "found instruction with poison generating flags not covered by "
3631 "VPRecipeWithIRFlags");
3636 if (
VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3648 Instruction &UnderlyingInstr = WidenRec->getIngredient();
3649 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3650 if (AddrDef && WidenRec->isConsecutive() &&
3651 BlockNeedsPredication(UnderlyingInstr.
getParent()))
3652 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3654 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3658 InterleaveRec->getInterleaveGroup();
3659 bool NeedPredication =
false;
3661 I < NumMembers; ++
I) {
3664 NeedPredication |= BlockNeedsPredication(Member->getParent());
3667 if (NeedPredication)
3668 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3680 if (InterleaveGroups.empty())
3687 for (
const auto *IG : InterleaveGroups) {
3693 StoredValues.
push_back(StoreR->getStoredValue());
3694 for (
unsigned I = 1;
I < IG->getFactor(); ++
I) {
3701 StoredValues.
push_back(StoreR->getStoredValue());
3705 bool NeedsMaskForGaps =
3706 (IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) ||
3707 (!StoredValues.
empty() && !IG->isFull());
3719 VPValue *Addr = Start->getAddr();
3728 assert(IG->getIndex(IRInsertPos) != 0 &&
3729 "index of insert position shouldn't be zero");
3733 IG->getIndex(IRInsertPos),
3737 Addr =
B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3743 if (IG->isReverse()) {
3746 -(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
3747 ReversePtr->insertBefore(InsertPos);
3751 InsertPos->getMask(), NeedsMaskForGaps,
3752 InterleaveMD, InsertPos->getDebugLoc());
3753 VPIG->insertBefore(InsertPos);
3756 for (
unsigned i = 0; i < IG->getFactor(); ++i)
3759 if (!Member->getType()->isVoidTy()) {
3818 AddOp = Instruction::Add;
3819 MulOp = Instruction::Mul;
3821 AddOp =
ID.getInductionOpcode();
3822 MulOp = Instruction::FMul;
3830 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty,
DL);
3831 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty,
DL);
3840 Init = Builder.createWidenCast(Instruction::UIToFP,
Init, StepTy);
3845 Init = Builder.createNaryOp(MulOp, {
Init, SplatStep}, Flags);
3846 Init = Builder.createNaryOp(AddOp, {SplatStart,
Init}, Flags,
3852 WidePHI->insertBefore(WidenIVR);
3863 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3867 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3870 VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
3873 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3880 auto *
Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3883 WidePHI->addOperand(
Next);
3911 VPlan *Plan = R->getParent()->getPlan();
3912 VPValue *Start = R->getStartValue();
3913 VPValue *Step = R->getStepValue();
3914 VPValue *VF = R->getVFValue();
3916 assert(R->getInductionDescriptor().getKind() ==
3918 "Not a pointer induction according to InductionDescriptor!");
3921 "Recipe should have been replaced");
3927 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start,
DL,
"pointer.phi");
3931 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3934 Offset = Builder.createOverflowingOp(Instruction::Mul, {
Offset, Step});
3936 Builder.createWidePtrAdd(ScalarPtrPhi,
Offset,
DL,
"vector.gep");
3937 R->replaceAllUsesWith(PtrAdd);
3942 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.
inferScalarType(VF),
3944 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
3947 Builder.createPtrAdd(ScalarPtrPhi, Inc,
DL,
"ptr.ind");
3956 if (!R->isReplicator())
3960 R->dissolveToCFGLoop();
3981 assert(Br->getNumOperands() == 2 &&
3982 "BranchOnTwoConds must have exactly 2 conditions");
3986 assert(Successors.size() == 3 &&
3987 "BranchOnTwoConds must have exactly 3 successors");
3992 VPValue *Cond0 = Br->getOperand(0);
3993 VPValue *Cond1 = Br->getOperand(1);
3998 !BrOnTwoCondsBB->
getParent() &&
"regions must already be dissolved");
4011 Br->eraseFromParent();
4034 WidenIVR->replaceAllUsesWith(PtrAdd);
4047 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
4048 Select = Builder.createSelect(Blend->getMask(
I),
4049 Blend->getIncomingValue(
I),
Select,
4050 R.getDebugLoc(),
"predphi", *Blend);
4051 Blend->replaceAllUsesWith(
Select);
4056 if (!VEPR->getOffset()) {
4058 "Expected unroller to have materialized offset for UF != 1");
4059 VEPR->materializeOffset();
4074 for (
VPValue *
Op : LastActiveL->operands()) {
4075 VPValue *NotMask = Builder.createNot(
Op, LastActiveL->getDebugLoc());
4080 VPValue *FirstInactiveLane = Builder.createNaryOp(
4082 LastActiveL->getDebugLoc(),
"first.inactive.lane");
4088 Builder.createSub(FirstInactiveLane, One,
4089 LastActiveL->getDebugLoc(),
"last.active.lane");
4099 assert(VPI->isMasked() &&
4100 "Unmasked MaskedCond should be simplified earlier");
4101 VPI->replaceAllUsesWith(Builder.createNaryOp(
4111 DebugLoc DL = BranchOnCountInst->getDebugLoc();
4114 ToRemove.push_back(BranchOnCountInst);
4129 ? Instruction::UIToFP
4130 : Instruction::Trunc;
4131 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
4137 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
4143 MulOpc = Instruction::FMul;
4144 Flags = VPI->getFastMathFlags();
4146 MulOpc = Instruction::Mul;
4151 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
4153 VPI->replaceAllUsesWith(VectorStep);
4159 R->eraseFromParent();
4167 struct EarlyExitInfo {
4178 if (Pred == MiddleVPBB)
4183 VPValue *CondOfEarlyExitingVPBB;
4184 [[maybe_unused]]
bool Matched =
4185 match(EarlyExitingVPBB->getTerminator(),
4187 assert(Matched &&
"Terminator must be BranchOnCond");
4191 VPBuilder EarlyExitingBuilder(EarlyExitingVPBB->getTerminator());
4192 auto *CondToEarlyExit = EarlyExitingBuilder.
createNaryOp(
4194 TrueSucc == ExitBlock
4195 ? CondOfEarlyExitingVPBB
4196 : EarlyExitingBuilder.
createNot(CondOfEarlyExitingVPBB));
4202 "exit condition must dominate the latch");
4211 assert(!Exits.
empty() &&
"must have at least one early exit");
4218 for (
const auto &[Num, VPB] :
enumerate(RPOT))
4220 llvm::sort(Exits, [&RPOIdx](
const EarlyExitInfo &
A,
const EarlyExitInfo &
B) {
4221 return RPOIdx[
A.EarlyExitingVPBB] < RPOIdx[
B.EarlyExitingVPBB];
4227 for (
unsigned I = 0;
I + 1 < Exits.
size(); ++
I)
4228 for (
unsigned J =
I + 1; J < Exits.
size(); ++J)
4230 Exits[
I].EarlyExitingVPBB) &&
4231 "RPO sort must place dominating exits before dominated ones");
4237 VPValue *Combined = Exits[0].CondToExit;
4238 for (
const EarlyExitInfo &Info :
drop_begin(Exits))
4239 Combined = Builder.createLogicalOr(Combined, Info.CondToExit);
4245 "Early exit store masking not implemented");
4249 for (
unsigned Idx = 0; Idx != Exits.
size(); ++Idx) {
4253 VectorEarlyExitVPBBs[Idx] = VectorEarlyExitVPBB;
4261 Exits.
size() == 1 ? VectorEarlyExitVPBBs[0]
4295 for (
auto [Exit, VectorEarlyExitVPBB] :
4296 zip_equal(Exits, VectorEarlyExitVPBBs)) {
4297 auto &[EarlyExitingVPBB, EarlyExitVPBB,
_] = Exit;
4309 ExitIRI->getIncomingValueForBlock(EarlyExitingVPBB);
4310 VPValue *NewIncoming = IncomingVal;
4312 VPBuilder EarlyExitBuilder(VectorEarlyExitVPBB);
4317 ExitIRI->removeIncomingValueFor(EarlyExitingVPBB);
4318 ExitIRI->addOperand(NewIncoming);
4321 EarlyExitingVPBB->getTerminator()->eraseFromParent();
4355 bool IsLastDispatch = (
I + 2 == Exits.
size());
4357 IsLastDispatch ? VectorEarlyExitVPBBs.
back()
4363 VectorEarlyExitVPBBs[
I]->setPredecessors({CurrentBB});
4366 CurrentBB = FalseBB;
4373 "Unexpected terminator");
4374 auto *IsLatchExitTaken =
4376 LatchExitingBranch->getOperand(1));
4378 DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
4379 LatchExitingBranch->eraseFromParent();
4380 Builder.setInsertPoint(LatchVPBB);
4382 {IsAnyExitTaken, IsLatchExitTaken}, LatchDL);
4384 LatchVPBB->
setSuccessors({DispatchVPBB, MiddleVPBB, HeaderVPBB});
4395 Type *RedTy = Ctx.Types.inferScalarType(Red);
4396 VPValue *VecOp = Red->getVecOp();
4399 auto IsExtendedRedValidAndClampRange =
4411 if (Red->isPartialReduction()) {
4416 ExtRedCost = Ctx.TTI.getPartialReductionCost(
4417 Opcode, SrcTy,
nullptr, RedTy, VF, ExtKind,
4420 ? std::optional{Red->getFastMathFlags()}
4424 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
4425 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
4426 Red->getFastMathFlags(),
CostKind);
4428 return ExtRedCost.
isValid() && ExtRedCost < ExtCost + RedCost;
4438 IsExtendedRedValidAndClampRange(
4441 Ctx.Types.inferScalarType(
A)))
4460 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
4461 Opcode != Instruction::FAdd)
4464 Type *RedTy = Ctx.Types.inferScalarType(Red);
4467 auto IsMulAccValidAndClampRange =
4474 Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
4477 if (Red->isPartialReduction()) {
4479 Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) :
nullptr;
4482 MulAccCost = Ctx.TTI.getPartialReductionCost(
4483 Opcode, SrcTy, SrcTy2, RedTy, VF,
4492 ? std::optional{Red->getFastMathFlags()}
4498 (Ext0->getOpcode() != Ext1->getOpcode() ||
4499 Ext0->getOpcode() == Instruction::CastOps::FPExt))
4503 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
4505 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
4513 ExtCost += Ext0->computeCost(VF, Ctx);
4515 ExtCost += Ext1->computeCost(VF, Ctx);
4517 ExtCost += OuterExt->computeCost(VF, Ctx);
4519 return MulAccCost.
isValid() &&
4520 MulAccCost < ExtCost + MulCost + RedCost;
4525 VPValue *VecOp = Red->getVecOp();
4532 assert(Opcode == Instruction::FAdd &&
4533 "MulAccumulateReduction from an FMul must accumulate into an FAdd "
4542 if (RecipeA && RecipeB &&
4543 IsMulAccValidAndClampRange(
FMul, RecipeA, RecipeB,
nullptr)) {
4565 if (!ExtA || ExtB || !
isa<VPIRValue>(ValB) || Red->isPartialReduction())
4567 Type *NarrowTy = Ctx.Types.inferScalarType(ExtA->getOperand(0));
4581 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
4582 Type *WideTy = Ctx.Types.inferScalarType(ExtA);
4583 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
4584 Mul->setOperand(1, ExtB);
4594 ExtendAndReplaceConstantOp(RecipeA, RecipeB,
B,
Mul);
4599 IsMulAccValidAndClampRange(
Mul, RecipeA, RecipeB,
nullptr)) {
4606 if (!
Sub && IsMulAccValidAndClampRange(
Mul,
nullptr,
nullptr,
nullptr))
4615 if (!Red->isPartialReduction() &&
4624 ExtendAndReplaceConstantOp(Ext0, Ext1,
B,
Mul);
4633 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
4634 Ext0->getOpcode() == Ext1->getOpcode() &&
4635 IsMulAccValidAndClampRange(
Mul, Ext0, Ext1, Ext) &&
Mul->hasOneUse()) {
4637 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(),
nullptr,
4638 *Ext0, *Ext0, Ext0->getDebugLoc());
4639 NewExt0->insertBefore(Ext0);
4644 Ext->getResultType(),
nullptr, *Ext1,
4645 *Ext1, Ext1->getDebugLoc());
4648 Mul->setOperand(0, NewExt0);
4649 Mul->setOperand(1, NewExt1);
4650 Red->setOperand(1,
Mul);
4663 auto IP = std::next(Red->getIterator());
4664 auto *VPBB = Red->getParent();
4674 Red->replaceAllUsesWith(AbstractR);
4704 for (
VPValue *VPV : VPValues) {
4713 if (
User->usesScalars(VPV))
4716 HoistPoint = HoistBlock->
begin();
4720 "All users must be in the vector preheader or dominated by it");
4725 VPV->replaceUsesWithIf(Broadcast,
4726 [VPV, Broadcast](
VPUser &U,
unsigned Idx) {
4727 return Broadcast != &U && !U.usesScalars(VPV);
4744 if (RepR->isPredicated() || !RepR->isSingleScalar() ||
4745 RepR->getOpcode() != Instruction::Load)
4748 VPValue *Addr = RepR->getOperand(0);
4751 if (!
Loc.AATags.Scope)
4756 if (R.mayWriteToMemory()) {
4758 if (!
Loc || !
Loc->AATags.Scope || !
Loc->AATags.NoAlias)
4766 for (
auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
4770 const AAMDNodes &LoadAA = LoadLoc.AATags;
4786 return CommonMetadata;
4789template <
unsigned Opcode>
4794 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
4795 "Only Load and Store opcodes supported");
4796 constexpr bool IsLoad = (Opcode == Instruction::Load);
4802 return TypeInfo.
inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
4807 for (
auto Recipes :
Groups) {
4808 if (Recipes.size() < 2)
4816 VPValue *MaskI = RecipeI->getMask();
4817 Type *TypeI = GetLoadStoreValueType(RecipeI);
4823 bool HasComplementaryMask =
false;
4828 VPValue *MaskJ = RecipeJ->getMask();
4829 Type *TypeJ = GetLoadStoreValueType(RecipeJ);
4830 if (TypeI == TypeJ) {
4840 if (HasComplementaryMask) {
4841 assert(Group.
size() >= 2 &&
"must have at least 2 entries");
4851template <
typename InstType>
4869 for (
auto &Group :
Groups) {
4889 return R->isSingleScalar() == IsSingleScalar;
4891 "all members in group must agree on IsSingleScalar");
4896 LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
4897 IsSingleScalar,
nullptr, *EarliestLoad, CommonMetadata);
4899 UnpredicatedLoad->insertBefore(EarliestLoad);
4903 Load->replaceAllUsesWith(UnpredicatedLoad);
4904 Load->eraseFromParent();
4914 if (!StoreLoc || !StoreLoc->AATags.Scope)
4920 StoresToSink.
end());
4924 SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], PSE, L, TypeInfo);
4938 for (
auto &Group :
Groups) {
4951 VPValue *SelectedValue = Group[0]->getOperand(0);
4954 bool IsSingleScalar = Group[0]->isSingleScalar();
4955 for (
unsigned I = 1;
I < Group.size(); ++
I) {
4956 assert(IsSingleScalar == Group[
I]->isSingleScalar() &&
4957 "all members in group must agree on IsSingleScalar");
4958 VPValue *Mask = Group[
I]->getMask();
4960 SelectedValue = Builder.createSelect(Mask,
Value, SelectedValue,
4969 StoreWithMinAlign->getUnderlyingInstr(),
4970 {SelectedValue, LastStore->getOperand(1)}, IsSingleScalar,
4971 nullptr, *LastStore, CommonMetadata);
4972 UnpredicatedStore->insertBefore(*InsertBB, LastStore->
getIterator());
4976 Store->eraseFromParent();
4983 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
4984 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
5049 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](
VPUser *U) {
5051 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
5058 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
5068 DefR->replaceUsesWithIf(
5069 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
5071 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
5085 for (
VPValue *Def : R.definedValues()) {
5098 auto IsCandidateUnpackUser = [Def](
VPUser *U) {
5100 return U->usesScalars(Def) &&
5103 if (
none_of(Def->users(), IsCandidateUnpackUser))
5110 Unpack->insertAfter(&R);
5111 Def->replaceUsesWithIf(Unpack,
5112 [&IsCandidateUnpackUser](
VPUser &U,
unsigned) {
5113 return IsCandidateUnpackUser(&U);
5123 bool RequiresScalarEpilogue,
5135 assert(StepR->getParent() == VectorPHVPBB &&
5136 "Step must be defined in VectorPHVPBB");
5138 InsertPt = std::next(StepR->getIterator());
5140 VPBuilder Builder(VectorPHVPBB, InsertPt);
5148 if (TailByMasking) {
5149 TC = Builder.createAdd(
5160 Builder.createNaryOp(Instruction::URem, {TC, Step},
5169 if (RequiresScalarEpilogue) {
5171 "requiring scalar epilogue is not supported with fail folding");
5174 R = Builder.createSelect(IsZero, Step, R);
5188 "VF and VFxUF must be materialized together");
5200 Builder.createElementCount(TCTy, VFEC * Plan.
getConcreteUF());
5207 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
5211 BC, [&VF](
VPUser &U,
unsigned) {
return !U.usesScalars(&VF); });
5215 VPValue *MulByUF = Builder.createOverflowingOp(
5227 BasicBlock *EntryBB = Entry->getIRBasicBlock();
5235 const SCEV *Expr = ExpSCEV->getSCEV();
5238 ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
5243 ExpSCEV->eraseFromParent();
5246 "VPExpandSCEVRecipes must be at the beginning of the entry block, "
5247 "before any VPIRInstructions");
5250 auto EI = Entry->begin();
5260 return ExpandedSCEVs;
5272 VPValue *OpV,
unsigned Idx,
bool IsScalable) {
5276 return Member0Op == OpV;
5280 return !IsScalable && !W->getMask() && W->isConsecutive() &&
5283 return IR->getInterleaveGroup()->isFull() &&
IR->getVPValue(Idx) == OpV;
5300 for (
unsigned Idx = 0; Idx != WideMember0->getNumOperands(); ++Idx) {
5303 OpsI.
push_back(
Op->getDefiningRecipe()->getOperand(Idx));
5308 if (
any_of(
enumerate(OpsI), [WideMember0, Idx, IsScalable](
const auto &
P) {
5309 const auto &[
OpIdx, OpV] =
P;
5324 if (!InterleaveR || InterleaveR->
getMask())
5325 return std::nullopt;
5327 Type *GroupElementTy =
nullptr;
5331 [&TypeInfo, GroupElementTy](
VPValue *
Op) {
5332 return TypeInfo.inferScalarType(Op) == GroupElementTy;
5334 return std::nullopt;
5339 [&TypeInfo, GroupElementTy](
VPValue *
Op) {
5340 return TypeInfo.inferScalarType(Op) == GroupElementTy;
5342 return std::nullopt;
5346 if (IG->getFactor() != IG->getNumMembers())
5347 return std::nullopt;
5353 assert(
Size.isScalable() == VF.isScalable() &&
5354 "if Size is scalable, VF must be scalable and vice versa");
5355 return Size.getKnownMinValue();
5359 unsigned MinVal = VF.getKnownMinValue();
5361 if (IG->getFactor() == MinVal && GroupSize == GetVectorBitWidthForVF(VF))
5364 return std::nullopt;
5372 return RepR && RepR->isSingleScalar();
5379 auto *R = V->getDefiningRecipe();
5388 for (
unsigned Idx = 0,
E = WideMember0->getNumOperands(); Idx !=
E; ++Idx)
5389 WideMember0->setOperand(
5398 auto *LI =
cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
5400 *LI, LoadGroup->getAddr(), LoadGroup->getMask(),
true,
5401 false, {}, LoadGroup->getDebugLoc());
5402 L->insertBefore(LoadGroup);
5408 assert(RepR->isSingleScalar() &&
5410 "must be a single scalar load");
5411 NarrowedOps.
insert(RepR);
5416 VPValue *PtrOp = WideLoad->getAddr();
5418 PtrOp = VecPtr->getOperand(0);
5423 nullptr, {}, *WideLoad);
5424 N->insertBefore(WideLoad);
5429std::unique_ptr<VPlan>
5449 "unexpected branch-on-count");
5453 std::optional<ElementCount> VFToOptimize;
5470 if (R.mayWriteToMemory() && !InterleaveR)
5485 std::optional<ElementCount> NarrowedVF =
5487 if (!NarrowedVF || (VFToOptimize && NarrowedVF != VFToOptimize))
5489 VFToOptimize = NarrowedVF;
5492 if (InterleaveR->getStoredValues().empty())
5497 auto *Member0 = InterleaveR->getStoredValues()[0];
5507 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
5510 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
5511 return IR && IR->getInterleaveGroup()->isFull() &&
5512 IR->getVPValue(Op.index()) == Op.value();
5521 VFToOptimize->isScalable()))
5526 if (StoreGroups.
empty())
5530 bool RequiresScalarEpilogue =
5541 std::unique_ptr<VPlan> NewPlan;
5543 NewPlan = std::unique_ptr<VPlan>(Plan.
duplicate());
5544 Plan.
setVF(*VFToOptimize);
5545 NewPlan->removeVF(*VFToOptimize);
5551 for (
auto *StoreGroup : StoreGroups) {
5557 *
SI, StoreGroup->getAddr(), Res,
nullptr,
true,
5558 false, {}, StoreGroup->getDebugLoc());
5559 S->insertBefore(StoreGroup);
5560 StoreGroup->eraseFromParent();
5572 if (VFToOptimize->isScalable()) {
5585 RequiresScalarEpilogue, Step);
5587 Inc->setOperand(1, Step);
5593 "All VPVectorPointerRecipes should have been removed");
5609 "must have a BranchOnCond");
5612 if (VF.
isScalable() && VScaleForTuning.has_value())
5613 VectorStep *= *VScaleForTuning;
5614 assert(VectorStep > 0 &&
"trip count should not be zero");
5618 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
5625 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
5637 "Cannot handle loops with uncountable early exits");
5710 make_range(MiddleVPBB->getFirstNonPhi(), MiddleVPBB->end()))) {
5724 "vector.recur.extract.for.phi");
5742 auto CheckSentinel = [&SE](
const SCEV *IVSCEV,
bool UseMax,
5743 bool Signed) -> std::optional<APInt> {
5754 return std::nullopt;
5762 PhiR->getRecurrenceKind()))
5771 VPValue *BackedgeVal = PhiR->getBackedgeValue();
5772 VPValue *CondSelect = BackedgeVal;
5787 VPValue *
IV = TrueVal == PhiR ? FalseVal : TrueVal;
5801 bool UseSigned =
true;
5802 std::optional<APInt> SentinelVal =
5803 CheckSentinel(IVSCEV, UseMax,
true);
5805 SentinelVal = CheckSentinel(IVSCEV, UseMax,
false);
5814 if (AR->hasNoSignedWrap())
5816 else if (AR->hasNoUnsignedWrap())
5829 VPIRFlags Flags(MinMaxKind,
false,
false,
5838 VPValue *StartVPV = PhiR->getStartValue();
5846 MiddleBuilder.
createSelect(Cmp, ReducedIV, StartVPV, ExitDL);
5855 AnyOfPhi->insertAfter(PhiR);
5859 if (TrueVal == PhiR)
5866 {StartVPV, ReducedIV, OrVal}, {}, ExitDL);
5878 *CondSelect,
RdxUnordered{1}, {}, PhiR->hasUsesOutsideReductionChain());
5879 NewPhiR->insertBefore(PhiR);
5880 PhiR->replaceAllUsesWith(NewPhiR);
5881 PhiR->eraseFromParent();
5889struct ExtendedReductionOperand {
5892 std::array<VPWidenCastRecipe *, 2> CastRecipes = {};
5898struct VPPartialReductionChain {
5901 VPWidenRecipe *ReductionBinOp;
5903 ExtendedReductionOperand ExtendedOp;
5904 unsigned ScaleFactor;
5927 auto *Trunc = Builder.createWidenCast(Instruction::CastOps::Trunc,
5930 BinOp->
setOperand(1, Builder.createWidenCast(ExtOpc, Trunc, WideTy));
5943 if (!
Mul->hasOneUse() ||
5944 (Ext->getOpcode() != MulLHS->getOpcode() && MulLHS != MulRHS) ||
5945 MulLHS->getOpcode() != MulRHS->getOpcode())
5948 Mul->setOperand(0, Builder.createWidenCast(MulLHS->getOpcode(),
5949 MulLHS->getOperand(0),
5950 Ext->getResultType()));
5951 Mul->setOperand(1, MulLHS == MulRHS
5952 ?
Mul->getOperand(0)
5953 : Builder.createWidenCast(MulRHS->getOpcode(),
5954 MulRHS->getOperand(0),
5955 Ext->getResultType()));
5964static void transformToPartialReduction(
const VPPartialReductionChain &Chain,
5993 if (WidenRecipe->
getOpcode() == Instruction::Sub &&
6003 Builder.insert(NegRecipe);
6008 BinOp = optimizeExtendsForPartialReduction(BinOp, TypeInfo);
6018 assert((!ExitValue || IsLastInChain) &&
6019 "if we found ExitValue, it must match RdxPhi's backedge value");
6030 PartialRed->insertBefore(WidenRecipe);
6047 auto *NewScaleFactor = Plan.
getConstantInt(32, Chain.ScaleFactor);
6048 StartInst->setOperand(2, NewScaleFactor);
6056 VPValue *OldStartValue = StartInst->getOperand(0);
6057 StartInst->setOperand(0, StartInst->getOperand(1));
6061 assert(RdxResult &&
"Could not find reduction result");
6064 constexpr unsigned SubOpc = Instruction::BinaryOps::Sub;
6070 [&NewResult](
VPUser &U,
unsigned Idx) {
return &
U != NewResult; });
6076static bool isValidPartialReduction(
const VPPartialReductionChain &Chain,
6080 -> std::pair<Type *, TargetTransformInfo::PartialReductionExtendKind> {
6086 return {ExtOpType, ExtKind};
6088 ExtendedReductionOperand ExtendedOp = Chain.ExtendedOp;
6092 Type *ExtOpTypeA, *ExtOpTypeB;
6094 std::tie(ExtOpTypeA, ExtKindA) = GetExtInfo(ExtendA);
6095 std::tie(ExtOpTypeB, ExtKindB) = GetExtInfo(ExtendB);
6099 if (!ExtendB && ExtendedOp.BinOp &&
6100 ExtendedOp.BinOp != Chain.ReductionBinOp) {
6108 ExtOpTypeB = ExtOpTypeA;
6109 ExtKindB = ExtKindA;
6112 std::optional<unsigned> BinOpc;
6113 if (ExtendedOp.BinOp && ExtendedOp.BinOp != Chain.ReductionBinOp)
6121 WidenRecipe->
getOpcode(), ExtOpTypeA, ExtOpTypeB, PhiType, VF,
6122 ExtKindA, ExtKindB, BinOpc, CostCtx.
CostKind,
6124 ? std::optional{WidenRecipe->getFastMathFlags()}
6150static std::optional<ExtendedReductionOperand>
6153 "Op should be operand of UpdateR");
6155 std::optional<TTI::PartialReductionExtendKind> OuterExtKind;
6158 VPValue *CastSource = CastRecipe->getOperand(0);
6167 OuterExtKind = getPartialReductionExtendKind(CastRecipe);
6168 }
else if (UpdateR->
getOpcode() == Instruction::Add ||
6169 UpdateR->
getOpcode() == Instruction::FAdd) {
6173 return ExtendedReductionOperand{UpdateR, {CastRecipe,
nullptr}};
6177 if (!
Op->hasOneUse())
6178 return std::nullopt;
6187 return std::nullopt;
6197 return std::nullopt;
6207 return std::nullopt;
6211 if (Cast && OuterExtKind &&
6212 getPartialReductionExtendKind(Cast) != OuterExtKind)
6213 return std::nullopt;
6215 return ExtendedReductionOperand{BinOp, {LHSCast, RHSCast}};
6222static std::optional<SmallVector<VPPartialReductionChain>>
6230 return std::nullopt;
6240 VPValue *CurrentValue = ExitValue;
6241 while (CurrentValue != RedPhiR) {
6244 return std::nullopt;
6251 std::optional<ExtendedReductionOperand> ExtendedOp =
6252 matchExtendedReductionOperand(UpdateR,
Op);
6254 ExtendedOp = matchExtendedReductionOperand(UpdateR, PrevValue);
6256 return std::nullopt;
6261 ExtendedOp->CastRecipes[0]->getOperand(0));
6264 return std::nullopt;
6266 VPPartialReductionChain Chain(
6267 {UpdateR, *ExtendedOp,
6269 if (!isValidPartialReduction(Chain, PhiType, CostCtx,
Range))
6270 return std::nullopt;
6273 CurrentValue = PrevValue;
6278 std::reverse(Chains.
begin(), Chains.
end());
6297 if (
auto Chains = getScaledReductions(RedPhiR, CostCtx,
Range))
6298 ChainsByPhi.
try_emplace(RedPhiR, std::move(*Chains));
6301 if (ChainsByPhi.
empty())
6308 for (
const auto &[
_, Chains] : ChainsByPhi)
6309 for (
const VPPartialReductionChain &Chain : Chains) {
6310 PartialReductionOps.
insert(Chain.ExtendedOp.BinOp);
6311 ScaledReductionMap[Chain.ReductionBinOp] = Chain.ScaleFactor;
6319 return PartialReductionOps.contains(cast<VPRecipeBase>(U));
6328 for (
auto &[RedPhiR, Chains] : ChainsByPhi) {
6329 for (
const VPPartialReductionChain &Chain : Chains) {
6330 if (!
all_of(Chain.ExtendedOp.CastRecipes, ExtendUsersValid)) {
6334 auto UseIsValid = [&, RedPhiR = RedPhiR](
VPUser *U) {
6336 return PhiR == RedPhiR;
6338 return Chain.ScaleFactor == ScaledReductionMap.
lookup_or(R, 0) ||
6344 if (!
all_of(Chain.ReductionBinOp->
users(), UseIsValid)) {
6353 auto *RepR = dyn_cast<VPReplicateRecipe>(U);
6354 return RepR && isa<StoreInst>(RepR->getUnderlyingInstr());
6363 for (
auto &[Phi, Chains] : ChainsByPhi)
6364 for (
const VPPartialReductionChain &Chain : Chains)
6365 transformToPartialReduction(Chain, CostCtx.
Types, Plan, Phi);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const X86InstrFMA3Group Groups[]
static const uint32_t IV[8]
Helper for extra no-alias checks via known-safe recipe and SCEV.
SinkStoreInfo(const SmallPtrSetImpl< VPRecipeBase * > &ExcludeRecipes, VPReplicateRecipe &GroupLeader, PredicatedScalarEvolution &PSE, const Loop &L, VPTypeAnalysis &TypeInfo)
bool shouldSkip(VPRecipeBase &R) const
Return true if R should be skipped during alias checking, either because it's in the exclude set or b...
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
APInt abs() const
Get the absolute value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
@ NoAlias
The two locations do not alias at all.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
back - Get the last element.
const T & front() const
front - Get the first element.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
This class represents a range of values.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
ValueT lookup_or(const_arg_type_t< KeyT > Val, U &&Default) const
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Utility class for floating point operations which can have information about relaxed accuracy require...
Convenience struct for specifying and reasoning about fast-math flags.
Represents flags for the getelementptr instruction/expression.
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
uint32_t getNumMembers() const
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
std::pair< iterator, bool > try_emplace(const KeyT &Key, Ts &&...Args)
ValueT lookup(const KeyT &Key) const
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyze scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUDivExpr(SCEVUse LHS, SCEVUse RHS)
Get a canonical unsigned division expression, or something simpler if possible.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
static LLVM_ABI bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias)
static LLVM_ABI AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
const VPRecipeBase & front() const
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
const VPBlocksTy & getPredecessors() const
const std::string & getName() const
void clearSuccessors()
Remove all the successors of this block.
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
static SmallVector< VPBasicBlock * > blocksInSingleSuccessorChainBetween(VPBasicBlock *FirstBB, VPBasicBlock *LastBB)
Returns the blocks between FirstBB and LastBB, where FirstBB to LastBB forms a single-sucessor chain.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
Canonical scalar induction phi of the vector loop.
A recipe for generating the phi node tracking the current scalar iteration index.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A special type of VPBasicBlock that wraps an existing IR basic block.
Class to record and manage LLVM IR flags.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ComputeAnyOfResult
Compute the final result of a AnyOf reduction with select(cmp(),x,y), where one of (x,...
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ FirstOrderRecurrenceSplice
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPRegionBlock * getRegion()
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Helper class to create VPRecipies from IR instructions.
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe for handling reduction phis.
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
bool isPredicated() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
unsigned getNumOperands() const
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A Recipe for widening the canonical induction variable of the vector loop.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
VPIRValue * getStartValue() const
Returns the start value of the induction.
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
unsigned getOpcode() const
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
bool hasVF(ElementCount VF) const
const DataLayout & getDataLayout() const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
bool hasScalableVF() const
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPSymbolicValue & getVectorTripCount()
The vector trip count.
VPValue * getBackedgeTakenCount() const
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
constexpr ScalarTy getFixedValue() const
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ?
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::FAdd, true > m_c_FAdd(const LHS &L, const RHS &R)
Matches FAdd with LHS and RHS in either order.
LogicalOp_match< LHS, RHS, Instruction::And, true > m_c_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R with LHS and RHS in either order.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
bool match(const SCEV *S, const Pattern &P)
class_match< const SCEV > m_SCEV()
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ComputeReductionResult, Op0_t > m_ComputeReductionResult(const Op0_t &Op0)
auto m_WidenAnyExtend(const Op0_t &Op0)
VPInstruction_match< VPInstruction::StepVector > m_StepVector()
auto m_VPPhi(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExitingIVValue, Op0_t > m_ExitingIVValue(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
bind_ty< VPIRValue > m_VPIRValue(VPIRValue *&V)
Match a VPIRValue.
auto m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_True()
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector is matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
auto m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPRecipeBase * findRecipe(VPValue *Start, PredT Pred)
Search Start's users for a recipe satisfying Pred, looking through recipes with definitions.
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) TODO: Int...
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void stable_sort(R &&Range)
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
iterator_range< po_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_post_order_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order while traversing through ...
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
UncountableExitStyle
Different methods of handling early exits.
@ ReadOnly
No side effects to worry about, so we can process any uncountable exits in the loop and branch either...
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
iterator_range< po_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_post_order_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector....
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result value is uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * NoAlias
The tag specifying the noalias scope.
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
This reduction is unordered with the partial result scaled down by some factor.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
TargetTransformInfo::TargetCostKind CostKind
const TargetTransformInfo & TTI
A recipe for handling first-order recurrence phis.
A VPValue representing a live-in from the input IR or a constant.
Type * getType() const
Returns the type of the underlying IR value.
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
bool isMaterialized() const
Returns true if this symbolic value has been materialized.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...