57 if (!VPBB->getParent())
60 auto EndIter = Term ? Term->getIterator() : VPBB->end();
65 VPValue *VPV = Ingredient.getVPSingleValue();
81 *Load, Ingredient.getOperand(0),
nullptr ,
82 false , *VPI, Ingredient.getDebugLoc());
85 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
86 nullptr ,
false , *VPI,
87 Ingredient.getDebugLoc());
90 Ingredient.getDebugLoc());
102 if (VectorID == Intrinsic::experimental_noalias_scope_decl)
107 if (VectorID == Intrinsic::assume ||
108 VectorID == Intrinsic::lifetime_end ||
109 VectorID == Intrinsic::lifetime_start ||
110 VectorID == Intrinsic::sideeffect ||
111 VectorID == Intrinsic::pseudoprobe) {
116 const bool IsSingleScalar = VectorID != Intrinsic::assume &&
117 VectorID != Intrinsic::pseudoprobe;
121 Ingredient.getDebugLoc());
124 *CI, VectorID,
drop_end(Ingredient.operands()), CI->getType(),
125 VPIRFlags(*CI), *VPI, CI->getDebugLoc());
129 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
133 *VPI, Ingredient.getDebugLoc());
137 "inductions must be created earlier");
146 "Only recpies with zero or one defined values expected");
147 Ingredient.eraseFromParent();
164 if (
A->getOpcode() != Instruction::Store ||
165 B->getOpcode() != Instruction::Store)
175 const APInt *Distance;
181 Type *TyA = TypeInfo.inferScalarType(
A->getOperand(0));
183 Type *TyB = TypeInfo.inferScalarType(
B->getOperand(0));
189 uint64_t MaxStoreSize = std::max(SizeA, SizeB);
191 auto VFs =
B->getParent()->getPlan()->vectorFactors();
195 return Distance->
abs().
uge(
203 : ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), PSE(PSE),
204 L(L), TypeInfo(TypeInfo) {}
211 return ExcludeRecipes.contains(&R) ||
212 (Store && isNoAliasViaDistance(Store, &GroupLeader));
225 std::optional<SinkStoreInfo> SinkInfo = {}) {
226 bool CheckReads = SinkInfo.has_value();
233 if (SinkInfo && SinkInfo->shouldSkip(R))
237 if (!
R.mayWriteToMemory() && !(CheckReads &&
R.mayReadFromMemory()))
255template <
unsigned Opcode>
260 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
261 "Only Load and Store opcodes supported");
262 constexpr bool IsLoad = (Opcode == Instruction::Load);
269 if (!RepR || RepR->getOpcode() != Opcode || !FilterFn(RepR))
273 VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
276 RecipesByAddress[AddrSCEV].push_back(RepR);
281 for (
auto &Group :
Groups) {
294 bool Sinking =
false) {
303 if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
308 return RepR && RepR->getOpcode() == Instruction::Alloca;
317 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
329 if (Candidate->getParent() == SinkTo ||
334 if (!ScalarVFOnly && RepR->isSingleScalar())
337 WorkList.
insert({SinkTo, Candidate});
349 for (
auto &Recipe : *VPBB)
351 InsertIfValidSinkCandidate(VPBB,
Op);
355 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
358 std::tie(SinkTo, SinkCandidate) = WorkList[
I];
363 auto UsersOutsideSinkTo =
365 return cast<VPRecipeBase>(U)->getParent() != SinkTo;
367 if (
any_of(UsersOutsideSinkTo, [SinkCandidate](
VPUser *U) {
368 return !U->usesFirstLaneOnly(SinkCandidate);
371 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
373 if (NeedsDuplicating) {
377 if (
auto *SinkCandidateRepR =
383 nullptr , *SinkCandidateRepR,
387 Clone = SinkCandidate->
clone();
397 InsertIfValidSinkCandidate(SinkTo,
Op);
407 if (!EntryBB || EntryBB->size() != 1 ||
417 if (EntryBB->getNumSuccessors() != 2)
422 if (!Succ0 || !Succ1)
425 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
427 if (Succ0->getSingleSuccessor() == Succ1)
429 if (Succ1->getSingleSuccessor() == Succ0)
446 if (!Region1->isReplicator())
448 auto *MiddleBasicBlock =
450 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
455 if (!Region2 || !Region2->isReplicator())
460 if (!Mask1 || Mask1 != Mask2)
463 assert(Mask1 && Mask2 &&
"both region must have conditions");
469 if (TransformedRegions.
contains(Region1))
476 if (!Then1 || !Then2)
496 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
502 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
503 Phi1ToMove.eraseFromParent();
506 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
520 TransformedRegions.
insert(Region1);
523 return !TransformedRegions.
empty();
530 std::string RegionName = (
Twine(
"pred.") + Instr->getOpcodeName()).str();
531 assert(Instr->getParent() &&
"Predicated instruction not in any basic block");
532 auto *BlockInMask = PredRecipe->
getMask();
551 RecipeWithoutMask->getDebugLoc());
575 if (RepR->isPredicated())
594 if (ParentRegion && ParentRegion->
getExiting() == CurrentBlock)
606 if (!VPBB->getParent())
610 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
619 R.moveBefore(*PredVPBB, PredVPBB->
end());
621 auto *ParentRegion = VPBB->getParent();
622 if (ParentRegion && ParentRegion->getExiting() == VPBB)
623 ParentRegion->setExiting(PredVPBB);
627 return !WorkList.
empty();
634 bool ShouldSimplify =
true;
635 while (ShouldSimplify) {
651 if (!
IV ||
IV->getTruncInst())
666 for (
auto *U : FindMyCast->
users()) {
668 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
669 FoundUserCast = UserCast;
673 FindMyCast = FoundUserCast;
698 if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
712 WidenOriginalIV->dropPoisonGeneratingFlags();
725 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
727 if (IsConditionalAssume)
730 if (R.mayHaveSideEffects())
734 return all_of(R.definedValues(),
735 [](
VPValue *V) { return V->getNumUsers() == 0; });
755 VPUser *PhiUser = PhiR->getSingleUser();
761 PhiR->replaceAllUsesWith(Start);
762 PhiR->eraseFromParent();
778 Kind, FPBinOp, StartV, CanonicalIV, Step,
"offset.idx");
788 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy,
DL);
794 if (ResultTy != StepTy) {
801 Builder.setInsertPoint(VecPreheader);
802 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy,
DL);
804 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
810 for (
unsigned I = 0;
I !=
Users.size(); ++
I) {
815 Users.insert_range(V->users());
817 return Users.takeVector();
831 nullptr, StartV, StepV, PtrIV->
getDebugLoc(), Builder);
868 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
869 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
877 Def->operands(),
true,
879 Clone->insertAfter(Def);
880 Def->replaceAllUsesWith(Clone);
891 PtrIV->replaceAllUsesWith(PtrAdd);
898 if (HasOnlyVectorVFs &&
none_of(WideIV->users(), [WideIV](
VPUser *U) {
899 return U->usesScalars(WideIV);
905 Plan,
ID.getKind(),
ID.getInductionOpcode(),
907 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
908 WideIV->getDebugLoc(), Builder);
911 if (!HasOnlyVectorVFs) {
913 "plans containing a scalar VF cannot also include scalable VFs");
914 WideIV->replaceAllUsesWith(Steps);
917 WideIV->replaceUsesWithIf(Steps,
918 [WideIV, HasScalableVF](
VPUser &U,
unsigned) {
920 return U.usesFirstLaneOnly(WideIV);
921 return U.usesScalars(WideIV);
937 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
942 if (!Def || Def->getNumOperands() != 2)
950 auto IsWideIVInc = [&]() {
951 auto &
ID = WideIV->getInductionDescriptor();
954 VPValue *IVStep = WideIV->getStepValue();
955 switch (
ID.getInductionOpcode()) {
956 case Instruction::Add:
958 case Instruction::FAdd:
960 case Instruction::FSub:
963 case Instruction::Sub: {
983 return IsWideIVInc() ? WideIV :
nullptr;
1003 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1016 FirstActiveLane =
B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
1017 FirstActiveLaneType,
DL);
1018 VPValue *EndValue =
B.createAdd(CanonicalIV, FirstActiveLane,
DL);
1023 if (Incoming != WideIV) {
1025 EndValue =
B.createAdd(EndValue, One,
DL);
1028 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
1030 VPIRValue *Start = WideIV->getStartValue();
1031 VPValue *Step = WideIV->getStepValue();
1032 EndValue =
B.createDerivedIV(
1034 Start, EndValue, Step);
1049 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1056 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
1059 Start, VectorTC, Step);
1088 assert(EndValue &&
"Must have computed the end value up front");
1093 if (Incoming != WideIV)
1104 auto *Zero = Plan.
getZero(StepTy);
1105 return B.createPtrAdd(EndValue,
B.createSub(Zero, Step),
1110 return B.createNaryOp(
1111 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
1113 : Instruction::FAdd,
1114 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
1126 VPBuilder VectorPHBuilder(VectorPH, VectorPH->begin());
1135 WideIV, VectorPHBuilder, TypeInfo, ResumeTC))
1136 EndValues[WideIV] = EndValue;
1146 R.getVPSingleValue()->replaceAllUsesWith(EndValue);
1147 R.eraseFromParent();
1156 for (
auto [Idx, PredVPBB] :
enumerate(ExitVPBB->getPredecessors())) {
1158 if (PredVPBB == MiddleVPBB)
1160 ExitIRI->getOperand(Idx),
1164 Plan, TypeInfo, PredVPBB, ExitIRI->getOperand(Idx), PSE);
1166 ExitIRI->setOperand(Idx, Escape);
1183 const auto &[V, Inserted] = SCEV2VPV.
try_emplace(ExpR->getSCEV(), ExpR);
1186 ExpR->replaceAllUsesWith(V->second);
1187 ExpR->eraseFromParent();
1196 while (!WorkList.
empty()) {
1198 if (!Seen.
insert(Cur).second)
1206 R->eraseFromParent();
1213static std::optional<std::pair<bool, unsigned>>
1216 std::optional<std::pair<bool, unsigned>>>(R)
1219 [](
auto *
I) {
return std::make_pair(
false,
I->getOpcode()); })
1221 return std::make_pair(
true,
I->getVectorIntrinsicID());
1223 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe, VPScalarIVStepsRecipe>(
1229 I->getVPRecipeID());
1231 .
Default([](
auto *) {
return std::nullopt; });
1249 Value *V =
Op->getUnderlyingValue();
1255 auto FoldToIRValue = [&]() ->
Value * {
1257 if (OpcodeOrIID->first) {
1258 if (R.getNumOperands() != 2)
1260 unsigned ID = OpcodeOrIID->second;
1261 return Folder.FoldBinaryIntrinsic(
ID,
Ops[0],
Ops[1],
1264 unsigned Opcode = OpcodeOrIID->second;
1273 return Folder.FoldSelect(
Ops[0],
Ops[1],
1276 return Folder.FoldBinOp(Instruction::BinaryOps::Xor,
Ops[0],
1278 case Instruction::Select:
1279 return Folder.FoldSelect(
Ops[0],
Ops[1],
Ops[2]);
1280 case Instruction::ICmp:
1281 case Instruction::FCmp:
1284 case Instruction::GetElementPtr: {
1287 return Folder.FoldGEP(
GEP->getSourceElementType(),
Ops[0],
1297 case Instruction::ExtractElement:
1304 if (
Value *V = FoldToIRValue())
1305 return R.getParent()->getPlan()->getOrAddLiveIn(V);
1311 VPlan *Plan = Def->getParent()->getPlan();
1317 return Def->replaceAllUsesWith(V);
1323 PredPHI->replaceAllUsesWith(
Op);
1336 bool CanCreateNewRecipe =
1343 if (TruncTy == ATy) {
1344 Def->replaceAllUsesWith(
A);
1353 : Instruction::ZExt;
1356 if (
auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1358 Ext->setUnderlyingValue(UnderlyingExt);
1360 Def->replaceAllUsesWith(Ext);
1362 auto *Trunc = Builder.createWidenCast(Instruction::Trunc,
A, TruncTy);
1363 Def->replaceAllUsesWith(Trunc);
1371 for (
VPUser *U :
A->users()) {
1373 for (
VPValue *VPV : R->definedValues())
1387 Def->replaceAllUsesWith(
X);
1388 Def->eraseFromParent();
1394 return Def->replaceAllUsesWith(
1399 return Def->replaceAllUsesWith(
X);
1403 return Def->replaceAllUsesWith(
1408 return Def->replaceAllUsesWith(
1413 return Def->replaceAllUsesWith(
X);
1417 return Def->replaceAllUsesWith(Plan->
getFalse());
1421 return Def->replaceAllUsesWith(
X);
1424 if (CanCreateNewRecipe &&
1429 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1430 !Def->getOperand(1)->hasMoreThanOneUniqueUser()))
1431 return Def->replaceAllUsesWith(
1432 Builder.createLogicalAnd(
X, Builder.createOr(
Y, Z)));
1437 return Def->replaceAllUsesWith(Def->getOperand(1));
1442 return Def->replaceAllUsesWith(Builder.createLogicalAnd(
X,
Y));
1446 return Def->replaceAllUsesWith(Plan->
getFalse());
1449 return Def->replaceAllUsesWith(
X);
1453 if (CanCreateNewRecipe &&
1455 return Def->replaceAllUsesWith(Builder.createNot(
C));
1459 Def->setOperand(0,
C);
1460 Def->setOperand(1,
Y);
1461 Def->setOperand(2,
X);
1466 return Def->replaceAllUsesWith(
A);
1469 return Def->replaceAllUsesWith(
A);
1472 return Def->replaceAllUsesWith(
1479 return Def->replaceAllUsesWith(
1481 Def->getDebugLoc(),
"", NW));
1484 if (CanCreateNewRecipe &&
1492 ->hasNoSignedWrap()};
1493 return Def->replaceAllUsesWith(
1494 Builder.createSub(
X,
Y, Def->getDebugLoc(),
"", NW));
1500 return Def->replaceAllUsesWith(Builder.createNaryOp(
1502 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1507 return Def->replaceAllUsesWith(Builder.createNaryOp(
1509 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1514 return Def->replaceAllUsesWith(
A);
1529 R->setOperand(1,
Y);
1530 R->setOperand(2,
X);
1534 R->replaceAllUsesWith(Cmp);
1539 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1540 Cmp->setDebugLoc(Def->getDebugLoc());
1552 if (
Op->getNumUsers() > 1 ||
1556 }
else if (!UnpairedCmp) {
1557 UnpairedCmp =
Op->getDefiningRecipe();
1561 UnpairedCmp =
nullptr;
1568 if (NewOps.
size() < Def->getNumOperands()) {
1570 return Def->replaceAllUsesWith(NewAnyOf);
1577 if (CanCreateNewRecipe &&
1583 return Def->replaceAllUsesWith(NewCmp);
1591 return Def->replaceAllUsesWith(Def->getOperand(1));
1597 X = Builder.createWidenCast(Instruction::Trunc,
X, WideStepTy);
1598 Def->replaceAllUsesWith(
X);
1608 Def->setOperand(1, Def->getOperand(0));
1609 Def->setOperand(0,
Y);
1616 return Def->replaceAllUsesWith(Def->getOperand(0));
1622 Def->replaceAllUsesWith(
1623 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1627 return Def->replaceAllUsesWith(
A);
1633 Def->replaceAllUsesWith(
1634 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1641 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1646 Def->replaceAllUsesWith(
1656 "broadcast operand must be single-scalar");
1657 Def->setOperand(0,
C);
1662 if (Def->getNumOperands() == 1) {
1663 Def->replaceAllUsesWith(Def->getOperand(0));
1668 Phi->replaceAllUsesWith(Phi->getOperand(0));
1674 if (Def->getNumOperands() == 1 &&
1676 return Def->replaceAllUsesWith(IRV);
1689 return Def->replaceAllUsesWith(
A);
1692 Def->replaceAllUsesWith(Builder.createNaryOp(
1693 Instruction::ExtractElement, {A, LaneToExtract}, Def->getDebugLoc()));
1707 auto *IVInc = Def->getOperand(0);
1708 if (IVInc->getNumUsers() == 2) {
1713 if (Phi->getNumUsers() == 1 || (Phi->getNumUsers() == 2 && Inc)) {
1714 Def->replaceAllUsesWith(IVInc);
1716 Inc->replaceAllUsesWith(Phi);
1717 Phi->setOperand(0,
Y);
1733 Steps->replaceAllUsesWith(Steps->getOperand(0));
1741 Def->replaceUsesWithIf(StartV, [](
const VPUser &U,
unsigned Idx) {
1743 return PhiR && PhiR->isInLoop();
1749 Def->replaceAllUsesWith(
A);
1758 [Def,
A](
VPUser *U) { return U->usesScalars(A) || Def == U; })) {
1759 return Def->replaceAllUsesWith(
A);
1763 return Def->replaceAllUsesWith(
A);
1790 while (!Worklist.
empty()) {
1799 R->replaceAllUsesWith(
1800 Builder.createLogicalAnd(HeaderMask, Builder.createLogicalAnd(
X,
Y)));
1819 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1828 !WidenStoreR->isConsecutive()) {
1829 VPValue *Mask = WidenStoreR->getMask();
1838 {WidenStoreR->getOperand(1)});
1843 &WidenStoreR->getIngredient(), {Extract, WidenStoreR->getAddr()},
1844 true ,
nullptr , {},
1846 ScalarStore->insertBefore(WidenStoreR);
1847 WidenStoreR->eraseFromParent();
1852 if (RepR && RepR->getOpcode() == Instruction::Store &&
1855 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1856 true ,
nullptr , *RepR ,
1857 *RepR , RepR->getDebugLoc());
1858 Clone->insertBefore(RepOrWidenR);
1860 VPValue *ExtractOp = Clone->getOperand(0);
1866 Clone->setOperand(0, ExtractOp);
1867 RepR->eraseFromParent();
1876 auto IntroducesBCastOf = [](
const VPValue *
Op) {
1885 return !U->usesScalars(
Op);
1889 if (
any_of(RepOrWidenR->users(), IntroducesBCastOf(RepOrWidenR)) &&
1892 make_filter_range(Op->users(), not_equal_to(RepOrWidenR)),
1893 IntroducesBCastOf(Op)))
1897 auto *IRV = dyn_cast<VPIRValue>(Op);
1898 bool LiveInNeedsBroadcast = IRV && !isa<Constant>(IRV->getValue());
1899 auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
1900 return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
1905 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1906 true ,
nullptr, *RepOrWidenR);
1907 Clone->insertBefore(RepOrWidenR);
1908 RepOrWidenR->replaceAllUsesWith(Clone);
1910 RepOrWidenR->eraseFromParent();
1946 if (Blend->isNormalized() || !
match(Blend->getMask(0),
m_False()))
1947 UniqueValues.
insert(Blend->getIncomingValue(0));
1948 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
1950 UniqueValues.
insert(Blend->getIncomingValue(
I));
1952 if (UniqueValues.
size() == 1) {
1953 Blend->replaceAllUsesWith(*UniqueValues.
begin());
1954 Blend->eraseFromParent();
1958 if (Blend->isNormalized())
1964 unsigned StartIndex = 0;
1965 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
1970 if (Mask->getNumUsers() == 1 && !
match(Mask,
m_False())) {
1977 OperandsWithMask.
push_back(Blend->getIncomingValue(StartIndex));
1979 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
1980 if (
I == StartIndex)
1982 OperandsWithMask.
push_back(Blend->getIncomingValue(
I));
1983 OperandsWithMask.
push_back(Blend->getMask(
I));
1988 OperandsWithMask, *Blend, Blend->getDebugLoc());
1989 NewBlend->insertBefore(&R);
1991 VPValue *DeadMask = Blend->getMask(StartIndex);
1993 Blend->eraseFromParent();
1998 if (NewBlend->getNumOperands() == 3 &&
2000 VPValue *Inc0 = NewBlend->getOperand(0);
2001 VPValue *Inc1 = NewBlend->getOperand(1);
2002 VPValue *OldMask = NewBlend->getOperand(2);
2003 NewBlend->setOperand(0, Inc1);
2004 NewBlend->setOperand(1, Inc0);
2005 NewBlend->setOperand(2, NewMask);
2032 APInt MaxVal = AlignedTC - 1;
2035 unsigned NewBitWidth =
2041 bool MadeChange =
false;
2050 if (!WideIV || !WideIV->isCanonical() ||
2051 WideIV->hasMoreThanOneUniqueUser() ||
2052 NewIVTy == WideIV->getScalarType())
2057 VPUser *SingleUser = WideIV->getSingleUser();
2065 auto *NewStart = Plan.
getZero(NewIVTy);
2066 WideIV->setStartValue(NewStart);
2068 WideIV->setStepValue(NewStep);
2075 Cmp->setOperand(1, NewBTC);
2089 return any_of(
Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
2091 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, PSE);
2105 const SCEV *VectorTripCount =
2110 "Trip count SCEV must be computable");
2131 auto *Term = &ExitingVPBB->
back();
2144 for (
unsigned Part = 0; Part < UF; ++Part) {
2150 Extracts[Part] = Ext;
2162 match(Phi->getBackedgeValue(),
2164 assert(Index &&
"Expected index from ActiveLaneMask instruction");
2181 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
2188 "Expected incoming values of Phi to be ActiveLaneMasks");
2193 EntryALM->setOperand(2, ALMMultiplier);
2194 LoopALM->setOperand(2, ALMMultiplier);
2198 ExtractFromALM(EntryALM, EntryExtracts);
2203 ExtractFromALM(LoopALM, LoopExtracts);
2205 Not->setOperand(0, LoopExtracts[0]);
2208 for (
unsigned Part = 0; Part < UF; ++Part) {
2209 Phis[Part]->setStartValue(EntryExtracts[Part]);
2210 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
2223 auto *Term = &ExitingVPBB->
back();
2235 const SCEV *VectorTripCount =
2241 "Trip count SCEV must be computable");
2260 Term->setOperand(1, Plan.
getTrue());
2265 {}, Term->getDebugLoc());
2267 Term->eraseFromParent();
2302 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
2312 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
2313 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
2341 auto TryToPushSinkCandidate = [&](
VPRecipeBase *SinkCandidate) {
2344 if (SinkCandidate == Previous)
2348 !Seen.
insert(SinkCandidate).second ||
2361 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
2364 "only recipes with a single defined value expected");
2379 if (SinkCandidate == FOR)
2382 SinkCandidate->moveAfter(Previous);
2383 Previous = SinkCandidate;
2407 [&VPDT, HoistPoint](
VPUser *U) {
2408 auto *R = cast<VPRecipeBase>(U);
2409 return HoistPoint == R ||
2410 VPDT.properlyDominates(HoistPoint, R);
2412 "HoistPoint must dominate all users of FOR");
2414 auto NeedsHoisting = [HoistPoint, &VPDT,
2416 VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
2417 if (!HoistCandidate)
2422 HoistCandidate->
getRegion() == EnclosingLoopRegion) &&
2423 "CFG in VPlan should still be flat, without replicate regions");
2425 if (!Visited.
insert(HoistCandidate).second)
2437 return HoistCandidate;
2446 for (
unsigned I = 0;
I != HoistCandidates.
size(); ++
I) {
2449 "only recipes with a single defined value expected");
2461 if (
auto *R = NeedsHoisting(
Op)) {
2464 if (R->getNumDefinedValues() != 1)
2478 HoistCandidate->moveBefore(*HoistPoint->
getParent(),
2498 VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
2501 while (
auto *PrevPhi =
2503 assert(PrevPhi->getParent() == FOR->getParent());
2505 Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
2524 {FOR, FOR->getBackedgeValue()});
2529 RecurSplice->setOperand(0, FOR);
2535 for (
VPUser *U : RecurSplice->users()) {
2545 VPValue *PenultimateIndex =
B.createSub(LastActiveLane, One);
2546 VPValue *PenultimateLastIter =
2548 {PenultimateIndex, FOR->getBackedgeValue()});
2553 VPValue *Sel =
B.createSelect(Cmp, LastPrevIter, PenultimateLastIter);
2566 RecurKind RK = PhiR->getRecurrenceKind();
2573 RecWithFlags->dropPoisonGeneratingFlags();
2579struct VPCSEDenseMapInfo :
public DenseMapInfo<VPSingleDefRecipe *> {
2581 return Def == getEmptyKey() || Def == getTombstoneKey();
2592 return GEP->getSourceElementType();
2595 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2596 [](
auto *
I) {
return I->getSourceElementType(); })
2597 .
Default([](
auto *) {
return nullptr; });
2601 static bool canHandle(
const VPSingleDefRecipe *Def) {
2610 if (!
C || (!
C->first && (
C->second == Instruction::InsertValue ||
2611 C->second == Instruction::ExtractValue)))
2617 return !
Def->mayReadFromMemory();
2621 static unsigned getHashValue(
const VPSingleDefRecipe *Def) {
2622 const VPlan *Plan =
Def->getParent()->getPlan();
2623 VPTypeAnalysis TypeInfo(*Plan);
2626 getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
2629 if (RFlags->hasPredicate())
2632 return hash_combine(Result, SIVSteps->getInductionOpcode());
2637 static bool isEqual(
const VPSingleDefRecipe *L,
const VPSingleDefRecipe *R) {
2640 if (
L->getVPRecipeID() !=
R->getVPRecipeID() ||
2642 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2644 !
equal(
L->operands(),
R->operands()))
2647 "must have valid opcode info for both recipes");
2649 if (LFlags->hasPredicate() &&
2650 LFlags->getPredicate() !=
2654 if (LSIV->getInductionOpcode() !=
2660 const VPRegionBlock *RegionL =
L->getRegion();
2661 const VPRegionBlock *RegionR =
R->getRegion();
2664 L->getParent() !=
R->getParent())
2666 const VPlan *Plan =
L->getParent()->getPlan();
2667 VPTypeAnalysis TypeInfo(*Plan);
2668 return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
2684 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2688 if (!VPDT.
dominates(V->getParent(), VPBB))
2693 Def->replaceAllUsesWith(V);
2712 "Expected vector prehader's successor to be the vector loop region");
2719 return !Op->isDefinedOutsideLoopRegions();
2722 R.moveBefore(*Preheader, Preheader->
end());
2740 assert(!RepR->isPredicated() &&
2741 "Expected prior transformation of predicated replicates to "
2742 "replicate regions");
2747 if (!RepR->isSingleScalar())
2759 if (
any_of(Def->users(), [&SinkBB, &LoopRegion](
VPUser *U) {
2760 auto *UserR = cast<VPRecipeBase>(U);
2761 VPBasicBlock *Parent = UserR->getParent();
2763 if (SinkBB && SinkBB != Parent)
2768 return UserR->isPhi() || Parent->getEnclosingLoopRegion() ||
2769 Parent->getSinglePredecessor() != LoopRegion;
2779 "Defining block must dominate sink block");
2805 VPValue *ResultVPV = R.getVPSingleValue();
2807 unsigned NewResSizeInBits = MinBWs.
lookup(UI);
2808 if (!NewResSizeInBits)
2821 (void)OldResSizeInBits;
2829 VPW->dropPoisonGeneratingFlags();
2831 if (OldResSizeInBits != NewResSizeInBits &&
2835 Instruction::ZExt, ResultVPV, OldResTy,
nullptr,
2837 Ext->insertAfter(&R);
2839 Ext->setOperand(0, ResultVPV);
2840 assert(OldResSizeInBits > NewResSizeInBits &&
"Nothing to shrink?");
2843 "Only ICmps should not need extending the result.");
2853 for (
unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
2854 auto *
Op = R.getOperand(Idx);
2855 unsigned OpSizeInBits =
2857 if (OpSizeInBits == NewResSizeInBits)
2859 assert(OpSizeInBits > NewResSizeInBits &&
"nothing to truncate");
2860 auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.
try_emplace(
Op);
2862 R.setOperand(Idx, ProcessedIter->second);
2870 Builder.setInsertPoint(&R);
2872 Builder.createWidenCast(Instruction::Trunc,
Op, NewResTy);
2873 ProcessedIter->second = NewOp;
2874 R.setOperand(Idx, NewOp);
2882 std::optional<VPDominatorTree> VPDT;
2899 assert(VPBB->getNumSuccessors() == 2 &&
2900 "Two successors expected for BranchOnCond");
2901 unsigned RemovedIdx;
2912 "There must be a single edge between VPBB and its successor");
2920 VPBB->back().eraseFromParent();
2932 if (Reachable.contains(
B))
2943 for (
VPValue *Def : R.definedValues())
2944 Def->replaceAllUsesWith(&Tmp);
2945 R.eraseFromParent();
3004 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
3015 auto *EntryIncrement = Builder.createOverflowingOp(
3017 DL,
"index.part.next");
3023 {EntryIncrement, TC, ALMMultiplier},
DL,
3024 "active.lane.mask.entry");
3031 LaneMaskPhi->insertBefore(*HeaderVPBB, HeaderVPBB->begin());
3036 Builder.setInsertPoint(OriginalTerminator);
3037 auto *InLoopIncrement = Builder.createOverflowingOp(
3039 {CanonicalIVIncrement, &Plan.
getVF()}, {
false,
false},
DL);
3041 {InLoopIncrement, TC, ALMMultiplier},
DL,
3042 "active.lane.mask.next");
3047 auto *NotMask = Builder.createNot(ALM,
DL);
3054 bool UseActiveLaneMaskForControlFlow) {
3056 auto *FoundWidenCanonicalIVUser =
find_if(
3058 assert(FoundWidenCanonicalIVUser &&
3059 "Must have widened canonical IV when tail folding!");
3061 auto *WideCanonicalIV =
3064 if (UseActiveLaneMaskForControlFlow) {
3073 nullptr,
"active.lane.mask");
3089 template <
typename OpTy>
bool match(OpTy *V)
const {
3100template <
typename Op0_t,
typename Op1_t>
3119 VPValue *Addr, *Mask, *EndPtr;
3122 auto AdjustEndPtr = [&CurRecipe, &EVL](
VPValue *EndPtr) {
3124 EVLEndPtr->insertBefore(&CurRecipe);
3125 EVLEndPtr->setOperand(1, &EVL);
3129 auto GetVPReverse = [&CurRecipe, &EVL, &TypeInfo, Plan,
3134 Intrinsic::experimental_vp_reverse, {V, Plan->
getTrue(), &EVL},
3136 Reverse->insertBefore(&CurRecipe);
3140 if (
match(&CurRecipe,
3151 Mask = GetVPReverse(Mask);
3152 Addr = AdjustEndPtr(EndPtr);
3155 LoadR->insertBefore(&CurRecipe);
3157 Intrinsic::experimental_vp_reverse, {LoadR, Plan->
getTrue(), &EVL},
3165 StoredVal, EVL, Mask);
3167 if (
match(&CurRecipe,
3171 Mask = GetVPReverse(Mask);
3172 Addr = AdjustEndPtr(EndPtr);
3173 StoredVal = GetVPReverse(ReversedVal);
3175 StoredVal, EVL, Mask);
3179 if (Rdx->isConditional() &&
3184 if (Interleave->getMask() &&
3189 if (
match(&CurRecipe,
3198 Intrinsic::vp_merge, {Mask,
LHS,
RHS, &EVL},
3212 if (
match(&CurRecipe,
3226 VPValue *HeaderMask =
nullptr, *EVL =
nullptr;
3231 HeaderMask = R.getVPSingleValue();
3243 NewR->insertBefore(R);
3244 for (
auto [Old, New] :
3245 zip_equal(R->definedValues(), NewR->definedValues()))
3246 Old->replaceAllUsesWith(New);
3260 Merge->insertBefore(LogicalAnd);
3261 LogicalAnd->replaceAllUsesWith(
Merge);
3269 R->eraseFromParent();
3286 "User of VF that we can't transform to EVL.");
3296 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
3297 "increment of the canonical induction.");
3313 MaxEVL = Builder.createScalarZExtOrTrunc(
3317 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
3318 VPValue *PrevEVL = Builder.createScalarPhi(
3332 Intrinsic::experimental_vp_splice,
3333 {V1, V2, Imm, Plan.
getTrue(), PrevEVL, &EVL},
3337 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
3350 if (match(&R, m_ComputeReductionResult(m_Select(m_Specific(HeaderMask),
3351 m_VPValue(), m_VPValue()))))
3352 return R.getOperand(0)->getDefiningRecipe()->getRegion() ==
3353 Plan.getVectorLoopRegion();
3365 VPValue *EVLMask = Builder.createICmp(
3425 VPlan &Plan,
const std::optional<unsigned> &MaxSafeElements) {
3437 auto *CurrentIteration =
3439 CurrentIteration->insertBefore(*Header, Header->begin());
3440 VPBuilder Builder(Header, Header->getFirstNonPhi());
3443 VPPhi *AVLPhi = Builder.createScalarPhi(
3447 if (MaxSafeElements) {
3457 Builder.setInsertPoint(CanonicalIVIncrement);
3461 OpVPEVL = Builder.createScalarZExtOrTrunc(
3462 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
3464 auto *NextIter = Builder.createAdd(
3465 OpVPEVL, CurrentIteration, CanonicalIVIncrement->getDebugLoc(),
3466 "current.iteration.next", CanonicalIVIncrement->getNoWrapFlags());
3467 CurrentIteration->addOperand(NextIter);
3471 "avl.next", {
true,
false});
3479 CanonicalIV->replaceAllUsesWith(CurrentIteration);
3480 CanonicalIVIncrement->setOperand(0, CanonicalIV);
3494 assert(!CurrentIteration &&
3495 "Found multiple CurrentIteration. Only one expected");
3496 CurrentIteration = PhiR;
3500 if (!CurrentIteration)
3511 CurrentIteration->
getDebugLoc(),
"current.iteration.iv");
3520 CanIVInc->eraseFromParent();
3529 if (Header->empty())
3538 if (!
match(EVLPhi->getBackedgeValue(),
3551 [[maybe_unused]]
bool FoundAVLNext =
3554 assert(FoundAVLNext &&
"Didn't find AVL backedge?");
3562 [[maybe_unused]]
bool FoundIncrement =
match(
3569 "Expected BranchOnCond with ICmp comparing CanIV + VFxUF with vector "
3574 LatchBr->setOperand(
3585 return R->getRegion() ||
3589 for (
const SCEV *Stride : StridesMap.
values()) {
3592 const APInt *StrideConst;
3615 RewriteMap[StrideV] = PSE.
getSCEV(StrideV);
3622 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3625 if (NewSCEV != ScevExpr) {
3627 ExpSCEV->replaceAllUsesWith(NewExp);
3636 const std::function<
bool(
BasicBlock *)> &BlockNeedsPredication) {
3640 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](
VPRecipeBase *Root) {
3645 while (!Worklist.
empty()) {
3648 if (!Visited.
insert(CurRec).second)
3670 RecWithFlags->isDisjoint()) {
3673 Builder.createAdd(
A,
B, RecWithFlags->getDebugLoc());
3674 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3675 RecWithFlags->replaceAllUsesWith(New);
3676 RecWithFlags->eraseFromParent();
3679 RecWithFlags->dropPoisonGeneratingFlags();
3684 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3685 "found instruction with poison generating flags not covered by "
3686 "VPRecipeWithIRFlags");
3691 if (
VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3704 Instruction &UnderlyingInstr = WidenRec->getIngredient();
3705 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3706 if (AddrDef && WidenRec->isConsecutive() &&
3707 BlockNeedsPredication(UnderlyingInstr.
getParent()))
3708 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3710 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3714 InterleaveRec->getInterleaveGroup();
3715 bool NeedPredication =
false;
3717 I < NumMembers; ++
I) {
3720 NeedPredication |= BlockNeedsPredication(Member->getParent());
3723 if (NeedPredication)
3724 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3736 if (InterleaveGroups.empty())
3743 for (
const auto *IG : InterleaveGroups) {
3749 StoredValues.
push_back(StoreR->getStoredValue());
3750 for (
unsigned I = 1;
I < IG->getFactor(); ++
I) {
3757 StoredValues.
push_back(StoreR->getStoredValue());
3761 bool NeedsMaskForGaps =
3762 (IG->requiresScalarEpilogue() && !EpilogueAllowed) ||
3763 (!StoredValues.
empty() && !IG->isFull());
3775 VPValue *Addr = Start->getAddr();
3784 assert(IG->getIndex(IRInsertPos) != 0 &&
3785 "index of insert position shouldn't be zero");
3789 IG->getIndex(IRInsertPos),
3793 Addr =
B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3799 if (IG->isReverse()) {
3802 -(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
3803 ReversePtr->insertBefore(InsertPos);
3807 InsertPos->getMask(), NeedsMaskForGaps,
3808 InterleaveMD, InsertPos->getDebugLoc());
3809 VPIG->insertBefore(InsertPos);
3812 for (
unsigned i = 0; i < IG->getFactor(); ++i)
3815 if (!Member->getType()->isVoidTy()) {
3874 AddOp = Instruction::Add;
3875 MulOp = Instruction::Mul;
3877 AddOp =
ID.getInductionOpcode();
3878 MulOp = Instruction::FMul;
3886 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty,
DL);
3887 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty,
DL);
3896 Init = Builder.createWidenCast(Instruction::UIToFP,
Init, StepTy);
3901 Init = Builder.createNaryOp(MulOp, {
Init, SplatStep}, Flags);
3902 Init = Builder.createNaryOp(AddOp, {SplatStart,
Init}, Flags,
3918 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3922 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3925 VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
3928 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3935 auto *
Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3938 WidePHI->addOperand(
Next);
3966 VPlan *Plan = R->getParent()->getPlan();
3967 VPValue *Start = R->getStartValue();
3968 VPValue *Step = R->getStepValue();
3969 VPValue *VF = R->getVFValue();
3971 assert(R->getInductionDescriptor().getKind() ==
3973 "Not a pointer induction according to InductionDescriptor!");
3976 "Recipe should have been replaced");
3982 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start,
DL,
"pointer.phi");
3986 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3989 Offset = Builder.createOverflowingOp(Instruction::Mul, {
Offset, Step});
3991 Builder.createWidePtrAdd(ScalarPtrPhi,
Offset,
DL,
"vector.gep");
3992 R->replaceAllUsesWith(PtrAdd);
3997 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.
inferScalarType(VF),
3999 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
4002 Builder.createPtrAdd(ScalarPtrPhi, Inc,
DL,
"ptr.ind");
4011 if (!R->isReplicator())
4015 R->dissolveToCFGLoop();
4036 assert(Br->getNumOperands() == 2 &&
4037 "BranchOnTwoConds must have exactly 2 conditions");
4041 assert(Successors.size() == 3 &&
4042 "BranchOnTwoConds must have exactly 3 successors");
4047 VPValue *Cond0 = Br->getOperand(0);
4048 VPValue *Cond1 = Br->getOperand(1);
4053 !BrOnTwoCondsBB->
getParent() &&
"regions must already be dissolved");
4066 Br->eraseFromParent();
4089 WidenIVR->replaceAllUsesWith(PtrAdd);
4102 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
4103 Select = Builder.createSelect(Blend->getMask(
I),
4104 Blend->getIncomingValue(
I),
Select,
4105 R.getDebugLoc(),
"predphi", *Blend);
4106 Blend->replaceAllUsesWith(
Select);
4111 if (!VEPR->getOffset()) {
4113 "Expected unroller to have materialized offset for UF != 1");
4114 VEPR->materializeOffset();
4129 for (
VPValue *
Op : LastActiveL->operands()) {
4130 VPValue *NotMask = Builder.createNot(
Op, LastActiveL->getDebugLoc());
4135 VPValue *FirstInactiveLane = Builder.createNaryOp(
4137 LastActiveL->getDebugLoc(),
"first.inactive.lane");
4143 Builder.createSub(FirstInactiveLane, One,
4144 LastActiveL->getDebugLoc(),
"last.active.lane");
4154 assert(VPI->isMasked() &&
4155 "Unmasked MaskedCond should be simplified earlier");
4156 VPI->replaceAllUsesWith(Builder.createNaryOp(
4168 Instruction::Add, VPI->operands(), VPI->getNoWrapFlags(),
4169 VPI->getDebugLoc());
4170 VPI->replaceAllUsesWith(
Add);
4179 DebugLoc DL = BranchOnCountInst->getDebugLoc();
4182 ToRemove.push_back(BranchOnCountInst);
4197 ? Instruction::UIToFP
4198 : Instruction::Trunc;
4199 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
4205 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
4211 MulOpc = Instruction::FMul;
4212 Flags = VPI->getFastMathFlags();
4214 MulOpc = Instruction::Mul;
4219 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
4221 VPI->replaceAllUsesWith(VectorStep);
4227 R->eraseFromParent();
4235 struct EarlyExitInfo {
4246 if (Pred == MiddleVPBB)
4251 VPValue *CondOfEarlyExitingVPBB;
4252 [[maybe_unused]]
bool Matched =
4253 match(EarlyExitingVPBB->getTerminator(),
4255 assert(Matched &&
"Terminator must be BranchOnCond");
4259 VPBuilder EarlyExitingBuilder(EarlyExitingVPBB->getTerminator());
4260 auto *CondToEarlyExit = EarlyExitingBuilder.
createNaryOp(
4262 TrueSucc == ExitBlock
4263 ? CondOfEarlyExitingVPBB
4264 : EarlyExitingBuilder.
createNot(CondOfEarlyExitingVPBB));
4270 "exit condition must dominate the latch");
4279 assert(!Exits.
empty() &&
"must have at least one early exit");
4286 for (
const auto &[Num, VPB] :
enumerate(RPOT))
4288 llvm::sort(Exits, [&RPOIdx](
const EarlyExitInfo &
A,
const EarlyExitInfo &
B) {
4289 return RPOIdx[
A.EarlyExitingVPBB] < RPOIdx[
B.EarlyExitingVPBB];
4295 for (
unsigned I = 0;
I + 1 < Exits.
size(); ++
I)
4296 for (
unsigned J =
I + 1; J < Exits.
size(); ++J)
4298 Exits[
I].EarlyExitingVPBB) &&
4299 "RPO sort must place dominating exits before dominated ones");
4305 VPValue *Combined = Exits[0].CondToExit;
4306 for (
const EarlyExitInfo &Info :
drop_begin(Exits))
4307 Combined = Builder.createLogicalOr(Combined, Info.CondToExit);
4313 "Early exit store masking not implemented");
4317 for (
unsigned Idx = 0; Idx != Exits.
size(); ++Idx) {
4321 VectorEarlyExitVPBBs[Idx] = VectorEarlyExitVPBB;
4329 Exits.
size() == 1 ? VectorEarlyExitVPBBs[0]
4363 for (
auto [Exit, VectorEarlyExitVPBB] :
4364 zip_equal(Exits, VectorEarlyExitVPBBs)) {
4365 auto &[EarlyExitingVPBB, EarlyExitVPBB,
_] = Exit;
4377 ExitIRI->getIncomingValueForBlock(EarlyExitingVPBB);
4378 VPValue *NewIncoming = IncomingVal;
4380 VPBuilder EarlyExitBuilder(VectorEarlyExitVPBB);
4385 ExitIRI->removeIncomingValueFor(EarlyExitingVPBB);
4386 ExitIRI->addOperand(NewIncoming);
4389 EarlyExitingVPBB->getTerminator()->eraseFromParent();
4423 bool IsLastDispatch = (
I + 2 == Exits.
size());
4425 IsLastDispatch ? VectorEarlyExitVPBBs.
back()
4431 VectorEarlyExitVPBBs[
I]->setPredecessors({CurrentBB});
4434 CurrentBB = FalseBB;
4441 "Unexpected terminator");
4442 auto *IsLatchExitTaken =
4444 LatchExitingBranch->getOperand(1));
4446 DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
4447 LatchExitingBranch->eraseFromParent();
4448 Builder.setInsertPoint(LatchVPBB);
4450 {IsAnyExitTaken, IsLatchExitTaken}, LatchDL);
4452 LatchVPBB->
setSuccessors({DispatchVPBB, MiddleVPBB, HeaderVPBB});
4463 Type *RedTy = Ctx.Types.inferScalarType(Red);
4464 VPValue *VecOp = Red->getVecOp();
4466 assert(!Red->isPartialReduction() &&
4467 "This path does not support partial reductions");
4470 auto IsExtendedRedValidAndClampRange =
4483 "getExtendedReductionCost only supports integer types");
4484 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
4485 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
4486 Red->getFastMathFlags(),
CostKind);
4487 return ExtRedCost.
isValid() && ExtRedCost < ExtCost + RedCost;
4495 IsExtendedRedValidAndClampRange(
4498 Ctx.Types.inferScalarType(
A)))
4517 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
4518 Opcode != Instruction::FAdd)
4521 assert(!Red->isPartialReduction() &&
4522 "This path does not support partial reductions");
4523 Type *RedTy = Ctx.Types.inferScalarType(Red);
4526 auto IsMulAccValidAndClampRange =
4533 Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
4539 (Ext0->getOpcode() != Ext1->getOpcode() ||
4540 Ext0->getOpcode() == Instruction::CastOps::FPExt))
4544 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
4546 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
4553 ExtCost += Ext0->computeCost(VF, Ctx);
4555 ExtCost += Ext1->computeCost(VF, Ctx);
4557 ExtCost += OuterExt->computeCost(VF, Ctx);
4559 return MulAccCost.
isValid() &&
4560 MulAccCost < ExtCost + MulCost + RedCost;
4565 VPValue *VecOp = Red->getVecOp();
4603 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
4604 Type *WideTy = Ctx.Types.inferScalarType(ExtA);
4605 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
4606 Mul->setOperand(1, ExtB);
4616 ExtendAndReplaceConstantOp(RecipeA, RecipeB,
B,
Mul);
4621 IsMulAccValidAndClampRange(
Mul, RecipeA, RecipeB,
nullptr)) {
4628 if (!
Sub && IsMulAccValidAndClampRange(
Mul,
nullptr,
nullptr,
nullptr))
4645 ExtendAndReplaceConstantOp(Ext0, Ext1,
B,
Mul);
4654 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
4655 Ext0->getOpcode() == Ext1->getOpcode() &&
4656 IsMulAccValidAndClampRange(
Mul, Ext0, Ext1, Ext) &&
Mul->hasOneUse()) {
4658 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(),
nullptr,
4659 *Ext0, *Ext0, Ext0->getDebugLoc());
4660 NewExt0->insertBefore(Ext0);
4665 Ext->getResultType(),
nullptr, *Ext1,
4666 *Ext1, Ext1->getDebugLoc());
4669 Mul->setOperand(0, NewExt0);
4670 Mul->setOperand(1, NewExt1);
4671 Red->setOperand(1,
Mul);
4685 assert(!Red->isPartialReduction() &&
4686 "This path does not support partial reductions");
4689 auto IP = std::next(Red->getIterator());
4690 auto *VPBB = Red->getParent();
4700 Red->replaceAllUsesWith(AbstractR);
4730 for (
VPValue *VPV : VPValues) {
4739 if (
User->usesScalars(VPV))
4742 HoistPoint = HoistBlock->
begin();
4746 "All users must be in the vector preheader or dominated by it");
4751 VPV->replaceUsesWithIf(Broadcast,
4752 [VPV, Broadcast](
VPUser &U,
unsigned Idx) {
4753 return Broadcast != &U && !U.usesScalars(VPV);
4770 if (RepR->isPredicated() || !RepR->isSingleScalar() ||
4771 RepR->getOpcode() != Instruction::Load)
4774 VPValue *Addr = RepR->getOperand(0);
4777 if (!
Loc.AATags.Scope)
4782 if (R.mayWriteToMemory()) {
4784 if (!
Loc || !
Loc->AATags.Scope || !
Loc->AATags.NoAlias)
4792 for (
auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
4796 const AAMDNodes &LoadAA = LoadLoc.AATags;
4812 return CommonMetadata;
4815template <
unsigned Opcode>
4820 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
4821 "Only Load and Store opcodes supported");
4822 constexpr bool IsLoad = (Opcode == Instruction::Load);
4828 return TypeInfo.
inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
4833 for (
auto Recipes :
Groups) {
4834 if (Recipes.size() < 2)
4842 VPValue *MaskI = RecipeI->getMask();
4843 Type *TypeI = GetLoadStoreValueType(RecipeI);
4849 bool HasComplementaryMask =
false;
4854 VPValue *MaskJ = RecipeJ->getMask();
4855 Type *TypeJ = GetLoadStoreValueType(RecipeJ);
4856 if (TypeI == TypeJ) {
4866 if (HasComplementaryMask) {
4867 assert(Group.
size() >= 2 &&
"must have at least 2 entries");
4877template <
typename InstType>
4895 for (
auto &Group :
Groups) {
4915 return R->isSingleScalar() == IsSingleScalar;
4917 "all members in group must agree on IsSingleScalar");
4922 LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
4923 IsSingleScalar,
nullptr, *EarliestLoad, CommonMetadata);
4925 UnpredicatedLoad->insertBefore(EarliestLoad);
4929 Load->replaceAllUsesWith(UnpredicatedLoad);
4930 Load->eraseFromParent();
4940 if (!StoreLoc || !StoreLoc->AATags.Scope)
4946 StoresToSink.
end());
4950 SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], PSE, L, TypeInfo);
4964 for (
auto &Group :
Groups) {
4977 VPValue *SelectedValue = Group[0]->getOperand(0);
4980 bool IsSingleScalar = Group[0]->isSingleScalar();
4981 for (
unsigned I = 1;
I < Group.size(); ++
I) {
4982 assert(IsSingleScalar == Group[
I]->isSingleScalar() &&
4983 "all members in group must agree on IsSingleScalar");
4984 VPValue *Mask = Group[
I]->getMask();
4986 SelectedValue = Builder.createSelect(Mask,
Value, SelectedValue,
4995 StoreWithMinAlign->getUnderlyingInstr(),
4996 {SelectedValue, LastStore->getOperand(1)}, IsSingleScalar,
4997 nullptr, *LastStore, CommonMetadata);
4998 UnpredicatedStore->insertBefore(*InsertBB, LastStore->
getIterator());
5002 Store->eraseFromParent();
5009 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
5010 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
5075 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](
VPUser *U) {
5077 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
5084 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
5094 DefR->replaceUsesWithIf(
5095 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
5097 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
5111 for (
VPValue *Def : R.definedValues()) {
5124 auto IsCandidateUnpackUser = [Def](
VPUser *U) {
5126 return U->usesScalars(Def) &&
5129 if (
none_of(Def->users(), IsCandidateUnpackUser))
5136 Unpack->insertAfter(&R);
5137 Def->replaceUsesWithIf(Unpack,
5138 [&IsCandidateUnpackUser](
VPUser &U,
unsigned) {
5139 return IsCandidateUnpackUser(&U);
5148 bool RequiresScalarEpilogue,
VPValue *Step,
5149 std::optional<uint64_t> MaxRuntimeStep) {
5160 assert(StepR->getParent() == VectorPHVPBB &&
5161 "Step must be defined in VectorPHVPBB");
5163 InsertPt = std::next(StepR->getIterator());
5165 VPBuilder Builder(VectorPHVPBB, InsertPt);
5171 if (!RequiresScalarEpilogue &&
match(TC,
m_APInt(TCVal)) && MaxRuntimeStep &&
5183 if (TailByMasking) {
5184 TC = Builder.createAdd(
5195 Builder.createNaryOp(Instruction::URem, {TC, Step},
5204 if (RequiresScalarEpilogue) {
5206 "requiring scalar epilogue is not supported with fail folding");
5209 R = Builder.createSelect(IsZero, Step, R);
5223 "VF and VFxUF must be materialized together");
5235 Builder.createElementCount(TCTy, VFEC * Plan.
getConcreteUF());
5242 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
5246 BC, [&VF](
VPUser &U,
unsigned) {
return !U.usesScalars(&VF); });
5250 VPValue *MulByUF = Builder.createOverflowingOp(
5262 BasicBlock *EntryBB = Entry->getIRBasicBlock();
5270 const SCEV *Expr = ExpSCEV->getSCEV();
5273 ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
5278 ExpSCEV->eraseFromParent();
5281 "VPExpandSCEVRecipes must be at the beginning of the entry block, "
5282 "before any VPIRInstructions");
5285 auto EI = Entry->begin();
5295 return ExpandedSCEVs;
5307 VPValue *OpV,
unsigned Idx,
bool IsScalable) {
5311 return Member0Op == OpV;
5315 return !IsScalable && !W->getMask() && W->isConsecutive() &&
5318 return IR->getInterleaveGroup()->isFull() &&
IR->getVPValue(Idx) == OpV;
5335 for (
unsigned Idx = 0; Idx != WideMember0->getNumOperands(); ++Idx) {
5338 OpsI.
push_back(
Op->getDefiningRecipe()->getOperand(Idx));
5343 if (
any_of(
enumerate(OpsI), [WideMember0, Idx, IsScalable](
const auto &
P) {
5344 const auto &[
OpIdx, OpV] =
P;
5359 if (!InterleaveR || InterleaveR->
getMask())
5360 return std::nullopt;
5362 Type *GroupElementTy =
nullptr;
5366 [&TypeInfo, GroupElementTy](
VPValue *
Op) {
5367 return TypeInfo.inferScalarType(Op) == GroupElementTy;
5369 return std::nullopt;
5374 [&TypeInfo, GroupElementTy](
VPValue *
Op) {
5375 return TypeInfo.inferScalarType(Op) == GroupElementTy;
5377 return std::nullopt;
5381 if (IG->getFactor() != IG->getNumMembers())
5382 return std::nullopt;
5388 assert(
Size.isScalable() == VF.isScalable() &&
5389 "if Size is scalable, VF must be scalable and vice versa");
5390 return Size.getKnownMinValue();
5394 unsigned MinVal = VF.getKnownMinValue();
5396 if (IG->getFactor() == MinVal && GroupSize == GetVectorBitWidthForVF(VF))
5399 return std::nullopt;
5407 return RepR && RepR->isSingleScalar();
5414 auto *R = V->getDefiningRecipe();
5423 for (
unsigned Idx = 0,
E = WideMember0->getNumOperands(); Idx !=
E; ++Idx)
5424 WideMember0->setOperand(
5433 auto *LI =
cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
5435 LoadGroup->getMask(),
true,
5436 {}, LoadGroup->getDebugLoc());
5437 L->insertBefore(LoadGroup);
5443 assert(RepR->isSingleScalar() && RepR->getOpcode() == Instruction::Load &&
5444 "must be a single scalar load");
5445 NarrowedOps.
insert(RepR);
5450 VPValue *PtrOp = WideLoad->getAddr();
5452 PtrOp = VecPtr->getOperand(0);
5457 nullptr, {}, *WideLoad);
5458 N->insertBefore(WideLoad);
5463std::unique_ptr<VPlan>
5483 "unexpected branch-on-count");
5487 std::optional<ElementCount> VFToOptimize;
5501 if (R.mayWriteToMemory() && !InterleaveR)
5516 std::optional<ElementCount> NarrowedVF =
5518 if (!NarrowedVF || (VFToOptimize && NarrowedVF != VFToOptimize))
5520 VFToOptimize = NarrowedVF;
5523 if (InterleaveR->getStoredValues().empty())
5528 auto *Member0 = InterleaveR->getStoredValues()[0];
5538 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
5541 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
5542 return IR && IR->getInterleaveGroup()->isFull() &&
5543 IR->getVPValue(Op.index()) == Op.value();
5552 VFToOptimize->isScalable()))
5557 if (StoreGroups.
empty())
5561 bool RequiresScalarEpilogue =
5572 std::unique_ptr<VPlan> NewPlan;
5574 NewPlan = std::unique_ptr<VPlan>(Plan.
duplicate());
5575 Plan.
setVF(*VFToOptimize);
5576 NewPlan->removeVF(*VFToOptimize);
5582 for (
auto *StoreGroup : StoreGroups) {
5589 StoreGroup->getDebugLoc());
5590 S->insertBefore(StoreGroup);
5591 StoreGroup->eraseFromParent();
5603 if (VFToOptimize->isScalable()) {
5615 RequiresScalarEpilogue, Step);
5623 "All VPVectorPointerRecipes should have been removed");
5639 "must have a BranchOnCond");
5642 if (VF.
isScalable() && VScaleForTuning.has_value())
5643 VectorStep *= *VScaleForTuning;
5644 assert(VectorStep > 0 &&
"trip count should not be zero");
5648 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
5655 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
5667 "Cannot handle loops with uncountable early exits");
5740 make_range(MiddleVPBB->getFirstNonPhi(), MiddleVPBB->end()))) {
5754 "vector.recur.extract.for.phi");
5773 VPValue *WidenIVCandidate = BinOp->getOperand(0);
5774 VPValue *InvariantCandidate = BinOp->getOperand(1);
5776 std::swap(WidenIVCandidate, InvariantCandidate);
5790 auto *ClonedOp = BinOp->
clone();
5791 if (ClonedOp->getOperand(0) == WidenIV) {
5792 ClonedOp->setOperand(0, ScalarIV);
5794 assert(ClonedOp->getOperand(1) == WidenIV &&
"one operand must be WideIV");
5795 ClonedOp->setOperand(1, ScalarIV);
5810 auto CheckSentinel = [&SE](
const SCEV *IVSCEV,
5811 bool UseMax) -> std::optional<APSInt> {
5813 for (
bool Signed : {
true,
false}) {
5822 return std::nullopt;
5830 PhiR->getRecurrenceKind()))
5839 VPValue *BackedgeVal = PhiR->getBackedgeValue();
5853 !
match(FindLastSelect,
5862 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression, PSE,
5868 "IVOfExpressionToSink not being an AddRec must imply "
5869 "FindLastExpression not being an AddRec.");
5880 std::optional<APSInt> SentinelVal = CheckSentinel(IVSCEV, UseMax);
5881 bool UseSigned = SentinelVal && SentinelVal->isSigned();
5888 if (IVOfExpressionToSink) {
5889 const SCEV *FindLastExpressionSCEV =
5891 if (
match(FindLastExpressionSCEV,
5894 if (
auto NewSentinel =
5895 CheckSentinel(FindLastExpressionSCEV, NewUseMax)) {
5898 SentinelVal = *NewSentinel;
5899 UseSigned = NewSentinel->isSigned();
5901 IVSCEV = FindLastExpressionSCEV;
5902 IVOfExpressionToSink =
nullptr;
5912 if (AR->hasNoSignedWrap())
5914 else if (AR->hasNoUnsignedWrap())
5924 VPValue *NewFindLastSelect = BackedgeVal;
5926 if (!SentinelVal || IVOfExpressionToSink) {
5929 DebugLoc DL = FindLastSelect->getDefiningRecipe()->getDebugLoc();
5930 VPBuilder LoopBuilder(FindLastSelect->getDefiningRecipe());
5931 if (FindLastSelect->getDefiningRecipe()->getOperand(1) == PhiR)
5932 SelectCond = LoopBuilder.
createNot(SelectCond);
5939 if (SelectCond !=
Cond || IVOfExpressionToSink) {
5942 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression,
5951 VPIRFlags Flags(MinMaxKind,
false,
false,
5957 NewFindLastSelect, Flags, ExitDL);
5960 VPValue *VectorRegionExitingVal = ReducedIV;
5961 if (IVOfExpressionToSink)
5962 VectorRegionExitingVal =
5964 ReducedIV, IVOfExpressionToSink);
5967 VPValue *StartVPV = PhiR->getStartValue();
5974 NewRdxResult = MiddleBuilder.
createSelect(Cmp, VectorRegionExitingVal,
5984 AnyOfPhi->insertAfter(PhiR);
5991 OrVal, VectorRegionExitingVal, StartVPV, ExitDL);
6004 PhiR->hasUsesOutsideReductionChain());
6005 NewPhiR->insertBefore(PhiR);
6006 PhiR->replaceAllUsesWith(NewPhiR);
6007 PhiR->eraseFromParent();
6014struct ReductionExtend {
6015 Type *SrcType =
nullptr;
6016 ExtendKind Kind = ExtendKind::PR_None;
6022struct ExtendedReductionOperand {
6026 ReductionExtend ExtendA, ExtendB;
6034struct VPPartialReductionChain {
6037 VPWidenRecipe *ReductionBinOp =
nullptr;
6039 ExtendedReductionOperand ExtendedOp;
6046 unsigned AccumulatorOpIdx;
6047 unsigned ScaleFactor;
6060 if (!
Op->hasOneUse() ||
6066 auto *Trunc = Builder.createWidenCast(Instruction::CastOps::Trunc,
6067 Op->getOperand(1), NarrowTy);
6069 Op->setOperand(1, Builder.createWidenCast(ExtOpc, Trunc, WideTy));
6078 auto *
Sub =
Op->getOperand(0)->getDefiningRecipe();
6080 assert(Ext->getOpcode() ==
6082 "Expected both the LHS and RHS extends to be the same");
6083 bool IsSigned = Ext->getOpcode() == Instruction::SExt;
6086 auto *FreezeX = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
X}));
6087 auto *FreezeY = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
Y}));
6088 auto *
Max = Builder.insert(
6090 {FreezeX, FreezeY}, SrcTy));
6091 auto *Min = Builder.insert(
6093 {FreezeX, FreezeY}, SrcTy));
6096 return Builder.createWidenCast(Instruction::CastOps::ZExt, AbsDiff,
6109 if (!
Mul->hasOneUse() ||
6110 (Ext->getOpcode() != MulLHS->getOpcode() && MulLHS != MulRHS) ||
6111 MulLHS->getOpcode() != MulRHS->getOpcode())
6114 Mul->setOperand(0, Builder.createWidenCast(MulLHS->getOpcode(),
6115 MulLHS->getOperand(0),
6116 Ext->getResultType()));
6117 Mul->setOperand(1, MulLHS == MulRHS
6118 ?
Mul->getOperand(0)
6119 : Builder.createWidenCast(MulRHS->getOpcode(),
6120 MulRHS->getOperand(0),
6121 Ext->getResultType()));
6130 VPValue *VecOp = Red->getVecOp();
6164static void transformToPartialReduction(
const VPPartialReductionChain &Chain,
6172 WidenRecipe->
getOperand(1 - Chain.AccumulatorOpIdx));
6188 if (WidenRecipe->
getOpcode() == Instruction::Sub &&
6196 Builder.insert(NegRecipe);
6197 ExtendedOp = NegRecipe;
6201 ExtendedOp = optimizeExtendsForPartialReduction(ExtendedOp, TypeInfo);
6211 assert((!ExitValue || IsLastInChain) &&
6212 "if we found ExitValue, it must match RdxPhi's backedge value");
6223 PartialRed->insertBefore(WidenRecipe);
6231 E->insertBefore(WidenRecipe);
6232 PartialRed->replaceAllUsesWith(
E);
6245 auto *NewScaleFactor = Plan.
getConstantInt(32, Chain.ScaleFactor);
6246 StartInst->setOperand(2, NewScaleFactor);
6254 VPValue *OldStartValue = StartInst->getOperand(0);
6255 StartInst->setOperand(0, StartInst->getOperand(1));
6259 assert(RdxResult &&
"Could not find reduction result");
6262 constexpr unsigned SubOpc = Instruction::BinaryOps::Sub;
6268 [&NewResult](
VPUser &U,
unsigned Idx) {
return &
U != NewResult; });
6274 const VPPartialReductionChain &Link,
6277 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6278 std::optional<unsigned> BinOpc = std::nullopt;
6280 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6281 BinOpc = ExtendedOp.ExtendsUser->
getOpcode();
6283 std::optional<llvm::FastMathFlags>
Flags;
6288 ? (unsigned)Instruction::Add
6291 Opcode, ExtendedOp.ExtendA.SrcType, ExtendedOp.ExtendB.SrcType, RdxType,
6292 VF, ExtendedOp.ExtendA.Kind, ExtendedOp.ExtendB.Kind, BinOpc,
6315static std::optional<ExtendedReductionOperand>
6319 "Op should be operand of UpdateR");
6327 if (
Op->hasOneUse() &&
6337 if (LHSInputType != RHSInputType ||
6338 LHSExt->getOpcode() != RHSExt->getOpcode())
6339 return std::nullopt;
6342 return ExtendedReductionOperand{
6344 {LHSInputType, getPartialReductionExtendKind(LHSExt)},
6348 std::optional<TTI::PartialReductionExtendKind> OuterExtKind;
6351 VPValue *CastSource = CastRecipe->getOperand(0);
6352 OuterExtKind = getPartialReductionExtendKind(CastRecipe);
6361 }
else if (UpdateR->
getOpcode() == Instruction::Add ||
6362 UpdateR->
getOpcode() == Instruction::FAdd) {
6366 return ExtendedReductionOperand{
6373 if (!
Op->hasOneUse())
6374 return std::nullopt;
6379 return std::nullopt;
6389 return std::nullopt;
6393 ExtendKind LHSExtendKind = getPartialReductionExtendKind(LHSCast);
6396 const APInt *RHSConst =
nullptr;
6402 return std::nullopt;
6406 if (Cast && OuterExtKind &&
6407 getPartialReductionExtendKind(Cast) != OuterExtKind)
6408 return std::nullopt;
6410 Type *RHSInputType = LHSInputType;
6411 ExtendKind RHSExtendKind = LHSExtendKind;
6414 RHSExtendKind = getPartialReductionExtendKind(RHSCast);
6417 return ExtendedReductionOperand{
6418 MulOp, {LHSInputType, LHSExtendKind}, {RHSInputType, RHSExtendKind}};
6425static std::optional<SmallVector<VPPartialReductionChain>>
6433 return std::nullopt;
6444 VPValue *CurrentValue = ExitValue;
6445 while (CurrentValue != RedPhiR) {
6448 return std::nullopt;
6455 std::optional<ExtendedReductionOperand> ExtendedOp =
6456 matchExtendedReductionOperand(UpdateR,
Op, TypeInfo);
6458 ExtendedOp = matchExtendedReductionOperand(UpdateR, PrevValue, TypeInfo);
6460 return std::nullopt;
6464 Type *ExtSrcType = ExtendedOp->ExtendA.SrcType;
6467 return std::nullopt;
6472 VPPartialReductionChain Link(
6473 {UpdateR, *ExtendedOp, RK,
6477 CurrentValue = PrevValue;
6482 std::reverse(Chain.
begin(), Chain.
end());
6501 if (
auto Chains = getScaledReductions(RedPhiR, CostCtx,
Range))
6502 ChainsByPhi.
try_emplace(RedPhiR, std::move(*Chains));
6505 if (ChainsByPhi.
empty())
6512 for (
const auto &[
_, Chains] : ChainsByPhi)
6513 for (
const VPPartialReductionChain &Chain : Chains) {
6514 PartialReductionOps.
insert(Chain.ExtendedOp.ExtendsUser);
6515 ScaledReductionMap[Chain.ReductionBinOp] = Chain.ScaleFactor;
6521 auto ExtendUsersValid = [&](
VPValue *Ext) {
6523 return PartialReductionOps.contains(cast<VPRecipeBase>(U));
6527 auto IsProfitablePartialReductionChainForVF =
6534 for (
const VPPartialReductionChain &Link : Chain) {
6535 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6536 InstructionCost LinkCost = getPartialReductionLinkCost(CostCtx, Link, VF);
6540 PartialCost += LinkCost;
6541 RegularCost += Link.ReductionBinOp->
computeCost(VF, CostCtx);
6543 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6544 RegularCost += ExtendedOp.ExtendsUser->
computeCost(VF, CostCtx);
6547 RegularCost += Extend->computeCost(VF, CostCtx);
6549 return PartialCost.
isValid() && PartialCost < RegularCost;
6557 for (
auto &[RedPhiR, Chains] : ChainsByPhi) {
6558 for (
const VPPartialReductionChain &Chain : Chains) {
6559 if (!
all_of(Chain.ExtendedOp.ExtendsUser->operands(), ExtendUsersValid)) {
6563 auto UseIsValid = [&, RedPhiR = RedPhiR](
VPUser *U) {
6565 return PhiR == RedPhiR;
6567 return Chain.ScaleFactor == ScaledReductionMap.
lookup_or(R, 0) ||
6573 if (!
all_of(Chain.ReductionBinOp->users(), UseIsValid)) {
6582 auto *RepR = dyn_cast<VPReplicateRecipe>(U);
6583 return RepR && RepR->getOpcode() == Instruction::Store;
6594 return IsProfitablePartialReductionChainForVF(Chains, VF);
6600 for (
auto &[Phi, Chains] : ChainsByPhi)
6601 for (
const VPPartialReductionChain &Chain : Chains)
6602 transformToPartialReduction(Chain, CostCtx.
Types, Plan, Phi);
6616 if (VPI && VPI->getUnderlyingValue() &&
6630 New->insertBefore(VPI);
6631 if (VPI->getOpcode() == Instruction::Load)
6632 VPI->replaceAllUsesWith(New->getVPSingleValue());
6633 VPI->eraseFromParent();
6638 FinalRedStoresBuilder))
6647 ReplaceWith(Histogram);
6655 ReplaceWith(Recipe);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const X86InstrFMA3Group Groups[]
static const uint32_t IV[8]
Helper for extra no-alias checks via known-safe recipe and SCEV.
SinkStoreInfo(const SmallPtrSetImpl< VPRecipeBase * > &ExcludeRecipes, VPReplicateRecipe &GroupLeader, PredicatedScalarEvolution &PSE, const Loop &L, VPTypeAnalysis &TypeInfo)
bool shouldSkip(VPRecipeBase &R) const
Return true if R should be skipped during alias checking, either because it's in the exclude set or b...
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
uint64_t getZExtValue() const
Get zero extended value.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt abs() const
Get the absolute value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
An arbitrary precision integer that knows its signedness.
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
@ NoAlias
The two locations do not alias at all.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
back - Get the last element.
const T & front() const
front - Get the first element.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
This class represents a range of values.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
ValueT lookup_or(const_arg_type_t< KeyT > Val, U &&Default) const
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
Utility class for floating point operations which can have information about relaxed accuracy require...
Convenience struct for specifying and reasoning about fast-math flags.
Represents flags for the getelementptr instruction/expression.
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
uint32_t getNumMembers() const
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
std::pair< iterator, bool > try_emplace(const KeyT &Key, Ts &&...Args)
ValueT lookup(const KeyT &Key) const
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
Post-order traversal of a graph.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyze scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(SCEVUse SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUDivExpr(SCEVUse LHS, SCEVUse RHS)
Get a canonical unsigned division expression, or something simpler if possible.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
static LLVM_ABI bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias)
static LLVM_ABI AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
const VPBlocksTy & getPredecessors() const
const std::string & getName() const
void clearSuccessors()
Remove all the successors of this block.
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
static SmallVector< VPBasicBlock * > blocksInSingleSuccessorChainBetween(VPBasicBlock *FirstBB, VPBasicBlock *LastBB)
Returns the blocks between FirstBB and LastBB, where FirstBB to LastBB forms a single-successor chain.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createAnyOfReduction(VPValue *ChainOp, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown())
Create an AnyOf reduction pattern: or-reduce ChainOp, freeze the result, then select between TrueVal ...
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPWidenPHIRecipe * createWidenPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
A recipe for generating the phi node tracking the current scalar iteration index.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
A special type of VPBasicBlock that wraps an existing IR basic block.
Class to record and manage LLVM IR flags.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode; otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ FirstOrderRecurrenceSplice
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPRegionBlock * getRegion()
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Helper class to create VPRecipies from IR instructions.
VPHistogramRecipe * widenIfHistogram(VPInstruction *VPI)
If VPI represents a histogram operation (as determined by LoopVectorizationLegality) make that safe f...
void setRecipe(Instruction *I, VPRecipeBase *R)
Set the recipe created for given ingredient.
VPRecipeBase * tryToWidenMemory(VPInstruction *VPI, VFRange &Range)
Check if the load or store instruction VPI should widened for Range.Start and potentially masked.
bool replaceWithFinalIfReductionStore(VPInstruction *VPI, VPBuilder &FinalRedStoresBuilder)
If VPI is a store of a reduction into an invariant address, delete it.
VPReplicateRecipe * handleReplication(VPInstruction *VPI, VFRange &Range)
Build a VPReplicationRecipe for VPI.
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe for handling reduction phis.
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
VPInstruction * getOrCreateCanonicalIVIncrement()
Get the canonical IV increment instruction if it exists.
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
void clearCanonicalIVNUW(VPInstruction *Increment)
Unsets NUW for the canonical IV increment Increment, for loop regions.
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
bool isPredicated() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
unsigned getNumOperands() const
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A Recipe for widening the canonical induction variable of the vector loop.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
VPWidenRecipe * clone() override
Clone the current recipe.
unsigned getOpcode() const
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient output IR.
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
bool hasVF(ElementCount VF) const
const DataLayout & getDataLayout() const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
bool hasScalableVF() const
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPSymbolicValue & getVectorTripCount()
The vector trip count.
VPValue * getBackedgeTakenCount() const
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop, i.e.
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
constexpr ScalarTy getFixedValue() const
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_isa< To... > m_Isa()
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ?
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::FAdd, true > m_c_FAdd(const LHS &L, const RHS &R)
Matches FAdd with LHS and RHS in either order.
LogicalOp_match< LHS, RHS, Instruction::And, true > m_c_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R with LHS and RHS in either order.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
bool match(const SCEV *S, const Pattern &P)
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ComputeReductionResult, Op0_t > m_ComputeReductionResult(const Op0_t &Op0)
auto m_WidenAnyExtend(const Op0_t &Op0)
match_bind< VPIRValue > m_VPIRValue(VPIRValue *&V)
Match a VPIRValue.
VPInstruction_match< VPInstruction::StepVector > m_StepVector()
auto m_VPPhi(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
auto m_WidenIntrinsic(const T &...Ops)
VPInstruction_match< VPInstruction::ExitingIVValue, Op0_t > m_ExitingIVValue(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
match_bind< VPSingleDefRecipe > m_VPSingleDefRecipe(VPSingleDefRecipe *&V)
Match a VPSingleDefRecipe, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
auto m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_True()
auto m_VPValue()
Match an arbitrary VPValue and ignore it.
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, without matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
match_bind< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
auto m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
VPInstruction * findCanonicalIVIncrement(VPlan &Plan)
Find the canonical IV increment of Plan's vector loop region.
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPRecipeBase * findRecipe(VPValue *Start, PredT Pred)
Search Start's users for a recipe satisfying Pred, looking through recipes with definitions.
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) TODO: Int...
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms li...
void stable_sort(R &&Range)
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
UncountableExitStyle
Different methods of handling early exits.
@ ReadOnly
No side effects to worry about, so we can process any uncountable exits in the loop and branch either...
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector....
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list has fewer than 2 elements.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result value is uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * NoAlias
The tag specifying the noalias scope.
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
This reduction is unordered with the partial result scaled down by some factor.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
TargetTransformInfo::TargetCostKind CostKind
const TargetTransformInfo & TTI
A recipe for handling first-order recurrence phis.
A VPValue representing a live-in from the input IR or a constant.
Type * getType() const
Returns the type of the underlying IR value.
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
bool isMaterialized() const
Returns true if this symbolic value has been materialized.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...