57 if (!VPBB->getParent())
60 auto EndIter = Term ? Term->getIterator() : VPBB->end();
65 VPValue *VPV = Ingredient.getVPSingleValue();
81 *Load, Ingredient.getOperand(0),
nullptr ,
82 false , *VPI, Ingredient.getDebugLoc());
85 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
86 nullptr ,
false , *VPI,
87 Ingredient.getDebugLoc());
90 Ingredient.getDebugLoc());
102 if (VectorID == Intrinsic::experimental_noalias_scope_decl)
107 if (VectorID == Intrinsic::assume ||
108 VectorID == Intrinsic::lifetime_end ||
109 VectorID == Intrinsic::lifetime_start ||
110 VectorID == Intrinsic::sideeffect ||
111 VectorID == Intrinsic::pseudoprobe) {
116 const bool IsSingleScalar = VectorID != Intrinsic::assume &&
117 VectorID != Intrinsic::pseudoprobe;
121 Ingredient.getDebugLoc());
124 *CI, VectorID,
drop_end(Ingredient.operands()), CI->getType(),
125 VPIRFlags(*CI), *VPI, CI->getDebugLoc());
129 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
133 *VPI, Ingredient.getDebugLoc());
137 "inductions must be created earlier");
146 "Only recpies with zero or one defined values expected");
147 Ingredient.eraseFromParent();
164 if (
A->getOpcode() != Instruction::Store ||
165 B->getOpcode() != Instruction::Store)
175 const APInt *Distance;
181 Type *TyA = TypeInfo.inferScalarType(
A->getOperand(0));
183 Type *TyB = TypeInfo.inferScalarType(
B->getOperand(0));
189 uint64_t MaxStoreSize = std::max(SizeA, SizeB);
191 auto VFs =
B->getParent()->getPlan()->vectorFactors();
195 return Distance->
abs().
uge(
203 : ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), PSE(PSE),
204 L(L), TypeInfo(TypeInfo) {}
211 return ExcludeRecipes.contains(&R) ||
212 (Store && isNoAliasViaDistance(Store, &GroupLeader));
225 std::optional<SinkStoreInfo> SinkInfo = {}) {
226 bool CheckReads = SinkInfo.has_value();
233 if (SinkInfo && SinkInfo->shouldSkip(R))
237 if (!
R.mayWriteToMemory() && !(CheckReads &&
R.mayReadFromMemory()))
255template <
unsigned Opcode>
260 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
261 "Only Load and Store opcodes supported");
262 constexpr bool IsLoad = (Opcode == Instruction::Load);
269 if (!RepR || RepR->getOpcode() != Opcode || !FilterFn(RepR))
273 VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
276 RecipesByAddress[AddrSCEV].push_back(RepR);
281 for (
auto &Group :
Groups) {
294 bool Sinking =
false) {
303 if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
308 return RepR && RepR->getOpcode() == Instruction::Alloca;
317 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
329 if (Candidate->getParent() == SinkTo ||
334 if (!ScalarVFOnly && RepR->isSingleScalar())
337 WorkList.
insert({SinkTo, Candidate});
349 for (
auto &Recipe : *VPBB)
351 InsertIfValidSinkCandidate(VPBB,
Op);
355 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
358 std::tie(SinkTo, SinkCandidate) = WorkList[
I];
363 auto UsersOutsideSinkTo =
365 return cast<VPRecipeBase>(U)->getParent() != SinkTo;
367 if (
any_of(UsersOutsideSinkTo, [SinkCandidate](
VPUser *U) {
368 return !U->usesFirstLaneOnly(SinkCandidate);
371 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
373 if (NeedsDuplicating) {
377 if (
auto *SinkCandidateRepR =
383 nullptr , *SinkCandidateRepR,
387 Clone = SinkCandidate->
clone();
397 InsertIfValidSinkCandidate(SinkTo,
Op);
407 if (!EntryBB || EntryBB->size() != 1 ||
417 if (EntryBB->getNumSuccessors() != 2)
422 if (!Succ0 || !Succ1)
425 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
427 if (Succ0->getSingleSuccessor() == Succ1)
429 if (Succ1->getSingleSuccessor() == Succ0)
446 if (!Region1->isReplicator())
448 auto *MiddleBasicBlock =
450 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
455 if (!Region2 || !Region2->isReplicator())
460 if (!Mask1 || Mask1 != Mask2)
463 assert(Mask1 && Mask2 &&
"both region must have conditions");
469 if (TransformedRegions.
contains(Region1))
476 if (!Then1 || !Then2)
496 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
502 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
503 Phi1ToMove.eraseFromParent();
506 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
520 TransformedRegions.
insert(Region1);
523 return !TransformedRegions.
empty();
530 std::string RegionName = (
Twine(
"pred.") + Instr->getOpcodeName()).str();
531 assert(Instr->getParent() &&
"Predicated instruction not in any basic block");
532 auto *BlockInMask = PredRecipe->
getMask();
551 RecipeWithoutMask->getDebugLoc());
575 if (RepR->isPredicated())
594 if (ParentRegion && ParentRegion->
getExiting() == CurrentBlock)
606 if (!VPBB->getParent())
610 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
619 R.moveBefore(*PredVPBB, PredVPBB->
end());
621 auto *ParentRegion = VPBB->getParent();
622 if (ParentRegion && ParentRegion->getExiting() == VPBB)
623 ParentRegion->setExiting(PredVPBB);
627 return !WorkList.
empty();
634 bool ShouldSimplify =
true;
635 while (ShouldSimplify) {
651 if (!
IV ||
IV->getTruncInst())
666 for (
auto *U : FindMyCast->
users()) {
668 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
669 FoundUserCast = UserCast;
673 FindMyCast = FoundUserCast;
698 if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
712 WidenOriginalIV->dropPoisonGeneratingFlags();
725 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
727 if (IsConditionalAssume)
730 if (R.mayHaveSideEffects())
734 return all_of(R.definedValues(),
735 [](
VPValue *V) { return V->getNumUsers() == 0; });
755 VPUser *PhiUser = PhiR->getSingleUser();
761 PhiR->replaceAllUsesWith(Start);
762 PhiR->eraseFromParent();
778 Kind, FPBinOp, StartV, CanonicalIV, Step,
"offset.idx");
788 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy,
DL);
794 if (ResultTy != StepTy) {
801 Builder.setInsertPoint(VecPreheader);
802 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy,
DL);
804 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
810 for (
unsigned I = 0;
I !=
Users.size(); ++
I) {
815 Users.insert_range(V->users());
817 return Users.takeVector();
831 nullptr, StartV, StepV, PtrIV->
getDebugLoc(), Builder);
868 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
869 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
882 Def->operands(),
true,
884 Clone->insertAfter(Def);
885 Def->replaceAllUsesWith(Clone);
896 PtrIV->replaceAllUsesWith(PtrAdd);
903 if (HasOnlyVectorVFs &&
none_of(WideIV->users(), [WideIV](
VPUser *U) {
904 return U->usesScalars(WideIV);
910 Plan,
ID.getKind(),
ID.getInductionOpcode(),
912 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
913 WideIV->getDebugLoc(), Builder);
916 if (!HasOnlyVectorVFs) {
918 "plans containing a scalar VF cannot also include scalable VFs");
919 WideIV->replaceAllUsesWith(Steps);
922 WideIV->replaceUsesWithIf(Steps,
923 [WideIV, HasScalableVF](
VPUser &U,
unsigned) {
925 return U.usesFirstLaneOnly(WideIV);
926 return U.usesScalars(WideIV);
942 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
947 if (!Def || Def->getNumOperands() != 2)
955 auto IsWideIVInc = [&]() {
956 auto &
ID = WideIV->getInductionDescriptor();
959 VPValue *IVStep = WideIV->getStepValue();
960 switch (
ID.getInductionOpcode()) {
961 case Instruction::Add:
963 case Instruction::FAdd:
965 case Instruction::FSub:
968 case Instruction::Sub: {
988 return IsWideIVInc() ? WideIV :
nullptr;
1008 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1021 FirstActiveLane =
B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
1022 FirstActiveLaneType,
DL);
1023 VPValue *EndValue =
B.createAdd(CanonicalIV, FirstActiveLane,
DL);
1028 if (Incoming != WideIV) {
1030 EndValue =
B.createAdd(EndValue, One,
DL);
1033 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
1035 VPIRValue *Start = WideIV->getStartValue();
1036 VPValue *Step = WideIV->getStepValue();
1037 EndValue =
B.createDerivedIV(
1039 Start, EndValue, Step);
1054 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1061 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
1064 Start, VectorTC, Step);
1093 assert(EndValue &&
"Must have computed the end value up front");
1098 if (Incoming != WideIV)
1109 auto *Zero = Plan.
getZero(StepTy);
1110 return B.createPtrAdd(EndValue,
B.createSub(Zero, Step),
1115 return B.createNaryOp(
1116 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
1118 : Instruction::FAdd,
1119 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
1131 VPBuilder VectorPHBuilder(VectorPH, VectorPH->begin());
1140 WideIV, VectorPHBuilder, TypeInfo, ResumeTC))
1141 EndValues[WideIV] = EndValue;
1151 R.getVPSingleValue()->replaceAllUsesWith(EndValue);
1152 R.eraseFromParent();
1161 for (
auto [Idx, PredVPBB] :
enumerate(ExitVPBB->getPredecessors())) {
1163 if (PredVPBB == MiddleVPBB)
1165 ExitIRI->getOperand(Idx),
1169 Plan, TypeInfo, PredVPBB, ExitIRI->getOperand(Idx), PSE);
1171 ExitIRI->setOperand(Idx, Escape);
1188 const auto &[V, Inserted] = SCEV2VPV.
try_emplace(ExpR->getSCEV(), ExpR);
1191 ExpR->replaceAllUsesWith(V->second);
1192 ExpR->eraseFromParent();
1201 while (!WorkList.
empty()) {
1203 if (!Seen.
insert(Cur).second)
1211 R->eraseFromParent();
1218static std::optional<std::pair<bool, unsigned>>
1221 std::optional<std::pair<bool, unsigned>>>(R)
1224 [](
auto *
I) {
return std::make_pair(
false,
I->getOpcode()); })
1226 return std::make_pair(
true,
I->getVectorIntrinsicID());
1228 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe, VPScalarIVStepsRecipe>(
1234 I->getVPRecipeID());
1236 .
Default([](
auto *) {
return std::nullopt; });
1254 Value *V =
Op->getUnderlyingValue();
1260 auto FoldToIRValue = [&]() ->
Value * {
1262 if (OpcodeOrIID->first) {
1263 if (R.getNumOperands() != 2)
1265 unsigned ID = OpcodeOrIID->second;
1266 return Folder.FoldBinaryIntrinsic(
ID,
Ops[0],
Ops[1],
1269 unsigned Opcode = OpcodeOrIID->second;
1278 return Folder.FoldSelect(
Ops[0],
Ops[1],
1281 return Folder.FoldBinOp(Instruction::BinaryOps::Xor,
Ops[0],
1283 case Instruction::Select:
1284 return Folder.FoldSelect(
Ops[0],
Ops[1],
Ops[2]);
1285 case Instruction::ICmp:
1286 case Instruction::FCmp:
1289 case Instruction::GetElementPtr: {
1292 return Folder.FoldGEP(
GEP->getSourceElementType(),
Ops[0],
1302 case Instruction::ExtractElement:
1309 if (
Value *V = FoldToIRValue())
1310 return R.getParent()->getPlan()->getOrAddLiveIn(V);
1316 VPlan *Plan = Def->getParent()->getPlan();
1322 return Def->replaceAllUsesWith(V);
1328 PredPHI->replaceAllUsesWith(
Op);
1341 bool CanCreateNewRecipe =
1348 if (TruncTy == ATy) {
1349 Def->replaceAllUsesWith(
A);
1358 : Instruction::ZExt;
1361 if (
auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1363 Ext->setUnderlyingValue(UnderlyingExt);
1365 Def->replaceAllUsesWith(Ext);
1367 auto *Trunc = Builder.createWidenCast(Instruction::Trunc,
A, TruncTy);
1368 Def->replaceAllUsesWith(Trunc);
1376 for (
VPUser *U :
A->users()) {
1378 for (
VPValue *VPV : R->definedValues())
1392 Def->replaceAllUsesWith(
X);
1393 Def->eraseFromParent();
1399 return Def->replaceAllUsesWith(
1404 return Def->replaceAllUsesWith(
X);
1408 return Def->replaceAllUsesWith(
1413 return Def->replaceAllUsesWith(
1418 return Def->replaceAllUsesWith(
X);
1422 return Def->replaceAllUsesWith(Plan->
getFalse());
1426 return Def->replaceAllUsesWith(
X);
1429 if (CanCreateNewRecipe &&
1434 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1435 !Def->getOperand(1)->hasMoreThanOneUniqueUser()))
1436 return Def->replaceAllUsesWith(
1437 Builder.createLogicalAnd(
X, Builder.createOr(
Y, Z)));
1442 return Def->replaceAllUsesWith(Def->getOperand(1));
1447 return Def->replaceAllUsesWith(Builder.createLogicalAnd(
X,
Y));
1451 return Def->replaceAllUsesWith(Plan->
getFalse());
1454 return Def->replaceAllUsesWith(
X);
1458 if (CanCreateNewRecipe &&
1460 return Def->replaceAllUsesWith(Builder.createNot(
C));
1464 Def->setOperand(0,
C);
1465 Def->setOperand(1,
Y);
1466 Def->setOperand(2,
X);
1471 return Def->replaceAllUsesWith(
A);
1474 return Def->replaceAllUsesWith(
A);
1477 return Def->replaceAllUsesWith(
1484 return Def->replaceAllUsesWith(
1486 Def->getDebugLoc(),
"", NW));
1489 if (CanCreateNewRecipe &&
1497 ->hasNoSignedWrap()};
1498 return Def->replaceAllUsesWith(
1499 Builder.createSub(
X,
Y, Def->getDebugLoc(),
"", NW));
1505 return Def->replaceAllUsesWith(Builder.createNaryOp(
1507 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1512 return Def->replaceAllUsesWith(Builder.createNaryOp(
1514 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1519 return Def->replaceAllUsesWith(
A);
1534 R->setOperand(1,
Y);
1535 R->setOperand(2,
X);
1539 R->replaceAllUsesWith(Cmp);
1544 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1545 Cmp->setDebugLoc(Def->getDebugLoc());
1557 if (
Op->getNumUsers() > 1 ||
1561 }
else if (!UnpairedCmp) {
1562 UnpairedCmp =
Op->getDefiningRecipe();
1566 UnpairedCmp =
nullptr;
1573 if (NewOps.
size() < Def->getNumOperands()) {
1575 return Def->replaceAllUsesWith(NewAnyOf);
1582 if (CanCreateNewRecipe &&
1588 return Def->replaceAllUsesWith(NewCmp);
1596 return Def->replaceAllUsesWith(Def->getOperand(1));
1602 X = Builder.createWidenCast(Instruction::Trunc,
X, WideStepTy);
1603 Def->replaceAllUsesWith(
X);
1613 Def->setOperand(1, Def->getOperand(0));
1614 Def->setOperand(0,
Y);
1621 return Def->replaceAllUsesWith(Def->getOperand(0));
1627 Def->replaceAllUsesWith(
1628 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1632 return Def->replaceAllUsesWith(
A);
1638 Def->replaceAllUsesWith(
1639 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1646 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1651 Def->replaceAllUsesWith(
1661 "broadcast operand must be single-scalar");
1662 Def->setOperand(0,
C);
1667 if (Def->getNumOperands() == 1) {
1668 Def->replaceAllUsesWith(Def->getOperand(0));
1673 Phi->replaceAllUsesWith(Phi->getOperand(0));
1679 if (Def->getNumOperands() == 1 &&
1681 return Def->replaceAllUsesWith(IRV);
1694 return Def->replaceAllUsesWith(
A);
1697 Def->replaceAllUsesWith(Builder.createNaryOp(
1698 Instruction::ExtractElement, {A, LaneToExtract}, Def->getDebugLoc()));
1712 auto *IVInc = Def->getOperand(0);
1713 if (IVInc->getNumUsers() == 2) {
1718 if (Phi->getNumUsers() == 1 || (Phi->getNumUsers() == 2 && Inc)) {
1719 Def->replaceAllUsesWith(IVInc);
1721 Inc->replaceAllUsesWith(Phi);
1722 Phi->setOperand(0,
Y);
1738 Steps->replaceAllUsesWith(Steps->getOperand(0));
1746 Def->replaceUsesWithIf(StartV, [](
const VPUser &U,
unsigned Idx) {
1748 return PhiR && PhiR->isInLoop();
1754 Def->replaceAllUsesWith(
A);
1763 [Def,
A](
VPUser *U) { return U->usesScalars(A) || Def == U; })) {
1764 return Def->replaceAllUsesWith(
A);
1768 return Def->replaceAllUsesWith(
A);
1795 while (!Worklist.
empty()) {
1804 R->replaceAllUsesWith(
1805 Builder.createLogicalAnd(HeaderMask, Builder.createLogicalAnd(
X,
Y)));
1824 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1833 !WidenStoreR->isConsecutive()) {
1834 VPValue *Mask = WidenStoreR->getMask();
1843 {WidenStoreR->getOperand(1)});
1848 &WidenStoreR->getIngredient(), {Extract, WidenStoreR->getAddr()},
1849 true ,
nullptr , {},
1851 ScalarStore->insertBefore(WidenStoreR);
1852 WidenStoreR->eraseFromParent();
1857 if (RepR && RepR->getOpcode() == Instruction::Store &&
1860 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1861 true ,
nullptr , *RepR ,
1862 *RepR , RepR->getDebugLoc());
1863 Clone->insertBefore(RepOrWidenR);
1865 VPValue *ExtractOp = Clone->getOperand(0);
1871 Clone->setOperand(0, ExtractOp);
1872 RepR->eraseFromParent();
1881 auto IntroducesBCastOf = [](
const VPValue *
Op) {
1890 return !U->usesScalars(
Op);
1894 if (
any_of(RepOrWidenR->users(), IntroducesBCastOf(RepOrWidenR)) &&
1897 make_filter_range(Op->users(), not_equal_to(RepOrWidenR)),
1898 IntroducesBCastOf(Op)))
1902 auto *IRV = dyn_cast<VPIRValue>(Op);
1903 bool LiveInNeedsBroadcast = IRV && !isa<Constant>(IRV->getValue());
1904 auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
1905 return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
1910 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1911 true ,
nullptr, *RepOrWidenR);
1912 Clone->insertBefore(RepOrWidenR);
1913 RepOrWidenR->replaceAllUsesWith(Clone);
1915 RepOrWidenR->eraseFromParent();
1951 if (Blend->isNormalized() || !
match(Blend->getMask(0),
m_False()))
1952 UniqueValues.
insert(Blend->getIncomingValue(0));
1953 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
1955 UniqueValues.
insert(Blend->getIncomingValue(
I));
1957 if (UniqueValues.
size() == 1) {
1958 Blend->replaceAllUsesWith(*UniqueValues.
begin());
1959 Blend->eraseFromParent();
1963 if (Blend->isNormalized())
1969 unsigned StartIndex = 0;
1970 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
1975 if (Mask->getNumUsers() == 1 && !
match(Mask,
m_False())) {
1982 OperandsWithMask.
push_back(Blend->getIncomingValue(StartIndex));
1984 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
1985 if (
I == StartIndex)
1987 OperandsWithMask.
push_back(Blend->getIncomingValue(
I));
1988 OperandsWithMask.
push_back(Blend->getMask(
I));
1993 OperandsWithMask, *Blend, Blend->getDebugLoc());
1994 NewBlend->insertBefore(&R);
1996 VPValue *DeadMask = Blend->getMask(StartIndex);
1998 Blend->eraseFromParent();
2003 if (NewBlend->getNumOperands() == 3 &&
2005 VPValue *Inc0 = NewBlend->getOperand(0);
2006 VPValue *Inc1 = NewBlend->getOperand(1);
2007 VPValue *OldMask = NewBlend->getOperand(2);
2008 NewBlend->setOperand(0, Inc1);
2009 NewBlend->setOperand(1, Inc0);
2010 NewBlend->setOperand(2, NewMask);
2037 APInt MaxVal = AlignedTC - 1;
2040 unsigned NewBitWidth =
2046 bool MadeChange =
false;
2055 if (!WideIV || !WideIV->isCanonical() ||
2056 WideIV->hasMoreThanOneUniqueUser() ||
2057 NewIVTy == WideIV->getScalarType())
2062 VPUser *SingleUser = WideIV->getSingleUser();
2070 auto *NewStart = Plan.
getZero(NewIVTy);
2071 WideIV->setStartValue(NewStart);
2073 WideIV->setStepValue(NewStep);
2080 Cmp->setOperand(1, NewBTC);
2094 return any_of(
Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
2096 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, PSE);
2110 const SCEV *VectorTripCount =
2115 "Trip count SCEV must be computable");
2136 auto *Term = &ExitingVPBB->
back();
2149 for (
unsigned Part = 0; Part < UF; ++Part) {
2155 Extracts[Part] = Ext;
2167 match(Phi->getBackedgeValue(),
2169 assert(Index &&
"Expected index from ActiveLaneMask instruction");
2186 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
2193 "Expected incoming values of Phi to be ActiveLaneMasks");
2198 EntryALM->setOperand(2, ALMMultiplier);
2199 LoopALM->setOperand(2, ALMMultiplier);
2203 ExtractFromALM(EntryALM, EntryExtracts);
2208 ExtractFromALM(LoopALM, LoopExtracts);
2210 Not->setOperand(0, LoopExtracts[0]);
2213 for (
unsigned Part = 0; Part < UF; ++Part) {
2214 Phis[Part]->setStartValue(EntryExtracts[Part]);
2215 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
2228 auto *Term = &ExitingVPBB->
back();
2240 const SCEV *VectorTripCount =
2246 "Trip count SCEV must be computable");
2265 Term->setOperand(1, Plan.
getTrue());
2270 {}, Term->getDebugLoc());
2272 Term->eraseFromParent();
2307 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
2317 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
2318 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
2346 auto TryToPushSinkCandidate = [&](
VPRecipeBase *SinkCandidate) {
2349 if (SinkCandidate == Previous)
2353 !Seen.
insert(SinkCandidate).second ||
2366 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
2369 "only recipes with a single defined value expected");
2384 if (SinkCandidate == FOR)
2387 SinkCandidate->moveAfter(Previous);
2388 Previous = SinkCandidate;
2412 [&VPDT, HoistPoint](
VPUser *U) {
2413 auto *R = cast<VPRecipeBase>(U);
2414 return HoistPoint == R ||
2415 VPDT.properlyDominates(HoistPoint, R);
2417 "HoistPoint must dominate all users of FOR");
2419 auto NeedsHoisting = [HoistPoint, &VPDT,
2421 VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
2422 if (!HoistCandidate)
2427 HoistCandidate->
getRegion() == EnclosingLoopRegion) &&
2428 "CFG in VPlan should still be flat, without replicate regions");
2430 if (!Visited.
insert(HoistCandidate).second)
2442 return HoistCandidate;
2451 for (
unsigned I = 0;
I != HoistCandidates.
size(); ++
I) {
2454 "only recipes with a single defined value expected");
2466 if (
auto *R = NeedsHoisting(
Op)) {
2469 if (R->getNumDefinedValues() != 1)
2483 HoistCandidate->moveBefore(*HoistPoint->
getParent(),
2502 VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
2505 while (
auto *PrevPhi =
2507 assert(PrevPhi->getParent() == FOR->getParent());
2509 Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
2528 {FOR, FOR->getBackedgeValue()});
2533 RecurSplice->setOperand(0, FOR);
2544 RecurKind RK = PhiR->getRecurrenceKind();
2551 RecWithFlags->dropPoisonGeneratingFlags();
2557struct VPCSEDenseMapInfo :
public DenseMapInfo<VPSingleDefRecipe *> {
2559 return Def == getEmptyKey() || Def == getTombstoneKey();
2570 return GEP->getSourceElementType();
2573 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2574 [](
auto *
I) {
return I->getSourceElementType(); })
2575 .
Default([](
auto *) {
return nullptr; });
2579 static bool canHandle(
const VPSingleDefRecipe *Def) {
2588 if (!
C || (!
C->first && (
C->second == Instruction::InsertValue ||
2589 C->second == Instruction::ExtractValue)))
2595 return !
Def->mayReadFromMemory();
2599 static unsigned getHashValue(
const VPSingleDefRecipe *Def) {
2600 const VPlan *Plan =
Def->getParent()->getPlan();
2601 VPTypeAnalysis TypeInfo(*Plan);
2604 getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
2607 if (RFlags->hasPredicate())
2610 return hash_combine(Result, SIVSteps->getInductionOpcode());
2615 static bool isEqual(
const VPSingleDefRecipe *L,
const VPSingleDefRecipe *R) {
2618 if (
L->getVPRecipeID() !=
R->getVPRecipeID() ||
2620 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2622 !
equal(
L->operands(),
R->operands()))
2625 "must have valid opcode info for both recipes");
2627 if (LFlags->hasPredicate() &&
2628 LFlags->getPredicate() !=
2632 if (LSIV->getInductionOpcode() !=
2638 const VPRegionBlock *RegionL =
L->getRegion();
2639 const VPRegionBlock *RegionR =
R->getRegion();
2642 L->getParent() !=
R->getParent())
2644 const VPlan *Plan =
L->getParent()->getPlan();
2645 VPTypeAnalysis TypeInfo(*Plan);
2646 return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
2662 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2666 if (!VPDT.
dominates(V->getParent(), VPBB))
2671 Def->replaceAllUsesWith(V);
2690 "Expected vector prehader's successor to be the vector loop region");
2697 return !Op->isDefinedOutsideLoopRegions();
2700 R.moveBefore(*Preheader, Preheader->
end());
2718 assert(!RepR->isPredicated() &&
2719 "Expected prior transformation of predicated replicates to "
2720 "replicate regions");
2725 if (!RepR->isSingleScalar())
2737 if (
any_of(Def->users(), [&SinkBB, &LoopRegion](
VPUser *U) {
2738 auto *UserR = cast<VPRecipeBase>(U);
2739 VPBasicBlock *Parent = UserR->getParent();
2741 if (SinkBB && SinkBB != Parent)
2746 return UserR->isPhi() || Parent->getEnclosingLoopRegion() ||
2747 Parent->getSinglePredecessor() != LoopRegion;
2757 "Defining block must dominate sink block");
2783 VPValue *ResultVPV = R.getVPSingleValue();
2785 unsigned NewResSizeInBits = MinBWs.
lookup(UI);
2786 if (!NewResSizeInBits)
2799 (void)OldResSizeInBits;
2807 VPW->dropPoisonGeneratingFlags();
2809 if (OldResSizeInBits != NewResSizeInBits &&
2813 Instruction::ZExt, ResultVPV, OldResTy,
nullptr,
2815 Ext->insertAfter(&R);
2817 Ext->setOperand(0, ResultVPV);
2818 assert(OldResSizeInBits > NewResSizeInBits &&
"Nothing to shrink?");
2821 "Only ICmps should not need extending the result.");
2831 for (
unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
2832 auto *
Op = R.getOperand(Idx);
2833 unsigned OpSizeInBits =
2835 if (OpSizeInBits == NewResSizeInBits)
2837 assert(OpSizeInBits > NewResSizeInBits &&
"nothing to truncate");
2838 auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.
try_emplace(
Op);
2840 R.setOperand(Idx, ProcessedIter->second);
2848 Builder.setInsertPoint(&R);
2850 Builder.createWidenCast(Instruction::Trunc,
Op, NewResTy);
2851 ProcessedIter->second = NewOp;
2852 R.setOperand(Idx, NewOp);
2860 std::optional<VPDominatorTree> VPDT;
2877 assert(VPBB->getNumSuccessors() == 2 &&
2878 "Two successors expected for BranchOnCond");
2879 unsigned RemovedIdx;
2890 "There must be a single edge between VPBB and its successor");
2898 VPBB->back().eraseFromParent();
2910 if (Reachable.contains(
B))
2921 for (
VPValue *Def : R.definedValues())
2922 Def->replaceAllUsesWith(&Tmp);
2923 R.eraseFromParent();
2982 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
2993 auto *EntryIncrement = Builder.createOverflowingOp(
2995 DL,
"index.part.next");
3001 {EntryIncrement, TC, ALMMultiplier},
DL,
3002 "active.lane.mask.entry");
3009 LaneMaskPhi->insertBefore(*HeaderVPBB, HeaderVPBB->begin());
3014 Builder.setInsertPoint(OriginalTerminator);
3015 auto *InLoopIncrement = Builder.createOverflowingOp(
3017 {CanonicalIVIncrement, &Plan.
getVF()}, {
false,
false},
DL);
3019 {InLoopIncrement, TC, ALMMultiplier},
DL,
3020 "active.lane.mask.next");
3025 auto *NotMask = Builder.createNot(ALM,
DL);
3032 bool UseActiveLaneMaskForControlFlow) {
3034 auto *FoundWidenCanonicalIVUser =
find_if(
3036 assert(FoundWidenCanonicalIVUser &&
3037 "Must have widened canonical IV when tail folding!");
3039 auto *WideCanonicalIV =
3042 if (UseActiveLaneMaskForControlFlow) {
3051 nullptr,
"active.lane.mask");
3067 template <
typename OpTy>
bool match(OpTy *V)
const {
3078template <
typename Op0_t,
typename Op1_t>
3097 VPValue *Addr, *Mask, *EndPtr;
3100 auto AdjustEndPtr = [&CurRecipe, &EVL](
VPValue *EndPtr) {
3102 EVLEndPtr->insertBefore(&CurRecipe);
3103 EVLEndPtr->setOperand(1, &EVL);
3107 auto GetVPReverse = [&CurRecipe, &EVL, &TypeInfo, Plan,
3112 Intrinsic::experimental_vp_reverse, {V, Plan->
getTrue(), &EVL},
3114 Reverse->insertBefore(&CurRecipe);
3118 if (
match(&CurRecipe,
3129 Mask = GetVPReverse(Mask);
3130 Addr = AdjustEndPtr(EndPtr);
3133 LoadR->insertBefore(&CurRecipe);
3135 Intrinsic::experimental_vp_reverse, {LoadR, Plan->
getTrue(), &EVL},
3143 StoredVal, EVL, Mask);
3145 if (
match(&CurRecipe,
3149 Mask = GetVPReverse(Mask);
3150 Addr = AdjustEndPtr(EndPtr);
3151 StoredVal = GetVPReverse(ReversedVal);
3153 StoredVal, EVL, Mask);
3157 if (Rdx->isConditional() &&
3162 if (Interleave->getMask() &&
3167 if (
match(&CurRecipe,
3176 Intrinsic::vp_merge, {Mask,
LHS,
RHS, &EVL},
3190 if (
match(&CurRecipe,
3204 VPValue *HeaderMask =
nullptr, *EVL =
nullptr;
3209 HeaderMask = R.getVPSingleValue();
3221 NewR->insertBefore(R);
3222 for (
auto [Old, New] :
3223 zip_equal(R->definedValues(), NewR->definedValues()))
3224 Old->replaceAllUsesWith(New);
3238 Merge->insertBefore(LogicalAnd);
3239 LogicalAnd->replaceAllUsesWith(
Merge);
3247 R->eraseFromParent();
3264 "User of VF that we can't transform to EVL.");
3274 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
3275 "increment of the canonical induction.");
3291 MaxEVL = Builder.createScalarZExtOrTrunc(
3295 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
3296 VPValue *PrevEVL = Builder.createScalarPhi(
3310 Intrinsic::experimental_vp_splice,
3311 {V1, V2, Imm, Plan.
getTrue(), PrevEVL, &EVL},
3315 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
3328 if (match(&R, m_ComputeReductionResult(m_Select(m_Specific(HeaderMask),
3329 m_VPValue(), m_VPValue()))))
3330 return R.getOperand(0)->getDefiningRecipe()->getRegion() ==
3331 Plan.getVectorLoopRegion();
3343 VPValue *EVLMask = Builder.createICmp(
3403 VPlan &Plan,
const std::optional<unsigned> &MaxSafeElements) {
3415 auto *CurrentIteration =
3417 CurrentIteration->insertBefore(*Header, Header->begin());
3418 VPBuilder Builder(Header, Header->getFirstNonPhi());
3421 VPPhi *AVLPhi = Builder.createScalarPhi(
3425 if (MaxSafeElements) {
3435 Builder.setInsertPoint(CanonicalIVIncrement);
3439 OpVPEVL = Builder.createScalarZExtOrTrunc(
3440 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
3442 auto *NextIter = Builder.createAdd(
3443 OpVPEVL, CurrentIteration, CanonicalIVIncrement->getDebugLoc(),
3444 "current.iteration.next", CanonicalIVIncrement->getNoWrapFlags());
3445 CurrentIteration->addOperand(NextIter);
3449 "avl.next", {
true,
false});
3457 CanonicalIV->replaceAllUsesWith(CurrentIteration);
3458 CanonicalIVIncrement->setOperand(0, CanonicalIV);
3472 assert(!CurrentIteration &&
3473 "Found multiple CurrentIteration. Only one expected");
3474 CurrentIteration = PhiR;
3478 if (!CurrentIteration)
3489 CurrentIteration->
getDebugLoc(),
"current.iteration.iv");
3498 CanIVInc->eraseFromParent();
3507 if (Header->empty())
3516 if (!
match(EVLPhi->getBackedgeValue(),
3529 [[maybe_unused]]
bool FoundAVLNext =
3532 assert(FoundAVLNext &&
"Didn't find AVL backedge?");
3540 [[maybe_unused]]
bool FoundIncrement =
match(
3547 "Expected BranchOnCond with ICmp comparing CanIV + VFxUF with vector "
3552 LatchBr->setOperand(
3563 return R->getRegion() ||
3567 for (
const SCEV *Stride : StridesMap.
values()) {
3570 const APInt *StrideConst;
3593 RewriteMap[StrideV] = PSE.
getSCEV(StrideV);
3600 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3603 if (NewSCEV != ScevExpr) {
3605 ExpSCEV->replaceAllUsesWith(NewExp);
3614 const std::function<
bool(
BasicBlock *)> &BlockNeedsPredication) {
3618 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](
VPRecipeBase *Root) {
3623 while (!Worklist.
empty()) {
3626 if (!Visited.
insert(CurRec).second)
3648 RecWithFlags->isDisjoint()) {
3651 Builder.createAdd(
A,
B, RecWithFlags->getDebugLoc());
3652 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3653 RecWithFlags->replaceAllUsesWith(New);
3654 RecWithFlags->eraseFromParent();
3657 RecWithFlags->dropPoisonGeneratingFlags();
3662 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3663 "found instruction with poison generating flags not covered by "
3664 "VPRecipeWithIRFlags");
3669 if (
VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3682 Instruction &UnderlyingInstr = WidenRec->getIngredient();
3683 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3684 if (AddrDef && WidenRec->isConsecutive() &&
3685 BlockNeedsPredication(UnderlyingInstr.
getParent()))
3686 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3688 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3692 InterleaveRec->getInterleaveGroup();
3693 bool NeedPredication =
false;
3695 I < NumMembers; ++
I) {
3698 NeedPredication |= BlockNeedsPredication(Member->getParent());
3701 if (NeedPredication)
3702 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3714 if (InterleaveGroups.empty())
3721 for (
const auto *IG : InterleaveGroups) {
3727 StoredValues.
push_back(StoreR->getStoredValue());
3728 for (
unsigned I = 1;
I < IG->getFactor(); ++
I) {
3735 StoredValues.
push_back(StoreR->getStoredValue());
3739 bool NeedsMaskForGaps =
3740 (IG->requiresScalarEpilogue() && !EpilogueAllowed) ||
3741 (!StoredValues.
empty() && !IG->isFull());
3753 VPValue *Addr = Start->getAddr();
3762 assert(IG->getIndex(IRInsertPos) != 0 &&
3763 "index of insert position shouldn't be zero");
3767 IG->getIndex(IRInsertPos),
3771 Addr =
B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3777 if (IG->isReverse()) {
3780 -(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
3781 ReversePtr->insertBefore(InsertPos);
3785 InsertPos->getMask(), NeedsMaskForGaps,
3786 InterleaveMD, InsertPos->getDebugLoc());
3787 VPIG->insertBefore(InsertPos);
3790 for (
unsigned i = 0; i < IG->getFactor(); ++i)
3793 if (!Member->getType()->isVoidTy()) {
3852 AddOp = Instruction::Add;
3853 MulOp = Instruction::Mul;
3855 AddOp =
ID.getInductionOpcode();
3856 MulOp = Instruction::FMul;
3864 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty,
DL);
3865 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty,
DL);
3874 Init = Builder.createWidenCast(Instruction::UIToFP,
Init, StepTy);
3879 Init = Builder.createNaryOp(MulOp, {
Init, SplatStep}, Flags);
3880 Init = Builder.createNaryOp(AddOp, {SplatStart,
Init}, Flags,
3896 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3900 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3903 VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
3906 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3913 auto *
Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3916 WidePHI->addOperand(
Next);
3944 VPlan *Plan = R->getParent()->getPlan();
3945 VPValue *Start = R->getStartValue();
3946 VPValue *Step = R->getStepValue();
3947 VPValue *VF = R->getVFValue();
3949 assert(R->getInductionDescriptor().getKind() ==
3951 "Not a pointer induction according to InductionDescriptor!");
3954 "Recipe should have been replaced");
3960 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start,
DL,
"pointer.phi");
3964 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3967 Offset = Builder.createOverflowingOp(Instruction::Mul, {
Offset, Step});
3969 Builder.createWidePtrAdd(ScalarPtrPhi,
Offset,
DL,
"vector.gep");
3970 R->replaceAllUsesWith(PtrAdd);
3975 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.
inferScalarType(VF),
3977 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
3980 Builder.createPtrAdd(ScalarPtrPhi, Inc,
DL,
"ptr.ind");
3989 if (!R->isReplicator())
3993 R->dissolveToCFGLoop();
4014 assert(Br->getNumOperands() == 2 &&
4015 "BranchOnTwoConds must have exactly 2 conditions");
4019 assert(Successors.size() == 3 &&
4020 "BranchOnTwoConds must have exactly 3 successors");
4025 VPValue *Cond0 = Br->getOperand(0);
4026 VPValue *Cond1 = Br->getOperand(1);
4031 !BrOnTwoCondsBB->
getParent() &&
"regions must already be dissolved");
4044 Br->eraseFromParent();
4067 WidenIVR->replaceAllUsesWith(PtrAdd);
4080 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
4081 Select = Builder.createSelect(Blend->getMask(
I),
4082 Blend->getIncomingValue(
I),
Select,
4083 R.getDebugLoc(),
"predphi", *Blend);
4084 Blend->replaceAllUsesWith(
Select);
4089 if (!VEPR->getOffset()) {
4091 "Expected unroller to have materialized offset for UF != 1");
4092 VEPR->materializeOffset();
4107 for (
VPValue *
Op : LastActiveL->operands()) {
4108 VPValue *NotMask = Builder.createNot(
Op, LastActiveL->getDebugLoc());
4113 VPValue *FirstInactiveLane = Builder.createNaryOp(
4115 LastActiveL->getDebugLoc(),
"first.inactive.lane");
4121 Builder.createSub(FirstInactiveLane, One,
4122 LastActiveL->getDebugLoc(),
"last.active.lane");
4132 assert(VPI->isMasked() &&
4133 "Unmasked MaskedCond should be simplified earlier");
4134 VPI->replaceAllUsesWith(Builder.createNaryOp(
4146 Instruction::Add, VPI->operands(), VPI->getNoWrapFlags(),
4147 VPI->getDebugLoc());
4148 VPI->replaceAllUsesWith(
Add);
4157 DebugLoc DL = BranchOnCountInst->getDebugLoc();
4160 ToRemove.push_back(BranchOnCountInst);
4175 ? Instruction::UIToFP
4176 : Instruction::Trunc;
4177 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
4183 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
4189 MulOpc = Instruction::FMul;
4190 Flags = VPI->getFastMathFlags();
4192 MulOpc = Instruction::Mul;
4197 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
4199 VPI->replaceAllUsesWith(VectorStep);
4205 R->eraseFromParent();
4213 struct EarlyExitInfo {
4224 if (Pred == MiddleVPBB)
4229 VPValue *CondOfEarlyExitingVPBB;
4230 [[maybe_unused]]
bool Matched =
4231 match(EarlyExitingVPBB->getTerminator(),
4233 assert(Matched &&
"Terminator must be BranchOnCond");
4237 VPBuilder EarlyExitingBuilder(EarlyExitingVPBB->getTerminator());
4238 auto *CondToEarlyExit = EarlyExitingBuilder.
createNaryOp(
4240 TrueSucc == ExitBlock
4241 ? CondOfEarlyExitingVPBB
4242 : EarlyExitingBuilder.
createNot(CondOfEarlyExitingVPBB));
4248 "exit condition must dominate the latch");
4257 assert(!Exits.
empty() &&
"must have at least one early exit");
4264 for (
const auto &[Num, VPB] :
enumerate(RPOT))
4266 llvm::sort(Exits, [&RPOIdx](
const EarlyExitInfo &
A,
const EarlyExitInfo &
B) {
4267 return RPOIdx[
A.EarlyExitingVPBB] < RPOIdx[
B.EarlyExitingVPBB];
4273 for (
unsigned I = 0;
I + 1 < Exits.
size(); ++
I)
4274 for (
unsigned J =
I + 1; J < Exits.
size(); ++J)
4276 Exits[
I].EarlyExitingVPBB) &&
4277 "RPO sort must place dominating exits before dominated ones");
4283 VPValue *Combined = Exits[0].CondToExit;
4284 for (
const EarlyExitInfo &Info :
drop_begin(Exits))
4285 Combined = Builder.createLogicalOr(Combined, Info.CondToExit);
4291 "Early exit store masking not implemented");
4295 for (
unsigned Idx = 0; Idx != Exits.
size(); ++Idx) {
4299 VectorEarlyExitVPBBs[Idx] = VectorEarlyExitVPBB;
4307 Exits.
size() == 1 ? VectorEarlyExitVPBBs[0]
4341 for (
auto [Exit, VectorEarlyExitVPBB] :
4342 zip_equal(Exits, VectorEarlyExitVPBBs)) {
4343 auto &[EarlyExitingVPBB, EarlyExitVPBB,
_] = Exit;
4355 ExitIRI->getIncomingValueForBlock(EarlyExitingVPBB);
4356 VPValue *NewIncoming = IncomingVal;
4358 VPBuilder EarlyExitBuilder(VectorEarlyExitVPBB);
4363 ExitIRI->removeIncomingValueFor(EarlyExitingVPBB);
4364 ExitIRI->addOperand(NewIncoming);
4367 EarlyExitingVPBB->getTerminator()->eraseFromParent();
4401 bool IsLastDispatch = (
I + 2 == Exits.
size());
4403 IsLastDispatch ? VectorEarlyExitVPBBs.
back()
4409 VectorEarlyExitVPBBs[
I]->setPredecessors({CurrentBB});
4412 CurrentBB = FalseBB;
4419 "Unexpected terminator");
4420 auto *IsLatchExitTaken =
4422 LatchExitingBranch->getOperand(1));
4424 DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
4425 LatchExitingBranch->eraseFromParent();
4426 Builder.setInsertPoint(LatchVPBB);
4428 {IsAnyExitTaken, IsLatchExitTaken}, LatchDL);
4430 LatchVPBB->
setSuccessors({DispatchVPBB, MiddleVPBB, HeaderVPBB});
4441 Type *RedTy = Ctx.Types.inferScalarType(Red);
4442 VPValue *VecOp = Red->getVecOp();
4444 assert(!Red->isPartialReduction() &&
4445 "This path does not support partial reductions");
4448 auto IsExtendedRedValidAndClampRange =
4461 "getExtendedReductionCost only supports integer types");
4462 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
4463 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
4464 Red->getFastMathFlags(),
CostKind);
4465 return ExtRedCost.
isValid() && ExtRedCost < ExtCost + RedCost;
4473 IsExtendedRedValidAndClampRange(
4476 Ctx.Types.inferScalarType(
A)))
4495 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
4496 Opcode != Instruction::FAdd)
4499 assert(!Red->isPartialReduction() &&
4500 "This path does not support partial reductions");
4501 Type *RedTy = Ctx.Types.inferScalarType(Red);
4504 auto IsMulAccValidAndClampRange =
4511 Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
4517 (Ext0->getOpcode() != Ext1->getOpcode() ||
4518 Ext0->getOpcode() == Instruction::CastOps::FPExt))
4522 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
4524 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
4531 ExtCost += Ext0->computeCost(VF, Ctx);
4533 ExtCost += Ext1->computeCost(VF, Ctx);
4535 ExtCost += OuterExt->computeCost(VF, Ctx);
4537 return MulAccCost.
isValid() &&
4538 MulAccCost < ExtCost + MulCost + RedCost;
4543 VPValue *VecOp = Red->getVecOp();
4581 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
4582 Type *WideTy = Ctx.Types.inferScalarType(ExtA);
4583 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
4584 Mul->setOperand(1, ExtB);
4594 ExtendAndReplaceConstantOp(RecipeA, RecipeB,
B,
Mul);
4599 IsMulAccValidAndClampRange(
Mul, RecipeA, RecipeB,
nullptr)) {
4606 if (!
Sub && IsMulAccValidAndClampRange(
Mul,
nullptr,
nullptr,
nullptr))
4623 ExtendAndReplaceConstantOp(Ext0, Ext1,
B,
Mul);
4632 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
4633 Ext0->getOpcode() == Ext1->getOpcode() &&
4634 IsMulAccValidAndClampRange(
Mul, Ext0, Ext1, Ext) &&
Mul->hasOneUse()) {
4636 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(),
nullptr,
4637 *Ext0, *Ext0, Ext0->getDebugLoc());
4638 NewExt0->insertBefore(Ext0);
4643 Ext->getResultType(),
nullptr, *Ext1,
4644 *Ext1, Ext1->getDebugLoc());
4647 Mul->setOperand(0, NewExt0);
4648 Mul->setOperand(1, NewExt1);
4649 Red->setOperand(1,
Mul);
4663 assert(!Red->isPartialReduction() &&
4664 "This path does not support partial reductions");
4667 auto IP = std::next(Red->getIterator());
4668 auto *VPBB = Red->getParent();
4678 Red->replaceAllUsesWith(AbstractR);
4708 for (
VPValue *VPV : VPValues) {
4717 if (
User->usesScalars(VPV))
4720 HoistPoint = HoistBlock->
begin();
4724 "All users must be in the vector preheader or dominated by it");
4729 VPV->replaceUsesWithIf(Broadcast,
4730 [VPV, Broadcast](
VPUser &U,
unsigned Idx) {
4731 return Broadcast != &U && !U.usesScalars(VPV);
4748 if (RepR->isPredicated() || !RepR->isSingleScalar() ||
4749 RepR->getOpcode() != Instruction::Load)
4752 VPValue *Addr = RepR->getOperand(0);
4755 if (!
Loc.AATags.Scope)
4760 if (R.mayWriteToMemory()) {
4762 if (!
Loc || !
Loc->AATags.Scope || !
Loc->AATags.NoAlias)
4770 for (
auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
4774 const AAMDNodes &LoadAA = LoadLoc.AATags;
4790 return CommonMetadata;
4793template <
unsigned Opcode>
4798 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
4799 "Only Load and Store opcodes supported");
4800 constexpr bool IsLoad = (Opcode == Instruction::Load);
4806 return TypeInfo.
inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
4811 for (
auto Recipes :
Groups) {
4812 if (Recipes.size() < 2)
4820 VPValue *MaskI = RecipeI->getMask();
4821 Type *TypeI = GetLoadStoreValueType(RecipeI);
4827 bool HasComplementaryMask =
false;
4832 VPValue *MaskJ = RecipeJ->getMask();
4833 Type *TypeJ = GetLoadStoreValueType(RecipeJ);
4834 if (TypeI == TypeJ) {
4844 if (HasComplementaryMask) {
4845 assert(Group.
size() >= 2 &&
"must have at least 2 entries");
4855template <
typename InstType>
4873 for (
auto &Group :
Groups) {
4893 return R->isSingleScalar() == IsSingleScalar;
4895 "all members in group must agree on IsSingleScalar");
4900 LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
4901 IsSingleScalar,
nullptr, *EarliestLoad, CommonMetadata);
4903 UnpredicatedLoad->insertBefore(EarliestLoad);
4907 Load->replaceAllUsesWith(UnpredicatedLoad);
4908 Load->eraseFromParent();
4918 if (!StoreLoc || !StoreLoc->AATags.Scope)
4924 StoresToSink.
end());
4928 SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], PSE, L, TypeInfo);
4942 for (
auto &Group :
Groups) {
4955 VPValue *SelectedValue = Group[0]->getOperand(0);
4958 bool IsSingleScalar = Group[0]->isSingleScalar();
4959 for (
unsigned I = 1;
I < Group.size(); ++
I) {
4960 assert(IsSingleScalar == Group[
I]->isSingleScalar() &&
4961 "all members in group must agree on IsSingleScalar");
4962 VPValue *Mask = Group[
I]->getMask();
4964 SelectedValue = Builder.createSelect(Mask,
Value, SelectedValue,
4973 StoreWithMinAlign->getUnderlyingInstr(),
4974 {SelectedValue, LastStore->getOperand(1)}, IsSingleScalar,
4975 nullptr, *LastStore, CommonMetadata);
4976 UnpredicatedStore->insertBefore(*InsertBB, LastStore->
getIterator());
4980 Store->eraseFromParent();
4987 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
4988 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
5053 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](
VPUser *U) {
5055 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
5062 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
5072 DefR->replaceUsesWithIf(
5073 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
5075 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
5089 for (
VPValue *Def : R.definedValues()) {
5102 auto IsCandidateUnpackUser = [Def](
VPUser *U) {
5104 return U->usesScalars(Def) &&
5107 if (
none_of(Def->users(), IsCandidateUnpackUser))
5114 Unpack->insertAfter(&R);
5115 Def->replaceUsesWithIf(Unpack,
5116 [&IsCandidateUnpackUser](
VPUser &U,
unsigned) {
5117 return IsCandidateUnpackUser(&U);
5126 bool RequiresScalarEpilogue,
VPValue *Step,
5127 std::optional<uint64_t> MaxRuntimeStep) {
5138 assert(StepR->getParent() == VectorPHVPBB &&
5139 "Step must be defined in VectorPHVPBB");
5141 InsertPt = std::next(StepR->getIterator());
5143 VPBuilder Builder(VectorPHVPBB, InsertPt);
5149 if (!RequiresScalarEpilogue &&
match(TC,
m_APInt(TCVal)) && MaxRuntimeStep &&
5161 if (TailByMasking) {
5162 TC = Builder.createAdd(
5173 Builder.createNaryOp(Instruction::URem, {TC, Step},
5182 if (RequiresScalarEpilogue) {
5184 "requiring scalar epilogue is not supported with fail folding");
5187 R = Builder.createSelect(IsZero, Step, R);
5201 "VF and VFxUF must be materialized together");
5213 Builder.createElementCount(TCTy, VFEC * Plan.
getConcreteUF());
5220 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
5224 BC, [&VF](
VPUser &U,
unsigned) {
return !U.usesScalars(&VF); });
5228 VPValue *MulByUF = Builder.createOverflowingOp(
5240 BasicBlock *EntryBB = Entry->getIRBasicBlock();
5248 const SCEV *Expr = ExpSCEV->getSCEV();
5251 ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
5256 ExpSCEV->eraseFromParent();
5259 "VPExpandSCEVRecipes must be at the beginning of the entry block, "
5260 "before any VPIRInstructions");
5263 auto EI = Entry->begin();
5273 return ExpandedSCEVs;
5285 VPValue *OpV,
unsigned Idx,
bool IsScalable) {
5289 return Member0Op == OpV;
5293 return !IsScalable && !W->getMask() && W->isConsecutive() &&
5296 return IR->getInterleaveGroup()->isFull() &&
IR->getVPValue(Idx) == OpV;
5313 for (
unsigned Idx = 0; Idx != WideMember0->getNumOperands(); ++Idx) {
5316 OpsI.
push_back(
Op->getDefiningRecipe()->getOperand(Idx));
5321 if (
any_of(
enumerate(OpsI), [WideMember0, Idx, IsScalable](
const auto &
P) {
5322 const auto &[
OpIdx, OpV] =
P;
5337 if (!InterleaveR || InterleaveR->
getMask())
5338 return std::nullopt;
5340 Type *GroupElementTy =
nullptr;
5344 [&TypeInfo, GroupElementTy](
VPValue *
Op) {
5345 return TypeInfo.inferScalarType(Op) == GroupElementTy;
5347 return std::nullopt;
5352 [&TypeInfo, GroupElementTy](
VPValue *
Op) {
5353 return TypeInfo.inferScalarType(Op) == GroupElementTy;
5355 return std::nullopt;
5359 if (IG->getFactor() != IG->getNumMembers())
5360 return std::nullopt;
5366 assert(
Size.isScalable() == VF.isScalable() &&
5367 "if Size is scalable, VF must be scalable and vice versa");
5368 return Size.getKnownMinValue();
5372 unsigned MinVal = VF.getKnownMinValue();
5374 if (IG->getFactor() == MinVal && GroupSize == GetVectorBitWidthForVF(VF))
5377 return std::nullopt;
5385 return RepR && RepR->isSingleScalar();
5392 auto *R = V->getDefiningRecipe();
5401 for (
unsigned Idx = 0,
E = WideMember0->getNumOperands(); Idx !=
E; ++Idx)
5402 WideMember0->setOperand(
5411 auto *LI =
cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
5413 LoadGroup->getMask(),
true,
5414 {}, LoadGroup->getDebugLoc());
5415 L->insertBefore(LoadGroup);
5421 assert(RepR->isSingleScalar() && RepR->getOpcode() == Instruction::Load &&
5422 "must be a single scalar load");
5423 NarrowedOps.
insert(RepR);
5428 VPValue *PtrOp = WideLoad->getAddr();
5430 PtrOp = VecPtr->getOperand(0);
5435 nullptr, {}, *WideLoad);
5436 N->insertBefore(WideLoad);
5441std::unique_ptr<VPlan>
5461 "unexpected branch-on-count");
5465 std::optional<ElementCount> VFToOptimize;
5479 if (R.mayWriteToMemory() && !InterleaveR)
5494 std::optional<ElementCount> NarrowedVF =
5496 if (!NarrowedVF || (VFToOptimize && NarrowedVF != VFToOptimize))
5498 VFToOptimize = NarrowedVF;
5501 if (InterleaveR->getStoredValues().empty())
5506 auto *Member0 = InterleaveR->getStoredValues()[0];
5516 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
5519 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
5520 return IR && IR->getInterleaveGroup()->isFull() &&
5521 IR->getVPValue(Op.index()) == Op.value();
5530 VFToOptimize->isScalable()))
5535 if (StoreGroups.
empty())
5539 bool RequiresScalarEpilogue =
5550 std::unique_ptr<VPlan> NewPlan;
5552 NewPlan = std::unique_ptr<VPlan>(Plan.
duplicate());
5553 Plan.
setVF(*VFToOptimize);
5554 NewPlan->removeVF(*VFToOptimize);
5560 for (
auto *StoreGroup : StoreGroups) {
5567 StoreGroup->getDebugLoc());
5568 S->insertBefore(StoreGroup);
5569 StoreGroup->eraseFromParent();
5581 if (VFToOptimize->isScalable()) {
5593 RequiresScalarEpilogue, Step);
5601 "All VPVectorPointerRecipes should have been removed");
5617 "must have a BranchOnCond");
5620 if (VF.
isScalable() && VScaleForTuning.has_value())
5621 VectorStep *= *VScaleForTuning;
5622 assert(VectorStep > 0 &&
"trip count should not be zero");
5626 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
5633 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
5645 "Cannot handle loops with uncountable early exits");
5718 make_range(MiddleVPBB->getFirstNonPhi(), MiddleVPBB->end()))) {
5732 "vector.recur.extract.for.phi");
5751 VPValue *WidenIVCandidate = BinOp->getOperand(0);
5752 VPValue *InvariantCandidate = BinOp->getOperand(1);
5754 std::swap(WidenIVCandidate, InvariantCandidate);
5768 auto *ClonedOp = BinOp->
clone();
5769 if (ClonedOp->getOperand(0) == WidenIV) {
5770 ClonedOp->setOperand(0, ScalarIV);
5772 assert(ClonedOp->getOperand(1) == WidenIV &&
"one operand must be WideIV");
5773 ClonedOp->setOperand(1, ScalarIV);
5788 auto CheckSentinel = [&SE](
const SCEV *IVSCEV,
5789 bool UseMax) -> std::optional<APSInt> {
5791 for (
bool Signed : {
true,
false}) {
5800 return std::nullopt;
5808 PhiR->getRecurrenceKind()))
5817 VPValue *BackedgeVal = PhiR->getBackedgeValue();
5831 !
match(FindLastSelect,
5840 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression, PSE,
5846 "IVOfExpressionToSink not being an AddRec must imply "
5847 "FindLastExpression not being an AddRec.");
5858 std::optional<APSInt> SentinelVal = CheckSentinel(IVSCEV, UseMax);
5859 bool UseSigned = SentinelVal && SentinelVal->isSigned();
5866 if (IVOfExpressionToSink) {
5867 const SCEV *FindLastExpressionSCEV =
5869 if (
match(FindLastExpressionSCEV,
5872 if (
auto NewSentinel =
5873 CheckSentinel(FindLastExpressionSCEV, NewUseMax)) {
5876 SentinelVal = *NewSentinel;
5877 UseSigned = NewSentinel->isSigned();
5879 IVSCEV = FindLastExpressionSCEV;
5880 IVOfExpressionToSink =
nullptr;
5890 if (AR->hasNoSignedWrap())
5892 else if (AR->hasNoUnsignedWrap())
5902 VPValue *NewFindLastSelect = BackedgeVal;
5904 if (!SentinelVal || IVOfExpressionToSink) {
5907 DebugLoc DL = FindLastSelect->getDefiningRecipe()->getDebugLoc();
5908 VPBuilder LoopBuilder(FindLastSelect->getDefiningRecipe());
5909 if (FindLastSelect->getDefiningRecipe()->getOperand(1) == PhiR)
5910 SelectCond = LoopBuilder.
createNot(SelectCond);
5917 if (SelectCond !=
Cond || IVOfExpressionToSink) {
5920 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression,
5929 VPIRFlags Flags(MinMaxKind,
false,
false,
5935 NewFindLastSelect, Flags, ExitDL);
5938 VPValue *VectorRegionExitingVal = ReducedIV;
5939 if (IVOfExpressionToSink)
5940 VectorRegionExitingVal =
5942 ReducedIV, IVOfExpressionToSink);
5945 VPValue *StartVPV = PhiR->getStartValue();
5952 NewRdxResult = MiddleBuilder.
createSelect(Cmp, VectorRegionExitingVal,
5962 AnyOfPhi->insertAfter(PhiR);
5969 OrVal, VectorRegionExitingVal, StartVPV, ExitDL);
5982 PhiR->hasUsesOutsideReductionChain());
5983 NewPhiR->insertBefore(PhiR);
5984 PhiR->replaceAllUsesWith(NewPhiR);
5985 PhiR->eraseFromParent();
5992struct ReductionExtend {
5993 Type *SrcType =
nullptr;
5994 ExtendKind Kind = ExtendKind::PR_None;
6000struct ExtendedReductionOperand {
6004 ReductionExtend ExtendA, ExtendB;
6012struct VPPartialReductionChain {
6015 VPWidenRecipe *ReductionBinOp =
nullptr;
6017 ExtendedReductionOperand ExtendedOp;
6024 unsigned AccumulatorOpIdx;
6025 unsigned ScaleFactor;
6038 if (!
Op->hasOneUse() ||
6044 auto *Trunc = Builder.createWidenCast(Instruction::CastOps::Trunc,
6045 Op->getOperand(1), NarrowTy);
6047 Op->setOperand(1, Builder.createWidenCast(ExtOpc, Trunc, WideTy));
6056 auto *
Sub =
Op->getOperand(0)->getDefiningRecipe();
6058 assert(Ext->getOpcode() ==
6060 "Expected both the LHS and RHS extends to be the same");
6061 bool IsSigned = Ext->getOpcode() == Instruction::SExt;
6064 auto *FreezeX = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
X}));
6065 auto *FreezeY = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
Y}));
6066 auto *
Max = Builder.insert(
6068 {FreezeX, FreezeY}, SrcTy));
6069 auto *Min = Builder.insert(
6071 {FreezeX, FreezeY}, SrcTy));
6074 return Builder.createWidenCast(Instruction::CastOps::ZExt, AbsDiff,
6087 if (!
Mul->hasOneUse() ||
6088 (Ext->getOpcode() != MulLHS->getOpcode() && MulLHS != MulRHS) ||
6089 MulLHS->getOpcode() != MulRHS->getOpcode())
6092 Mul->setOperand(0, Builder.createWidenCast(MulLHS->getOpcode(),
6093 MulLHS->getOperand(0),
6094 Ext->getResultType()));
6095 Mul->setOperand(1, MulLHS == MulRHS
6096 ?
Mul->getOperand(0)
6097 : Builder.createWidenCast(MulRHS->getOpcode(),
6098 MulRHS->getOperand(0),
6099 Ext->getResultType()));
6108 VPValue *VecOp = Red->getVecOp();
6142static void transformToPartialReduction(
const VPPartialReductionChain &Chain,
6150 WidenRecipe->
getOperand(1 - Chain.AccumulatorOpIdx));
6166 if (WidenRecipe->
getOpcode() == Instruction::Sub &&
6174 Builder.insert(NegRecipe);
6175 ExtendedOp = NegRecipe;
6179 ExtendedOp = optimizeExtendsForPartialReduction(ExtendedOp, TypeInfo);
6189 assert((!ExitValue || IsLastInChain) &&
6190 "if we found ExitValue, it must match RdxPhi's backedge value");
6201 PartialRed->insertBefore(WidenRecipe);
6209 E->insertBefore(WidenRecipe);
6210 PartialRed->replaceAllUsesWith(
E);
6223 auto *NewScaleFactor = Plan.
getConstantInt(32, Chain.ScaleFactor);
6224 StartInst->setOperand(2, NewScaleFactor);
6232 VPValue *OldStartValue = StartInst->getOperand(0);
6233 StartInst->setOperand(0, StartInst->getOperand(1));
6237 assert(RdxResult &&
"Could not find reduction result");
6240 constexpr unsigned SubOpc = Instruction::BinaryOps::Sub;
6246 [&NewResult](
VPUser &U,
unsigned Idx) {
return &
U != NewResult; });
6252 const VPPartialReductionChain &Link,
6255 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6256 std::optional<unsigned> BinOpc = std::nullopt;
6258 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6259 BinOpc = ExtendedOp.ExtendsUser->
getOpcode();
6261 std::optional<llvm::FastMathFlags>
Flags;
6266 ? (unsigned)Instruction::Add
6269 Opcode, ExtendedOp.ExtendA.SrcType, ExtendedOp.ExtendB.SrcType, RdxType,
6270 VF, ExtendedOp.ExtendA.Kind, ExtendedOp.ExtendB.Kind, BinOpc,
6293static std::optional<ExtendedReductionOperand>
6297 "Op should be operand of UpdateR");
6305 if (
Op->hasOneUse() &&
6315 if (LHSInputType != RHSInputType ||
6316 LHSExt->getOpcode() != RHSExt->getOpcode())
6317 return std::nullopt;
6320 return ExtendedReductionOperand{
6322 {LHSInputType, getPartialReductionExtendKind(LHSExt)},
6326 std::optional<TTI::PartialReductionExtendKind> OuterExtKind;
6329 VPValue *CastSource = CastRecipe->getOperand(0);
6330 OuterExtKind = getPartialReductionExtendKind(CastRecipe);
6339 }
else if (UpdateR->
getOpcode() == Instruction::Add ||
6340 UpdateR->
getOpcode() == Instruction::FAdd) {
6344 return ExtendedReductionOperand{
6351 if (!
Op->hasOneUse())
6352 return std::nullopt;
6357 return std::nullopt;
6367 return std::nullopt;
6371 ExtendKind LHSExtendKind = getPartialReductionExtendKind(LHSCast);
6374 const APInt *RHSConst =
nullptr;
6380 return std::nullopt;
6384 if (Cast && OuterExtKind &&
6385 getPartialReductionExtendKind(Cast) != OuterExtKind)
6386 return std::nullopt;
6388 Type *RHSInputType = LHSInputType;
6389 ExtendKind RHSExtendKind = LHSExtendKind;
6392 RHSExtendKind = getPartialReductionExtendKind(RHSCast);
6395 return ExtendedReductionOperand{
6396 MulOp, {LHSInputType, LHSExtendKind}, {RHSInputType, RHSExtendKind}};
6403static std::optional<SmallVector<VPPartialReductionChain>>
6411 return std::nullopt;
6422 VPValue *CurrentValue = ExitValue;
6423 while (CurrentValue != RedPhiR) {
6426 return std::nullopt;
6433 std::optional<ExtendedReductionOperand> ExtendedOp =
6434 matchExtendedReductionOperand(UpdateR,
Op, TypeInfo);
6436 ExtendedOp = matchExtendedReductionOperand(UpdateR, PrevValue, TypeInfo);
6438 return std::nullopt;
6442 Type *ExtSrcType = ExtendedOp->ExtendA.SrcType;
6445 return std::nullopt;
6450 VPPartialReductionChain Link(
6451 {UpdateR, *ExtendedOp, RK,
6455 CurrentValue = PrevValue;
6460 std::reverse(Chain.
begin(), Chain.
end());
6479 if (
auto Chains = getScaledReductions(RedPhiR, CostCtx,
Range))
6480 ChainsByPhi.
try_emplace(RedPhiR, std::move(*Chains));
6483 if (ChainsByPhi.
empty())
6490 for (
const auto &[
_, Chains] : ChainsByPhi)
6491 for (
const VPPartialReductionChain &Chain : Chains) {
6492 PartialReductionOps.
insert(Chain.ExtendedOp.ExtendsUser);
6493 ScaledReductionMap[Chain.ReductionBinOp] = Chain.ScaleFactor;
6499 auto ExtendUsersValid = [&](
VPValue *Ext) {
6501 return PartialReductionOps.contains(cast<VPRecipeBase>(U));
6505 auto IsProfitablePartialReductionChainForVF =
6512 for (
const VPPartialReductionChain &Link : Chain) {
6513 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6514 InstructionCost LinkCost = getPartialReductionLinkCost(CostCtx, Link, VF);
6518 PartialCost += LinkCost;
6519 RegularCost += Link.ReductionBinOp->
computeCost(VF, CostCtx);
6521 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6522 RegularCost += ExtendedOp.ExtendsUser->
computeCost(VF, CostCtx);
6525 RegularCost += Extend->computeCost(VF, CostCtx);
6527 return PartialCost.
isValid() && PartialCost < RegularCost;
6535 for (
auto &[RedPhiR, Chains] : ChainsByPhi) {
6536 for (
const VPPartialReductionChain &Chain : Chains) {
6537 if (!
all_of(Chain.ExtendedOp.ExtendsUser->operands(), ExtendUsersValid)) {
6541 auto UseIsValid = [&, RedPhiR = RedPhiR](
VPUser *U) {
6543 return PhiR == RedPhiR;
6545 return Chain.ScaleFactor == ScaledReductionMap.
lookup_or(R, 0) ||
6551 if (!
all_of(Chain.ReductionBinOp->users(), UseIsValid)) {
6560 auto *RepR = dyn_cast<VPReplicateRecipe>(U);
6561 return RepR && RepR->getOpcode() == Instruction::Store;
6572 return IsProfitablePartialReductionChainForVF(Chains, VF);
6578 for (
auto &[Phi, Chains] : ChainsByPhi)
6579 for (
const VPPartialReductionChain &Chain : Chains)
6580 transformToPartialReduction(Chain, CostCtx.
Types, Plan, Phi);
6594 if (VPI && VPI->getUnderlyingValue() &&
6608 New->insertBefore(VPI);
6609 if (VPI->getOpcode() == Instruction::Load)
6610 VPI->replaceAllUsesWith(New->getVPSingleValue());
6611 VPI->eraseFromParent();
6616 FinalRedStoresBuilder))
6625 ReplaceWith(Histogram);
6633 ReplaceWith(Recipe);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const X86InstrFMA3Group Groups[]
static const uint32_t IV[8]
Helper for extra no-alias checks via known-safe recipe and SCEV.
SinkStoreInfo(const SmallPtrSetImpl< VPRecipeBase * > &ExcludeRecipes, VPReplicateRecipe &GroupLeader, PredicatedScalarEvolution &PSE, const Loop &L, VPTypeAnalysis &TypeInfo)
bool shouldSkip(VPRecipeBase &R) const
Return true if R should be skipped during alias checking, either because it's in the exclude set or b...
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
uint64_t getZExtValue() const
Get zero extended value.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt abs() const
Get the absolute value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
An arbitrary precision integer that knows its signedness.
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
@ NoAlias
The two locations do not alias at all.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
back - Get the last element.
const T & front() const
front - Get the first element.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
This class represents a range of values.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
ValueT lookup_or(const_arg_type_t< KeyT > Val, U &&Default) const
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
Utility class for floating point operations which can have information about relaxed accuracy require...
Convenience struct for specifying and reasoning about fast-math flags.
Represents flags for the getelementptr instruction/expression.
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
uint32_t getNumMembers() const
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
std::pair< iterator, bool > try_emplace(const KeyT &Key, Ts &&...Args)
ValueT lookup(const KeyT &Key) const
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
Post-order traversal of a graph.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyze scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(SCEVUse SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUDivExpr(SCEVUse LHS, SCEVUse RHS)
Get a canonical unsigned division expression, or something simpler if possible.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
static LLVM_ABI bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias)
static LLVM_ABI AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
const VPBlocksTy & getPredecessors() const
const std::string & getName() const
void clearSuccessors()
Remove all the successors of this block.
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
static SmallVector< VPBasicBlock * > blocksInSingleSuccessorChainBetween(VPBasicBlock *FirstBB, VPBasicBlock *LastBB)
Returns the blocks between FirstBB and LastBB, where FirstBB to LastBB forms a single-sucessor chain.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createAnyOfReduction(VPValue *ChainOp, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown())
Create an AnyOf reduction pattern: or-reduce ChainOp, freeze the result, then select between TrueVal ...
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPWidenPHIRecipe * createWidenPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
A recipe for generating the phi node tracking the current scalar iteration index.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
A special type of VPBasicBlock that wraps an existing IR basic block.
Class to record and manage LLVM IR flags.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ FirstOrderRecurrenceSplice
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPRegionBlock * getRegion()
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Helper class to create VPRecipies from IR instructions.
VPHistogramRecipe * widenIfHistogram(VPInstruction *VPI)
If VPI represents a histogram operation (as determined by LoopVectorizationLegality) make that safe f...
void setRecipe(Instruction *I, VPRecipeBase *R)
Set the recipe created for given ingredient.
VPRecipeBase * tryToWidenMemory(VPInstruction *VPI, VFRange &Range)
Check if the load or store instruction VPI should widened for Range.Start and potentially masked.
bool replaceWithFinalIfReductionStore(VPInstruction *VPI, VPBuilder &FinalRedStoresBuilder)
If VPI is a store of a reduction into an invariant address, delete it.
VPReplicateRecipe * handleReplication(VPInstruction *VPI, VFRange &Range)
Build a VPReplicationRecipe for VPI.
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe for handling reduction phis.
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
VPInstruction * getOrCreateCanonicalIVIncrement()
Get the canonical IV increment instruction if it exists.
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
void clearCanonicalIVNUW(VPInstruction *Increment)
Unsets NUW for the canonical IV increment Increment, for loop regions.
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
bool isPredicated() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
unsigned getNumOperands() const
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A Recipe for widening the canonical induction variable of the vector loop.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
VPWidenRecipe * clone() override
Clone the current recipe.
unsigned getOpcode() const
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
bool hasVF(ElementCount VF) const
const DataLayout & getDataLayout() const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
bool hasScalableVF() const
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPSymbolicValue & getVectorTripCount()
The vector trip count.
VPValue * getBackedgeTakenCount() const
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop, i.e.
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
constexpr ScalarTy getFixedValue() const
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_isa< To... > m_Isa()
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ?
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::FAdd, true > m_c_FAdd(const LHS &L, const RHS &R)
Matches FAdd with LHS and RHS in either order.
LogicalOp_match< LHS, RHS, Instruction::And, true > m_c_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R with LHS and RHS in either order.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
bool match(const SCEV *S, const Pattern &P)
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ComputeReductionResult, Op0_t > m_ComputeReductionResult(const Op0_t &Op0)
auto m_WidenAnyExtend(const Op0_t &Op0)
match_bind< VPIRValue > m_VPIRValue(VPIRValue *&V)
Match a VPIRValue.
VPInstruction_match< VPInstruction::StepVector > m_StepVector()
auto m_VPPhi(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
auto m_WidenIntrinsic(const T &...Ops)
VPInstruction_match< VPInstruction::ExitingIVValue, Op0_t > m_ExitingIVValue(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
match_bind< VPSingleDefRecipe > m_VPSingleDefRecipe(VPSingleDefRecipe *&V)
Match a VPSingleDefRecipe, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
auto m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_True()
auto m_VPValue()
Match an arbitrary VPValue and ignore it.
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode; its operands are not matched, as the number of operands is not fixed.
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
match_bind< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
auto m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or only its first lane is used.
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated reductions.
VPInstruction * findCanonicalIVIncrement(VPlan &Plan)
Find the canonical IV increment of Plan's vector loop region.
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPRecipeBase * findRecipe(VPValue *Start, PredT Pred)
Search Start's users for a recipe satisfying Pred, looking through recipes with definitions.
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) TODO: Int...
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms like all_of.
void stable_sort(R &&Range)
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing through region blocks.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like all_of.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
UncountableExitStyle
Different methods of handling early exits.
@ ReadOnly
No side effects to worry about, so we can process any uncountable exits in the loop and branch either...
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vector.
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator range.
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the given extension kind.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector&lt;T, 0&gt;).
bool isa(const From &Val)
isa&lt;X&gt; - Return true if the parameter to the template is an instance of one of the template type arguments.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector.reduce.* used to generate it.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result value is uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
A collection of metadata nodes that might be associated with a memory access used by the alias-analysis infrastructure.
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * NoAlias
The tag specifying the noalias scope.
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given value type T.
This reduction is unordered with the partial result scaled down by some factor.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
TargetTransformInfo::TargetCostKind CostKind
const TargetTransformInfo & TTI
A recipe for handling first-order recurrence phis.
A VPValue representing a live-in from the input IR or a constant.
Type * getType() const
Returns the type of the underlying IR value.
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
bool isMaterialized() const
Returns true if this symbolic value has been materialized.
A recipe for widening load operations with vector-predication intrinsics, using the address to load from, the explicit vector length and an optional mask.
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening store operations with vector-predication intrinsics, using the value to store, the address to store to, the explicit vector length and an optional mask.
A recipe for widening store operations, using the stored value, the address to store to and an optional mask.