57 if (!VPBB->getParent())
60 auto EndIter = Term ? Term->getIterator() : VPBB->end();
65 VPValue *VPV = Ingredient.getVPSingleValue();
82 *Load, Ingredient.getOperand(0),
nullptr ,
83 false , *VPI, Ingredient.getDebugLoc());
86 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
87 nullptr ,
false , *VPI,
88 Ingredient.getDebugLoc());
91 Ingredient.getDebugLoc());
103 if (VectorID == Intrinsic::experimental_noalias_scope_decl)
108 if (VectorID == Intrinsic::assume ||
109 VectorID == Intrinsic::lifetime_end ||
110 VectorID == Intrinsic::lifetime_start ||
111 VectorID == Intrinsic::sideeffect ||
112 VectorID == Intrinsic::pseudoprobe) {
117 const bool IsSingleScalar = VectorID != Intrinsic::assume &&
118 VectorID != Intrinsic::pseudoprobe;
122 Ingredient.getDebugLoc());
125 *CI, VectorID,
drop_end(Ingredient.operands()), CI->getType(),
126 VPIRFlags(*CI), *VPI, CI->getDebugLoc());
130 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
134 *VPI, Ingredient.getDebugLoc());
138 "inductions must be created earlier");
147 "Only recpies with zero or one defined values expected");
148 Ingredient.eraseFromParent();
165 if (
A->getOpcode() != Instruction::Store ||
166 B->getOpcode() != Instruction::Store)
176 const APInt *Distance;
182 Type *TyA = TypeInfo.inferScalarType(
A->getOperand(0));
184 Type *TyB = TypeInfo.inferScalarType(
B->getOperand(0));
190 uint64_t MaxStoreSize = std::max(SizeA, SizeB);
192 auto VFs =
B->getParent()->getPlan()->vectorFactors();
196 return Distance->
abs().
uge(
204 : ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), PSE(PSE),
205 L(L), TypeInfo(TypeInfo) {}
212 return ExcludeRecipes.contains(&R) ||
213 (Store && isNoAliasViaDistance(Store, &GroupLeader));
226 std::optional<SinkStoreInfo> SinkInfo = {}) {
227 bool CheckReads = SinkInfo.has_value();
234 if (SinkInfo && SinkInfo->shouldSkip(R))
238 if (!
R.mayWriteToMemory() && !(CheckReads &&
R.mayReadFromMemory()))
256template <
unsigned Opcode>
261 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
262 "Only Load and Store opcodes supported");
263 constexpr bool IsLoad = (Opcode == Instruction::Load);
270 if (!RepR || RepR->getOpcode() != Opcode || !FilterFn(RepR))
274 VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
277 RecipesByAddress[AddrSCEV].push_back(RepR);
282 for (
auto &Group :
Groups) {
301 if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
306 return RepR && RepR->getOpcode() == Instruction::Alloca;
315 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
331 if (!ScalarVFOnly && RepR->isSingleScalar())
334 WorkList.
insert({SinkTo, Candidate});
346 for (
auto &Recipe : *VPBB)
348 InsertIfValidSinkCandidate(VPBB,
Op);
352 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
355 std::tie(SinkTo, SinkCandidate) = WorkList[
I];
360 auto UsersOutsideSinkTo =
362 return cast<VPRecipeBase>(U)->getParent() != SinkTo;
364 if (
any_of(UsersOutsideSinkTo, [SinkCandidate](
VPUser *U) {
365 return !U->usesFirstLaneOnly(SinkCandidate);
368 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
370 if (NeedsDuplicating) {
374 if (
auto *SinkCandidateRepR =
380 nullptr , *SinkCandidateRepR,
384 Clone = SinkCandidate->
clone();
394 InsertIfValidSinkCandidate(SinkTo,
Op);
404 if (!EntryBB || EntryBB->size() != 1 ||
414 if (EntryBB->getNumSuccessors() != 2)
419 if (!Succ0 || !Succ1)
422 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
424 if (Succ0->getSingleSuccessor() == Succ1)
426 if (Succ1->getSingleSuccessor() == Succ0)
443 if (!Region1->isReplicator())
445 auto *MiddleBasicBlock =
447 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
452 if (!Region2 || !Region2->isReplicator())
457 if (!Mask1 || Mask1 != Mask2)
460 assert(Mask1 && Mask2 &&
"both region must have conditions");
466 if (TransformedRegions.
contains(Region1))
473 if (!Then1 || !Then2)
493 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
499 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
500 Phi1ToMove.eraseFromParent();
503 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
517 TransformedRegions.
insert(Region1);
520 return !TransformedRegions.
empty();
527 std::string RegionName = (
Twine(
"pred.") + Instr->getOpcodeName()).str();
528 assert(Instr->getParent() &&
"Predicated instruction not in any basic block");
529 auto *BlockInMask = PredRecipe->
getMask();
548 RecipeWithoutMask->getDebugLoc());
572 if (RepR->isPredicated())
591 if (ParentRegion && ParentRegion->
getExiting() == CurrentBlock)
603 if (!VPBB->getParent())
607 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
616 R.moveBefore(*PredVPBB, PredVPBB->
end());
618 auto *ParentRegion = VPBB->getParent();
619 if (ParentRegion && ParentRegion->getExiting() == VPBB)
620 ParentRegion->setExiting(PredVPBB);
624 return !WorkList.
empty();
631 bool ShouldSimplify =
true;
632 while (ShouldSimplify) {
648 if (!
IV ||
IV->getTruncInst())
663 for (
auto *U : FindMyCast->
users()) {
665 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
666 FoundUserCast = UserCast;
670 FindMyCast = FoundUserCast;
695 if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
709 WidenOriginalIV->dropPoisonGeneratingFlags();
722 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
724 if (IsConditionalAssume)
727 if (R.mayHaveSideEffects())
731 return all_of(R.definedValues(),
732 [](
VPValue *V) { return V->getNumUsers() == 0; });
752 VPUser *PhiUser = PhiR->getSingleUser();
755 if (PhiUser !=
Incoming->getDefiningRecipe() ||
758 PhiR->replaceAllUsesWith(Start);
759 PhiR->eraseFromParent();
760 Incoming->getDefiningRecipe()->eraseFromParent();
775 Kind, FPBinOp, StartV, CanonicalIV, Step,
"offset.idx");
785 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy,
DL);
791 if (ResultTy != StepTy) {
798 Builder.setInsertPoint(VecPreheader);
799 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy,
DL);
801 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
807 for (
unsigned I = 0;
I !=
Users.size(); ++
I) {
812 Users.insert_range(V->users());
814 return Users.takeVector();
828 nullptr, StartV, StepV, PtrIV->
getDebugLoc(), Builder);
865 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
866 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
874 Def->operands(),
true,
876 Clone->insertAfter(Def);
877 Def->replaceAllUsesWith(Clone);
888 PtrIV->replaceAllUsesWith(PtrAdd);
895 if (HasOnlyVectorVFs &&
none_of(WideIV->users(), [WideIV](
VPUser *U) {
896 return U->usesScalars(WideIV);
902 Plan,
ID.getKind(),
ID.getInductionOpcode(),
904 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
905 WideIV->getDebugLoc(), Builder);
908 if (!HasOnlyVectorVFs) {
910 "plans containing a scalar VF cannot also include scalable VFs");
911 WideIV->replaceAllUsesWith(Steps);
914 WideIV->replaceUsesWithIf(Steps,
915 [WideIV, HasScalableVF](
VPUser &U,
unsigned) {
917 return U.usesFirstLaneOnly(WideIV);
918 return U.usesScalars(WideIV);
934 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
939 if (!Def || Def->getNumOperands() != 2)
947 auto IsWideIVInc = [&]() {
948 auto &
ID = WideIV->getInductionDescriptor();
951 VPValue *IVStep = WideIV->getStepValue();
952 switch (
ID.getInductionOpcode()) {
953 case Instruction::Add:
955 case Instruction::FAdd:
957 case Instruction::FSub:
960 case Instruction::Sub: {
980 return IsWideIVInc() ? WideIV :
nullptr;
1000 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1013 FirstActiveLane =
B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
1014 FirstActiveLaneType,
DL);
1015 VPValue *EndValue =
B.createAdd(CanonicalIV, FirstActiveLane,
DL);
1022 EndValue =
B.createAdd(EndValue, One,
DL);
1025 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
1027 VPIRValue *Start = WideIV->getStartValue();
1028 VPValue *Step = WideIV->getStepValue();
1029 EndValue =
B.createDerivedIV(
1031 Start, EndValue, Step);
1046 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1053 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
1056 Start, VectorTC, Step);
1085 assert(EndValue &&
"Must have computed the end value up front");
1101 auto *Zero = Plan.
getZero(StepTy);
1102 return B.createPtrAdd(EndValue,
B.createSub(Zero, Step),
1107 return B.createNaryOp(
1108 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
1110 : Instruction::FAdd,
1111 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
1123 VPBuilder VectorPHBuilder(VectorPH, VectorPH->begin());
1132 WideIV, VectorPHBuilder, TypeInfo, ResumeTC))
1133 EndValues[WideIV] = EndValue;
1143 R.getVPSingleValue()->replaceAllUsesWith(EndValue);
1144 R.eraseFromParent();
1153 for (
auto [Idx, PredVPBB] :
enumerate(ExitVPBB->getPredecessors())) {
1155 if (PredVPBB == MiddleVPBB)
1157 ExitIRI->getOperand(Idx),
1161 Plan, TypeInfo, PredVPBB, ExitIRI->getOperand(Idx), PSE);
1163 ExitIRI->setOperand(Idx, Escape);
1180 const auto &[V, Inserted] = SCEV2VPV.
try_emplace(ExpR->getSCEV(), ExpR);
1183 ExpR->replaceAllUsesWith(V->second);
1184 ExpR->eraseFromParent();
1193 while (!WorkList.
empty()) {
1195 if (!Seen.
insert(Cur).second)
1203 R->eraseFromParent();
1210static std::optional<std::pair<bool, unsigned>>
1213 std::optional<std::pair<bool, unsigned>>>(R)
1216 [](
auto *
I) {
return std::make_pair(
false,
I->getOpcode()); })
1218 return std::make_pair(
true,
I->getVectorIntrinsicID());
1220 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](
auto *
I) {
1224 return std::make_pair(
false,
1227 .
Default([](
auto *) {
return std::nullopt; });
1245 Value *V =
Op->getUnderlyingValue();
1251 auto FoldToIRValue = [&]() ->
Value * {
1253 if (OpcodeOrIID->first) {
1254 if (R.getNumOperands() != 2)
1256 unsigned ID = OpcodeOrIID->second;
1257 return Folder.FoldBinaryIntrinsic(
ID,
Ops[0],
Ops[1],
1260 unsigned Opcode = OpcodeOrIID->second;
1269 return Folder.FoldSelect(
Ops[0],
Ops[1],
1272 return Folder.FoldBinOp(Instruction::BinaryOps::Xor,
Ops[0],
1274 case Instruction::Select:
1275 return Folder.FoldSelect(
Ops[0],
Ops[1],
Ops[2]);
1276 case Instruction::ICmp:
1277 case Instruction::FCmp:
1280 case Instruction::GetElementPtr: {
1283 return Folder.FoldGEP(
GEP->getSourceElementType(),
Ops[0],
1293 case Instruction::ExtractElement:
1300 if (
Value *V = FoldToIRValue())
1301 return R.getParent()->getPlan()->getOrAddLiveIn(V);
1307 VPlan *Plan = Def->getParent()->getPlan();
1313 return Def->replaceAllUsesWith(V);
1319 PredPHI->replaceAllUsesWith(
Op);
1332 bool CanCreateNewRecipe =
1339 if (TruncTy == ATy) {
1340 Def->replaceAllUsesWith(
A);
1349 : Instruction::ZExt;
1352 if (
auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1354 Ext->setUnderlyingValue(UnderlyingExt);
1356 Def->replaceAllUsesWith(Ext);
1358 auto *Trunc = Builder.createWidenCast(Instruction::Trunc,
A, TruncTy);
1359 Def->replaceAllUsesWith(Trunc);
1367 for (
VPUser *U :
A->users()) {
1369 for (
VPValue *VPV : R->definedValues())
1383 Def->replaceAllUsesWith(
X);
1384 Def->eraseFromParent();
1390 return Def->replaceAllUsesWith(
1395 return Def->replaceAllUsesWith(
X);
1399 return Def->replaceAllUsesWith(
1404 return Def->replaceAllUsesWith(
1409 return Def->replaceAllUsesWith(
X);
1413 return Def->replaceAllUsesWith(Plan->
getFalse());
1417 return Def->replaceAllUsesWith(
X);
1420 if (CanCreateNewRecipe &&
1425 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1426 !Def->getOperand(1)->hasMoreThanOneUniqueUser()))
1427 return Def->replaceAllUsesWith(
1428 Builder.createLogicalAnd(
X, Builder.createOr(
Y, Z)));
1433 return Def->replaceAllUsesWith(Def->getOperand(1));
1438 return Def->replaceAllUsesWith(Builder.createLogicalAnd(
X,
Y));
1442 return Def->replaceAllUsesWith(Plan->
getFalse());
1445 return Def->replaceAllUsesWith(
X);
1449 if (CanCreateNewRecipe &&
1451 return Def->replaceAllUsesWith(Builder.createNot(
C));
1455 Def->setOperand(0,
C);
1456 Def->setOperand(1,
Y);
1457 Def->setOperand(2,
X);
1462 return Def->replaceAllUsesWith(
A);
1465 return Def->replaceAllUsesWith(
A);
1468 return Def->replaceAllUsesWith(
1475 return Def->replaceAllUsesWith(
1477 Def->getDebugLoc(),
"", NW));
1483 return Def->replaceAllUsesWith(Builder.createNaryOp(
1485 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1490 return Def->replaceAllUsesWith(Builder.createNaryOp(
1492 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1497 return Def->replaceAllUsesWith(
A);
1512 R->setOperand(1,
Y);
1513 R->setOperand(2,
X);
1517 R->replaceAllUsesWith(Cmp);
1522 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1523 Cmp->setDebugLoc(Def->getDebugLoc());
1535 if (
Op->getNumUsers() > 1 ||
1539 }
else if (!UnpairedCmp) {
1540 UnpairedCmp =
Op->getDefiningRecipe();
1544 UnpairedCmp =
nullptr;
1551 if (NewOps.
size() < Def->getNumOperands()) {
1553 return Def->replaceAllUsesWith(NewAnyOf);
1560 if (CanCreateNewRecipe &&
1566 return Def->replaceAllUsesWith(NewCmp);
1574 return Def->replaceAllUsesWith(Def->getOperand(1));
1580 X = Builder.createWidenCast(Instruction::Trunc,
X, WideStepTy);
1581 Def->replaceAllUsesWith(
X);
1591 Def->setOperand(1, Def->getOperand(0));
1592 Def->setOperand(0,
Y);
1599 return Def->replaceAllUsesWith(Def->getOperand(0));
1605 Def->replaceAllUsesWith(
1606 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1610 return Def->replaceAllUsesWith(
A);
1616 Def->replaceAllUsesWith(
1617 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1624 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1629 Def->replaceAllUsesWith(
1639 "broadcast operand must be single-scalar");
1640 Def->setOperand(0,
C);
1645 if (Def->getNumOperands() == 1) {
1646 Def->replaceAllUsesWith(Def->getOperand(0));
1651 Phi->replaceAllUsesWith(Phi->getOperand(0));
1657 if (Def->getNumOperands() == 1 &&
1659 return Def->replaceAllUsesWith(IRV);
1672 return Def->replaceAllUsesWith(
A);
1675 Def->replaceAllUsesWith(Builder.createNaryOp(
1676 Instruction::ExtractElement, {A, LaneToExtract}, Def->getDebugLoc()));
1690 auto *IVInc = Def->getOperand(0);
1691 if (IVInc->getNumUsers() == 2) {
1696 if (Phi->getNumUsers() == 1 || (Phi->getNumUsers() == 2 && Inc)) {
1697 Def->replaceAllUsesWith(IVInc);
1699 Inc->replaceAllUsesWith(Phi);
1700 Phi->setOperand(0,
Y);
1716 Steps->replaceAllUsesWith(Steps->getOperand(0));
1724 Def->replaceUsesWithIf(StartV, [](
const VPUser &U,
unsigned Idx) {
1726 return PhiR && PhiR->isInLoop();
1732 Def->replaceAllUsesWith(
A);
1741 [Def,
A](
VPUser *U) { return U->usesScalars(A) || Def == U; })) {
1742 return Def->replaceAllUsesWith(
A);
1746 return Def->replaceAllUsesWith(
A);
1773 while (!Worklist.
empty()) {
1782 R->replaceAllUsesWith(
1783 Builder.createLogicalAnd(HeaderMask, Builder.createLogicalAnd(
X,
Y)));
1802 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1811 !WidenStoreR->isConsecutive()) {
1812 VPValue *Mask = WidenStoreR->getMask();
1821 {WidenStoreR->getOperand(1)});
1826 &WidenStoreR->getIngredient(), {Extract, WidenStoreR->getAddr()},
1827 true ,
nullptr , {},
1829 ScalarStore->insertBefore(WidenStoreR);
1830 WidenStoreR->eraseFromParent();
1838 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1839 true ,
nullptr , *RepR ,
1840 *RepR , RepR->getDebugLoc());
1841 Clone->insertBefore(RepOrWidenR);
1843 VPValue *ExtractOp = Clone->getOperand(0);
1849 Clone->setOperand(0, ExtractOp);
1850 RepR->eraseFromParent();
1859 auto IntroducesBCastOf = [](
const VPValue *
Op) {
1868 return !U->usesScalars(
Op);
1872 if (
any_of(RepOrWidenR->users(), IntroducesBCastOf(RepOrWidenR)) &&
1875 make_filter_range(Op->users(), not_equal_to(RepOrWidenR)),
1876 IntroducesBCastOf(Op)))
1880 auto *IRV = dyn_cast<VPIRValue>(Op);
1881 bool LiveInNeedsBroadcast = IRV && !isa<Constant>(IRV->getValue());
1882 auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
1883 return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
1888 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1889 true ,
nullptr, *RepOrWidenR);
1890 Clone->insertBefore(RepOrWidenR);
1891 RepOrWidenR->replaceAllUsesWith(Clone);
1893 RepOrWidenR->eraseFromParent();
1929 if (Blend->isNormalized() || !
match(Blend->getMask(0),
m_False()))
1930 UniqueValues.
insert(Blend->getIncomingValue(0));
1931 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
1933 UniqueValues.
insert(Blend->getIncomingValue(
I));
1935 if (UniqueValues.
size() == 1) {
1936 Blend->replaceAllUsesWith(*UniqueValues.
begin());
1937 Blend->eraseFromParent();
1941 if (Blend->isNormalized())
1947 unsigned StartIndex = 0;
1948 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
1953 if (Mask->getNumUsers() == 1 && !
match(Mask,
m_False())) {
1960 OperandsWithMask.
push_back(Blend->getIncomingValue(StartIndex));
1962 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
1963 if (
I == StartIndex)
1965 OperandsWithMask.
push_back(Blend->getIncomingValue(
I));
1966 OperandsWithMask.
push_back(Blend->getMask(
I));
1971 OperandsWithMask, *Blend, Blend->getDebugLoc());
1972 NewBlend->insertBefore(&R);
1974 VPValue *DeadMask = Blend->getMask(StartIndex);
1976 Blend->eraseFromParent();
1981 if (NewBlend->getNumOperands() == 3 &&
1983 VPValue *Inc0 = NewBlend->getOperand(0);
1984 VPValue *Inc1 = NewBlend->getOperand(1);
1985 VPValue *OldMask = NewBlend->getOperand(2);
1986 NewBlend->setOperand(0, Inc1);
1987 NewBlend->setOperand(1, Inc0);
1988 NewBlend->setOperand(2, NewMask);
2015 APInt MaxVal = AlignedTC - 1;
2018 unsigned NewBitWidth =
2024 bool MadeChange =
false;
2033 if (!WideIV || !WideIV->isCanonical() ||
2034 WideIV->hasMoreThanOneUniqueUser() ||
2035 NewIVTy == WideIV->getScalarType())
2040 VPUser *SingleUser = WideIV->getSingleUser();
2048 auto *NewStart = Plan.
getZero(NewIVTy);
2049 WideIV->setStartValue(NewStart);
2051 WideIV->setStepValue(NewStep);
2058 Cmp->setOperand(1, NewBTC);
2072 return any_of(
Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
2074 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, PSE);
2087 const SCEV *VectorTripCount =
2092 "Trip count SCEV must be computable");
2113 auto *Term = &ExitingVPBB->
back();
2126 for (
unsigned Part = 0; Part < UF; ++Part) {
2132 Extracts[Part] = Ext;
2144 match(Phi->getBackedgeValue(),
2146 assert(Index &&
"Expected index from ActiveLaneMask instruction");
2163 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
2170 "Expected incoming values of Phi to be ActiveLaneMasks");
2175 EntryALM->setOperand(2, ALMMultiplier);
2176 LoopALM->setOperand(2, ALMMultiplier);
2180 ExtractFromALM(EntryALM, EntryExtracts);
2185 ExtractFromALM(LoopALM, LoopExtracts);
2187 Not->setOperand(0, LoopExtracts[0]);
2190 for (
unsigned Part = 0; Part < UF; ++Part) {
2191 Phis[Part]->setStartValue(EntryExtracts[Part]);
2192 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
2205 auto *Term = &ExitingVPBB->
back();
2217 const SCEV *VectorTripCount =
2223 "Trip count SCEV must be computable");
2242 Term->setOperand(1, Plan.
getTrue());
2247 {}, Term->getDebugLoc());
2249 Term->eraseFromParent();
2284 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
2294 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
2295 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
2323 auto TryToPushSinkCandidate = [&](
VPRecipeBase *SinkCandidate) {
2326 if (SinkCandidate == Previous)
2330 !Seen.
insert(SinkCandidate).second ||
2343 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
2346 "only recipes with a single defined value expected");
2361 if (SinkCandidate == FOR)
2364 SinkCandidate->moveAfter(Previous);
2365 Previous = SinkCandidate;
2389 [&VPDT, HoistPoint](
VPUser *U) {
2390 auto *R = cast<VPRecipeBase>(U);
2391 return HoistPoint == R ||
2392 VPDT.properlyDominates(HoistPoint, R);
2394 "HoistPoint must dominate all users of FOR");
2396 auto NeedsHoisting = [HoistPoint, &VPDT,
2398 VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
2399 if (!HoistCandidate)
2404 HoistCandidate->
getRegion() == EnclosingLoopRegion) &&
2405 "CFG in VPlan should still be flat, without replicate regions");
2407 if (!Visited.
insert(HoistCandidate).second)
2419 return HoistCandidate;
2428 for (
unsigned I = 0;
I != HoistCandidates.
size(); ++
I) {
2431 "only recipes with a single defined value expected");
2443 if (
auto *R = NeedsHoisting(
Op)) {
2446 if (R->getNumDefinedValues() != 1)
2460 HoistCandidate->moveBefore(*HoistPoint->
getParent(),
2480 VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
2483 while (
auto *PrevPhi =
2485 assert(PrevPhi->getParent() == FOR->getParent());
2487 Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
2506 {FOR, FOR->getBackedgeValue()});
2511 RecurSplice->setOperand(0, FOR);
2517 for (
VPUser *U : RecurSplice->users()) {
2527 VPValue *PenultimateIndex =
B.createSub(LastActiveLane, One);
2528 VPValue *PenultimateLastIter =
2530 {PenultimateIndex, FOR->getBackedgeValue()});
2535 VPValue *Sel =
B.createSelect(Cmp, LastPrevIter, PenultimateLastIter);
2548 RecurKind RK = PhiR->getRecurrenceKind();
2555 RecWithFlags->dropPoisonGeneratingFlags();
2561struct VPCSEDenseMapInfo :
public DenseMapInfo<VPSingleDefRecipe *> {
2563 return Def == getEmptyKey() || Def == getTombstoneKey();
2574 return GEP->getSourceElementType();
2577 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2578 [](
auto *
I) {
return I->getSourceElementType(); })
2579 .
Default([](
auto *) {
return nullptr; });
2583 static bool canHandle(
const VPSingleDefRecipe *Def) {
2592 if (!
C || (!
C->first && (
C->second == Instruction::InsertValue ||
2593 C->second == Instruction::ExtractValue)))
2599 return !
Def->mayReadFromMemory();
2603 static unsigned getHashValue(
const VPSingleDefRecipe *Def) {
2604 const VPlan *Plan =
Def->getParent()->getPlan();
2605 VPTypeAnalysis TypeInfo(*Plan);
2608 getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
2611 if (RFlags->hasPredicate())
2617 static bool isEqual(
const VPSingleDefRecipe *L,
const VPSingleDefRecipe *R) {
2620 if (
L->getVPRecipeID() !=
R->getVPRecipeID() ||
2622 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2624 !
equal(
L->operands(),
R->operands()))
2627 "must have valid opcode info for both recipes");
2629 if (LFlags->hasPredicate() &&
2630 LFlags->getPredicate() !=
2636 const VPRegionBlock *RegionL =
L->getRegion();
2637 const VPRegionBlock *RegionR =
R->getRegion();
2640 L->getParent() !=
R->getParent())
2642 const VPlan *Plan =
L->getParent()->getPlan();
2643 VPTypeAnalysis TypeInfo(*Plan);
2644 return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
2660 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2664 if (!VPDT.
dominates(V->getParent(), VPBB))
2669 Def->replaceAllUsesWith(V);
2688 "Expected vector prehader's successor to be the vector loop region");
2695 return !Op->isDefinedOutsideLoopRegions();
2698 R.moveBefore(*Preheader, Preheader->
end());
2716 assert(!RepR->isPredicated() &&
2717 "Expected prior transformation of predicated replicates to "
2718 "replicate regions");
2723 if (!RepR->isSingleScalar())
2733 if (Def->getNumUsers() == 0)
2742 auto *UserR = cast<VPRecipeBase>(U);
2743 VPBasicBlock *Parent = UserR->getParent();
2746 if (UserR->isPhi() || Parent->getEnclosingLoopRegion())
2749 if (SinkBB && SinkBB != Parent)
2763 "Defining block must dominate sink block");
2789 VPValue *ResultVPV = R.getVPSingleValue();
2791 unsigned NewResSizeInBits = MinBWs.
lookup(UI);
2792 if (!NewResSizeInBits)
2805 (void)OldResSizeInBits;
2813 VPW->dropPoisonGeneratingFlags();
2815 if (OldResSizeInBits != NewResSizeInBits &&
2819 Instruction::ZExt, ResultVPV, OldResTy,
nullptr,
2821 Ext->insertAfter(&R);
2823 Ext->setOperand(0, ResultVPV);
2824 assert(OldResSizeInBits > NewResSizeInBits &&
"Nothing to shrink?");
2827 "Only ICmps should not need extending the result.");
2837 for (
unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
2838 auto *
Op = R.getOperand(Idx);
2839 unsigned OpSizeInBits =
2841 if (OpSizeInBits == NewResSizeInBits)
2843 assert(OpSizeInBits > NewResSizeInBits &&
"nothing to truncate");
2844 auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.
try_emplace(
Op);
2846 R.setOperand(Idx, ProcessedIter->second);
2854 Builder.setInsertPoint(&R);
2856 Builder.createWidenCast(Instruction::Trunc,
Op, NewResTy);
2857 ProcessedIter->second = NewOp;
2858 R.setOperand(Idx, NewOp);
2866 std::optional<VPDominatorTree> VPDT;
2883 assert(VPBB->getNumSuccessors() == 2 &&
2884 "Two successors expected for BranchOnCond");
2885 unsigned RemovedIdx;
2896 "There must be a single edge between VPBB and its successor");
2904 VPBB->back().eraseFromParent();
2916 if (Reachable.contains(
B))
2927 for (
VPValue *Def : R.definedValues())
2928 Def->replaceAllUsesWith(&Tmp);
2929 R.eraseFromParent();
2985 VPValue *StartV = CanonicalIVPHI->getStartValue();
2987 auto *CanonicalIVIncrement =
2990 CanonicalIVIncrement->dropPoisonGeneratingFlags();
2991 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
3002 auto *EntryIncrement = Builder.createOverflowingOp(
3004 DL,
"index.part.next");
3010 {EntryIncrement, TC, ALMMultiplier},
DL,
3011 "active.lane.mask.entry");
3017 LaneMaskPhi->insertAfter(CanonicalIVPHI);
3022 Builder.setInsertPoint(OriginalTerminator);
3023 auto *InLoopIncrement = Builder.createOverflowingOp(
3025 {CanonicalIVIncrement, &Plan.
getVF()}, {
false,
false},
DL);
3027 {InLoopIncrement, TC, ALMMultiplier},
DL,
3028 "active.lane.mask.next");
3033 auto *NotMask = Builder.createNot(ALM,
DL);
3040 bool UseActiveLaneMaskForControlFlow) {
3042 auto *FoundWidenCanonicalIVUser =
find_if(
3044 assert(FoundWidenCanonicalIVUser &&
3045 "Must have widened canonical IV when tail folding!");
3047 auto *WideCanonicalIV =
3050 if (UseActiveLaneMaskForControlFlow) {
3059 nullptr,
"active.lane.mask");
3075 template <
typename OpTy>
bool match(OpTy *V)
const {
3086template <
typename Op0_t,
typename Op1_t>
3105 VPValue *Addr, *Mask, *EndPtr;
3108 auto AdjustEndPtr = [&CurRecipe, &EVL](
VPValue *EndPtr) {
3110 EVLEndPtr->insertBefore(&CurRecipe);
3111 EVLEndPtr->setOperand(1, &EVL);
3115 auto GetVPReverse = [&CurRecipe, &EVL, &TypeInfo, Plan,
3120 Intrinsic::experimental_vp_reverse, {V, Plan->
getTrue(), &EVL},
3122 Reverse->insertBefore(&CurRecipe);
3126 if (
match(&CurRecipe,
3137 Mask = GetVPReverse(Mask);
3138 Addr = AdjustEndPtr(EndPtr);
3141 LoadR->insertBefore(&CurRecipe);
3143 Intrinsic::experimental_vp_reverse, {LoadR, Plan->
getTrue(), &EVL},
3151 StoredVal, EVL, Mask);
3153 if (
match(&CurRecipe,
3157 Mask = GetVPReverse(Mask);
3158 Addr = AdjustEndPtr(EndPtr);
3159 StoredVal = GetVPReverse(ReversedVal);
3161 StoredVal, EVL, Mask);
3165 if (Rdx->isConditional() &&
3170 if (Interleave->getMask() &&
3175 if (
match(&CurRecipe,
3184 Intrinsic::vp_merge, {Mask,
LHS,
RHS, &EVL},
3204 VPValue *HeaderMask =
nullptr, *EVL =
nullptr;
3209 HeaderMask = R.getVPSingleValue();
3221 NewR->insertBefore(R);
3222 for (
auto [Old, New] :
3223 zip_equal(R->definedValues(), NewR->definedValues()))
3224 Old->replaceAllUsesWith(New);
3238 Merge->insertBefore(LogicalAnd);
3239 LogicalAnd->replaceAllUsesWith(
Merge);
3247 R->eraseFromParent();
3264 "User of VF that we can't transform to EVL.");
3274 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
3275 "increment of the canonical induction.");
3291 MaxEVL = Builder.createScalarZExtOrTrunc(
3295 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
3296 VPValue *PrevEVL = Builder.createScalarPhi(
3310 Intrinsic::experimental_vp_splice,
3311 {V1, V2, Imm, Plan.
getTrue(), PrevEVL, &EVL},
3315 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
3328 if (match(&R, m_ComputeReductionResult(m_Select(m_Specific(HeaderMask),
3329 m_VPValue(), m_VPValue()))))
3330 return R.getOperand(0)->getDefiningRecipe()->getRegion() ==
3331 Plan.getVectorLoopRegion();
3343 VPValue *EVLMask = Builder.createICmp(
3404 VPlan &Plan,
const std::optional<unsigned> &MaxSafeElements) {
3412 VPValue *StartV = CanonicalIVPHI->getStartValue();
3415 auto *CurrentIteration =
3417 CurrentIteration->insertAfter(CanonicalIVPHI);
3418 VPBuilder Builder(Header, Header->getFirstNonPhi());
3421 VPPhi *AVLPhi = Builder.createScalarPhi(
3425 if (MaxSafeElements) {
3435 auto *CanonicalIVIncrement =
3437 Builder.setInsertPoint(CanonicalIVIncrement);
3441 OpVPEVL = Builder.createScalarZExtOrTrunc(
3442 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
3444 auto *NextIter = Builder.createAdd(
3445 OpVPEVL, CurrentIteration, CanonicalIVIncrement->getDebugLoc(),
3446 "current.iteration.next", CanonicalIVIncrement->getNoWrapFlags());
3447 CurrentIteration->addOperand(NextIter);
3451 "avl.next", {
true,
false});
3459 CanonicalIVPHI->replaceAllUsesWith(CurrentIteration);
3460 CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
3474 assert(!CurrentIteration &&
3475 "Found multiple CurrentIteration. Only one expected");
3476 CurrentIteration = PhiR;
3480 if (!CurrentIteration)
3491 CurrentIteration->
getDebugLoc(),
"current.iteration.iv");
3497 VPValue *Backedge = CanonicalIV->getIncomingValue(1);
3500 "Unexpected canonical iv");
3506 CanonicalIV->eraseFromParent();
3526 if (!
match(EVLPhi->getBackedgeValue(),
3539 [[maybe_unused]]
bool FoundAVLNext =
3542 assert(FoundAVLNext &&
"Didn't find AVL backedge?");
3554 "Expected BranchOnCond with ICmp comparing CanIV increment with vector "
3559 LatchBr->setOperand(
3570 return R->getRegion() ||
3574 for (
const SCEV *Stride : StridesMap.
values()) {
3577 const APInt *StrideConst;
3600 RewriteMap[StrideV] = PSE.
getSCEV(StrideV);
3607 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3610 if (NewSCEV != ScevExpr) {
3612 ExpSCEV->replaceAllUsesWith(NewExp);
3621 const std::function<
bool(
BasicBlock *)> &BlockNeedsPredication) {
3625 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](
VPRecipeBase *Root) {
3630 while (!Worklist.
empty()) {
3633 if (!Visited.
insert(CurRec).second)
3655 RecWithFlags->isDisjoint()) {
3658 Builder.createAdd(
A,
B, RecWithFlags->getDebugLoc());
3659 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3660 RecWithFlags->replaceAllUsesWith(New);
3661 RecWithFlags->eraseFromParent();
3664 RecWithFlags->dropPoisonGeneratingFlags();
3669 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3670 "found instruction with poison generating flags not covered by "
3671 "VPRecipeWithIRFlags");
3676 if (
VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3688 Instruction &UnderlyingInstr = WidenRec->getIngredient();
3689 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3690 if (AddrDef && WidenRec->isConsecutive() &&
3691 BlockNeedsPredication(UnderlyingInstr.
getParent()))
3692 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3694 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3698 InterleaveRec->getInterleaveGroup();
3699 bool NeedPredication =
false;
3701 I < NumMembers; ++
I) {
3704 NeedPredication |= BlockNeedsPredication(Member->getParent());
3707 if (NeedPredication)
3708 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3720 if (InterleaveGroups.empty())
3727 for (
const auto *IG : InterleaveGroups) {
3733 StoredValues.
push_back(StoreR->getStoredValue());
3734 for (
unsigned I = 1;
I < IG->getFactor(); ++
I) {
3741 StoredValues.
push_back(StoreR->getStoredValue());
3745 bool NeedsMaskForGaps =
3746 (IG->requiresScalarEpilogue() && !EpilogueAllowed) ||
3747 (!StoredValues.
empty() && !IG->isFull());
3759 VPValue *Addr = Start->getAddr();
3768 assert(IG->getIndex(IRInsertPos) != 0 &&
3769 "index of insert position shouldn't be zero");
3773 IG->getIndex(IRInsertPos),
3777 Addr =
B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3783 if (IG->isReverse()) {
3786 -(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
3787 ReversePtr->insertBefore(InsertPos);
3791 InsertPos->getMask(), NeedsMaskForGaps,
3792 InterleaveMD, InsertPos->getDebugLoc());
3793 VPIG->insertBefore(InsertPos);
3796 for (
unsigned i = 0; i < IG->getFactor(); ++i)
3799 if (!Member->getType()->isVoidTy()) {
3858 AddOp = Instruction::Add;
3859 MulOp = Instruction::Mul;
3861 AddOp =
ID.getInductionOpcode();
3862 MulOp = Instruction::FMul;
3870 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty,
DL);
3871 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty,
DL);
3880 Init = Builder.createWidenCast(Instruction::UIToFP,
Init, StepTy);
3885 Init = Builder.createNaryOp(MulOp, {
Init, SplatStep}, Flags);
3886 Init = Builder.createNaryOp(AddOp, {SplatStart,
Init}, Flags,
3892 WidePHI->insertBefore(WidenIVR);
3903 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3907 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3910 VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
3913 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3920 auto *
Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3923 WidePHI->addOperand(
Next);
3951 VPlan *Plan = R->getParent()->getPlan();
3952 VPValue *Start = R->getStartValue();
3953 VPValue *Step = R->getStepValue();
3954 VPValue *VF = R->getVFValue();
3956 assert(R->getInductionDescriptor().getKind() ==
3958 "Not a pointer induction according to InductionDescriptor!");
3961 "Recipe should have been replaced");
3967 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start,
DL,
"pointer.phi");
3971 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3974 Offset = Builder.createOverflowingOp(Instruction::Mul, {
Offset, Step});
3976 Builder.createWidePtrAdd(ScalarPtrPhi,
Offset,
DL,
"vector.gep");
3977 R->replaceAllUsesWith(PtrAdd);
3982 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.
inferScalarType(VF),
3984 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
3987 Builder.createPtrAdd(ScalarPtrPhi, Inc,
DL,
"ptr.ind");
3996 if (!R->isReplicator())
4000 R->dissolveToCFGLoop();
4021 assert(Br->getNumOperands() == 2 &&
4022 "BranchOnTwoConds must have exactly 2 conditions");
4026 assert(Successors.size() == 3 &&
4027 "BranchOnTwoConds must have exactly 3 successors");
4032 VPValue *Cond0 = Br->getOperand(0);
4033 VPValue *Cond1 = Br->getOperand(1);
4038 !BrOnTwoCondsBB->
getParent() &&
"regions must already be dissolved");
4051 Br->eraseFromParent();
4074 WidenIVR->replaceAllUsesWith(PtrAdd);
4087 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
4088 Select = Builder.createSelect(Blend->getMask(
I),
4089 Blend->getIncomingValue(
I),
Select,
4090 R.getDebugLoc(),
"predphi", *Blend);
4091 Blend->replaceAllUsesWith(
Select);
4096 if (!VEPR->getOffset()) {
4098 "Expected unroller to have materialized offset for UF != 1");
4099 VEPR->materializeOffset();
4114 for (
VPValue *
Op : LastActiveL->operands()) {
4115 VPValue *NotMask = Builder.createNot(
Op, LastActiveL->getDebugLoc());
4120 VPValue *FirstInactiveLane = Builder.createNaryOp(
4122 LastActiveL->getDebugLoc(),
"first.inactive.lane");
4128 Builder.createSub(FirstInactiveLane, One,
4129 LastActiveL->getDebugLoc(),
"last.active.lane");
4139 assert(VPI->isMasked() &&
4140 "Unmasked MaskedCond should be simplified earlier");
4141 VPI->replaceAllUsesWith(Builder.createNaryOp(
4153 Instruction::Add, VPI->operands(), VPI->getNoWrapFlags(),
4154 VPI->getDebugLoc());
4155 VPI->replaceAllUsesWith(
Add);
4164 DebugLoc DL = BranchOnCountInst->getDebugLoc();
4167 ToRemove.push_back(BranchOnCountInst);
4182 ? Instruction::UIToFP
4183 : Instruction::Trunc;
4184 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
4190 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
4196 MulOpc = Instruction::FMul;
4197 Flags = VPI->getFastMathFlags();
4199 MulOpc = Instruction::Mul;
4204 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
4206 VPI->replaceAllUsesWith(VectorStep);
4212 R->eraseFromParent();
4220 struct EarlyExitInfo {
4231 if (Pred == MiddleVPBB)
4236 VPValue *CondOfEarlyExitingVPBB;
4237 [[maybe_unused]]
bool Matched =
4238 match(EarlyExitingVPBB->getTerminator(),
4240 assert(Matched &&
"Terminator must be BranchOnCond");
4244 VPBuilder EarlyExitingBuilder(EarlyExitingVPBB->getTerminator());
4245 auto *CondToEarlyExit = EarlyExitingBuilder.
createNaryOp(
4247 TrueSucc == ExitBlock
4248 ? CondOfEarlyExitingVPBB
4249 : EarlyExitingBuilder.
createNot(CondOfEarlyExitingVPBB));
4255 "exit condition must dominate the latch");
4264 assert(!Exits.
empty() &&
"must have at least one early exit");
4271 for (
const auto &[Num, VPB] :
enumerate(RPOT))
4273 llvm::sort(Exits, [&RPOIdx](
const EarlyExitInfo &
A,
const EarlyExitInfo &
B) {
4274 return RPOIdx[
A.EarlyExitingVPBB] < RPOIdx[
B.EarlyExitingVPBB];
4280 for (
unsigned I = 0;
I + 1 < Exits.
size(); ++
I)
4281 for (
unsigned J =
I + 1; J < Exits.
size(); ++J)
4283 Exits[
I].EarlyExitingVPBB) &&
4284 "RPO sort must place dominating exits before dominated ones");
4290 VPValue *Combined = Exits[0].CondToExit;
4291 for (
const EarlyExitInfo &Info :
drop_begin(Exits))
4292 Combined = Builder.createLogicalOr(Combined, Info.CondToExit);
4298 "Early exit store masking not implemented");
4302 for (
unsigned Idx = 0; Idx != Exits.
size(); ++Idx) {
4306 VectorEarlyExitVPBBs[Idx] = VectorEarlyExitVPBB;
4314 Exits.
size() == 1 ? VectorEarlyExitVPBBs[0]
4348 for (
auto [Exit, VectorEarlyExitVPBB] :
4349 zip_equal(Exits, VectorEarlyExitVPBBs)) {
4350 auto &[EarlyExitingVPBB, EarlyExitVPBB,
_] = Exit;
4362 ExitIRI->getIncomingValueForBlock(EarlyExitingVPBB);
4363 VPValue *NewIncoming = IncomingVal;
4365 VPBuilder EarlyExitBuilder(VectorEarlyExitVPBB);
4370 ExitIRI->removeIncomingValueFor(EarlyExitingVPBB);
4371 ExitIRI->addOperand(NewIncoming);
4374 EarlyExitingVPBB->getTerminator()->eraseFromParent();
4408 bool IsLastDispatch = (
I + 2 == Exits.
size());
4410 IsLastDispatch ? VectorEarlyExitVPBBs.
back()
4416 VectorEarlyExitVPBBs[
I]->setPredecessors({CurrentBB});
4419 CurrentBB = FalseBB;
4426 "Unexpected terminator");
4427 auto *IsLatchExitTaken =
4429 LatchExitingBranch->getOperand(1));
4431 DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
4432 LatchExitingBranch->eraseFromParent();
4433 Builder.setInsertPoint(LatchVPBB);
4435 {IsAnyExitTaken, IsLatchExitTaken}, LatchDL);
4437 LatchVPBB->
setSuccessors({DispatchVPBB, MiddleVPBB, HeaderVPBB});
4448 Type *RedTy = Ctx.Types.inferScalarType(Red);
4449 VPValue *VecOp = Red->getVecOp();
4454 if (Red->isPartialReduction())
4458 auto IsExtendedRedValidAndClampRange =
4474 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
4475 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
4476 Red->getFastMathFlags(),
CostKind);
4477 return ExtRedCost.
isValid() && ExtRedCost < ExtCost + RedCost;
4486 IsExtendedRedValidAndClampRange(
4489 Ctx.Types.inferScalarType(
A)))
4508 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
4509 Opcode != Instruction::FAdd)
4512 Type *RedTy = Ctx.Types.inferScalarType(Red);
4515 auto IsMulAccValidAndClampRange =
4523 if (Red->isPartialReduction())
4528 Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
4534 (Ext0->getOpcode() != Ext1->getOpcode() ||
4535 Ext0->getOpcode() == Instruction::CastOps::FPExt))
4539 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
4541 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
4548 ExtCost += Ext0->computeCost(VF, Ctx);
4550 ExtCost += Ext1->computeCost(VF, Ctx);
4552 ExtCost += OuterExt->computeCost(VF, Ctx);
4554 return MulAccCost.
isValid() &&
4555 MulAccCost < ExtCost + MulCost + RedCost;
4560 VPValue *VecOp = Red->getVecOp();
4567 assert(Opcode == Instruction::FAdd &&
4568 "MulAccumulateReduction from an FMul must accumulate into an FAdd "
4577 if (RecipeA && RecipeB &&
4578 IsMulAccValidAndClampRange(
FMul, RecipeA, RecipeB,
nullptr)) {
4600 if (!ExtA || ExtB || !
isa<VPIRValue>(ValB) || Red->isPartialReduction())
4602 Type *NarrowTy = Ctx.Types.inferScalarType(ExtA->getOperand(0));
4616 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
4617 Type *WideTy = Ctx.Types.inferScalarType(ExtA);
4618 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
4619 Mul->setOperand(1, ExtB);
4629 ExtendAndReplaceConstantOp(RecipeA, RecipeB,
B,
Mul);
4634 IsMulAccValidAndClampRange(
Mul, RecipeA, RecipeB,
nullptr)) {
4641 if (!
Sub && IsMulAccValidAndClampRange(
Mul,
nullptr,
nullptr,
nullptr))
4650 if (!Red->isPartialReduction() &&
4659 ExtendAndReplaceConstantOp(Ext0, Ext1,
B,
Mul);
4668 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
4669 Ext0->getOpcode() == Ext1->getOpcode() &&
4670 IsMulAccValidAndClampRange(
Mul, Ext0, Ext1, Ext) &&
Mul->hasOneUse()) {
4672 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(),
nullptr,
4673 *Ext0, *Ext0, Ext0->getDebugLoc());
4674 NewExt0->insertBefore(Ext0);
4679 Ext->getResultType(),
nullptr, *Ext1,
4680 *Ext1, Ext1->getDebugLoc());
4683 Mul->setOperand(0, NewExt0);
4684 Mul->setOperand(1, NewExt1);
4685 Red->setOperand(1,
Mul);
4698 auto IP = std::next(Red->getIterator());
4699 auto *VPBB = Red->getParent();
4709 Red->replaceAllUsesWith(AbstractR);
4739 for (
VPValue *VPV : VPValues) {
4748 if (
User->usesScalars(VPV))
4751 HoistPoint = HoistBlock->
begin();
4755 "All users must be in the vector preheader or dominated by it");
4760 VPV->replaceUsesWithIf(Broadcast,
4761 [VPV, Broadcast](
VPUser &U,
unsigned Idx) {
4762 return Broadcast != &U && !U.usesScalars(VPV);
4779 if (RepR->isPredicated() || !RepR->isSingleScalar() ||
4780 RepR->getOpcode() != Instruction::Load)
4783 VPValue *Addr = RepR->getOperand(0);
4786 if (!
Loc.AATags.Scope)
4791 if (R.mayWriteToMemory()) {
4793 if (!
Loc || !
Loc->AATags.Scope || !
Loc->AATags.NoAlias)
4801 for (
auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
4805 const AAMDNodes &LoadAA = LoadLoc.AATags;
4821 return CommonMetadata;
4824template <
unsigned Opcode>
4829 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
4830 "Only Load and Store opcodes supported");
4831 constexpr bool IsLoad = (Opcode == Instruction::Load);
4837 return TypeInfo.
inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
4842 for (
auto Recipes :
Groups) {
4843 if (Recipes.size() < 2)
4851 VPValue *MaskI = RecipeI->getMask();
4852 Type *TypeI = GetLoadStoreValueType(RecipeI);
4858 bool HasComplementaryMask =
false;
4863 VPValue *MaskJ = RecipeJ->getMask();
4864 Type *TypeJ = GetLoadStoreValueType(RecipeJ);
4865 if (TypeI == TypeJ) {
4875 if (HasComplementaryMask) {
4876 assert(Group.
size() >= 2 &&
"must have at least 2 entries");
4886template <
typename InstType>
4904 for (
auto &Group :
Groups) {
4924 return R->isSingleScalar() == IsSingleScalar;
4926 "all members in group must agree on IsSingleScalar");
4931 LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
4932 IsSingleScalar,
nullptr, *EarliestLoad, CommonMetadata);
4934 UnpredicatedLoad->insertBefore(EarliestLoad);
4938 Load->replaceAllUsesWith(UnpredicatedLoad);
4939 Load->eraseFromParent();
4949 if (!StoreLoc || !StoreLoc->AATags.Scope)
4955 StoresToSink.
end());
4959 SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], PSE, L, TypeInfo);
4973 for (
auto &Group :
Groups) {
4986 VPValue *SelectedValue = Group[0]->getOperand(0);
4989 bool IsSingleScalar = Group[0]->isSingleScalar();
4990 for (
unsigned I = 1;
I < Group.size(); ++
I) {
4991 assert(IsSingleScalar == Group[
I]->isSingleScalar() &&
4992 "all members in group must agree on IsSingleScalar");
4993 VPValue *Mask = Group[
I]->getMask();
4995 SelectedValue = Builder.createSelect(Mask,
Value, SelectedValue,
5004 StoreWithMinAlign->getUnderlyingInstr(),
5005 {SelectedValue, LastStore->getOperand(1)}, IsSingleScalar,
5006 nullptr, *LastStore, CommonMetadata);
5007 UnpredicatedStore->insertBefore(*InsertBB, LastStore->
getIterator());
5011 Store->eraseFromParent();
5018 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
5019 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
5084 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](
VPUser *U) {
5086 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
5093 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
5103 DefR->replaceUsesWithIf(
5104 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
5106 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
5120 for (
VPValue *Def : R.definedValues()) {
5133 auto IsCandidateUnpackUser = [Def](
VPUser *U) {
5135 return U->usesScalars(Def) &&
5138 if (
none_of(Def->users(), IsCandidateUnpackUser))
5145 Unpack->insertAfter(&R);
5146 Def->replaceUsesWithIf(Unpack,
5147 [&IsCandidateUnpackUser](
VPUser &U,
unsigned) {
5148 return IsCandidateUnpackUser(&U);
5158 bool RequiresScalarEpilogue,
5170 assert(StepR->getParent() == VectorPHVPBB &&
5171 "Step must be defined in VectorPHVPBB");
5173 InsertPt = std::next(StepR->getIterator());
5175 VPBuilder Builder(VectorPHVPBB, InsertPt);
5183 if (TailByMasking) {
5184 TC = Builder.createAdd(
5195 Builder.createNaryOp(Instruction::URem, {TC, Step},
5204 if (RequiresScalarEpilogue) {
5206 "requiring scalar epilogue is not supported with fail folding");
5209 R = Builder.createSelect(IsZero, Step, R);
5223 "VF and VFxUF must be materialized together");
5235 Builder.createElementCount(TCTy, VFEC * Plan.
getConcreteUF());
5242 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
5246 BC, [&VF](
VPUser &U,
unsigned) {
return !U.usesScalars(&VF); });
5250 VPValue *MulByUF = Builder.createOverflowingOp(
5262 BasicBlock *EntryBB = Entry->getIRBasicBlock();
5270 const SCEV *Expr = ExpSCEV->getSCEV();
5273 ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
5278 ExpSCEV->eraseFromParent();
5281 "VPExpandSCEVRecipes must be at the beginning of the entry block, "
5282 "before any VPIRInstructions");
5285 auto EI = Entry->begin();
5295 return ExpandedSCEVs;
5307 VPValue *OpV,
unsigned Idx,
bool IsScalable) {
5311 return Member0Op == OpV;
5315 return !IsScalable && !W->getMask() && W->isConsecutive() &&
5318 return IR->getInterleaveGroup()->isFull() &&
IR->getVPValue(Idx) == OpV;
5335 for (
unsigned Idx = 0; Idx != WideMember0->getNumOperands(); ++Idx) {
5338 OpsI.
push_back(
Op->getDefiningRecipe()->getOperand(Idx));
5343 if (
any_of(
enumerate(OpsI), [WideMember0, Idx, IsScalable](
const auto &
P) {
5344 const auto &[
OpIdx, OpV] =
P;
5359 if (!InterleaveR || InterleaveR->
getMask())
5360 return std::nullopt;
5362 Type *GroupElementTy =
nullptr;
5366 [&TypeInfo, GroupElementTy](
VPValue *
Op) {
5367 return TypeInfo.inferScalarType(Op) == GroupElementTy;
5369 return std::nullopt;
5374 [&TypeInfo, GroupElementTy](
VPValue *
Op) {
5375 return TypeInfo.inferScalarType(Op) == GroupElementTy;
5377 return std::nullopt;
5381 if (IG->getFactor() != IG->getNumMembers())
5382 return std::nullopt;
5388 assert(
Size.isScalable() == VF.isScalable() &&
5389 "if Size is scalable, VF must be scalable and vice versa");
5390 return Size.getKnownMinValue();
5394 unsigned MinVal = VF.getKnownMinValue();
5396 if (IG->getFactor() == MinVal && GroupSize == GetVectorBitWidthForVF(VF))
5399 return std::nullopt;
5407 return RepR && RepR->isSingleScalar();
5414 auto *R = V->getDefiningRecipe();
5423 for (
unsigned Idx = 0,
E = WideMember0->getNumOperands(); Idx !=
E; ++Idx)
5424 WideMember0->setOperand(
5433 auto *LI =
cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
5435 LoadGroup->getMask(),
true,
5436 {}, LoadGroup->getDebugLoc());
5437 L->insertBefore(LoadGroup);
5443 assert(RepR->isSingleScalar() &&
5445 "must be a single scalar load");
5446 NarrowedOps.
insert(RepR);
5451 VPValue *PtrOp = WideLoad->getAddr();
5453 PtrOp = VecPtr->getOperand(0);
5458 nullptr, {}, *WideLoad);
5459 N->insertBefore(WideLoad);
5464std::unique_ptr<VPlan>
5484 "unexpected branch-on-count");
5488 std::optional<ElementCount> VFToOptimize;
5505 if (R.mayWriteToMemory() && !InterleaveR)
5520 std::optional<ElementCount> NarrowedVF =
5522 if (!NarrowedVF || (VFToOptimize && NarrowedVF != VFToOptimize))
5524 VFToOptimize = NarrowedVF;
5527 if (InterleaveR->getStoredValues().empty())
5532 auto *Member0 = InterleaveR->getStoredValues()[0];
5542 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
5545 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
5546 return IR && IR->getInterleaveGroup()->isFull() &&
5547 IR->getVPValue(Op.index()) == Op.value();
5556 VFToOptimize->isScalable()))
5561 if (StoreGroups.
empty())
5565 bool RequiresScalarEpilogue =
5576 std::unique_ptr<VPlan> NewPlan;
5578 NewPlan = std::unique_ptr<VPlan>(Plan.
duplicate());
5579 Plan.
setVF(*VFToOptimize);
5580 NewPlan->removeVF(*VFToOptimize);
5586 for (
auto *StoreGroup : StoreGroups) {
5593 StoreGroup->getDebugLoc());
5594 S->insertBefore(StoreGroup);
5595 StoreGroup->eraseFromParent();
5607 if (VFToOptimize->isScalable()) {
5620 RequiresScalarEpilogue, Step);
5622 Inc->setOperand(1, Step);
5628 "All VPVectorPointerRecipes should have been removed");
5644 "must have a BranchOnCond");
5647 if (VF.
isScalable() && VScaleForTuning.has_value())
5648 VectorStep *= *VScaleForTuning;
5649 assert(VectorStep > 0 &&
"trip count should not be zero");
5653 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
5660 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
5672 "Cannot handle loops with uncountable early exits");
5745 make_range(MiddleVPBB->getFirstNonPhi(), MiddleVPBB->end()))) {
5759 "vector.recur.extract.for.phi");
5778 VPValue *WidenIVCandidate = BinOp->getOperand(0);
5779 VPValue *InvariantCandidate = BinOp->getOperand(1);
5781 std::swap(WidenIVCandidate, InvariantCandidate);
5795 auto *ClonedOp = BinOp->
clone();
5796 if (ClonedOp->getOperand(0) == WidenIV) {
5797 ClonedOp->setOperand(0, ScalarIV);
5799 assert(ClonedOp->getOperand(1) == WidenIV &&
"one operand must be WideIV");
5800 ClonedOp->setOperand(1, ScalarIV);
5815 auto CheckSentinel = [&SE](
const SCEV *IVSCEV,
5816 bool UseMax) -> std::optional<APSInt> {
5818 for (
bool Signed : {
true,
false}) {
5827 return std::nullopt;
5835 PhiR->getRecurrenceKind()))
5844 VPValue *BackedgeVal = PhiR->getBackedgeValue();
5858 !
match(FindLastSelect,
5867 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression, PSE,
5873 "IVOfExpressionToSink not being an AddRec must imply "
5874 "FindLastExpression not being an AddRec.");
5885 std::optional<APSInt> SentinelVal = CheckSentinel(IVSCEV, UseMax);
5886 bool UseSigned = SentinelVal && SentinelVal->isSigned();
5893 if (IVOfExpressionToSink) {
5894 const SCEV *FindLastExpressionSCEV =
5896 if (
match(FindLastExpressionSCEV,
5899 if (
auto NewSentinel =
5900 CheckSentinel(FindLastExpressionSCEV, NewUseMax)) {
5903 SentinelVal = *NewSentinel;
5904 UseSigned = NewSentinel->isSigned();
5906 IVSCEV = FindLastExpressionSCEV;
5907 IVOfExpressionToSink =
nullptr;
5917 if (AR->hasNoSignedWrap())
5919 else if (AR->hasNoUnsignedWrap())
5929 VPValue *NewFindLastSelect = BackedgeVal;
5931 if (!SentinelVal || IVOfExpressionToSink) {
5934 DebugLoc DL = FindLastSelect->getDefiningRecipe()->getDebugLoc();
5935 VPBuilder LoopBuilder(FindLastSelect->getDefiningRecipe());
5936 if (FindLastSelect->getDefiningRecipe()->getOperand(1) == PhiR)
5937 SelectCond = LoopBuilder.
createNot(SelectCond);
5944 if (SelectCond !=
Cond || IVOfExpressionToSink) {
5947 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression,
5956 VPIRFlags Flags(MinMaxKind,
false,
false,
5962 NewFindLastSelect, Flags, ExitDL);
5965 VPValue *VectorRegionExitingVal = ReducedIV;
5966 if (IVOfExpressionToSink)
5967 VectorRegionExitingVal =
5969 ReducedIV, IVOfExpressionToSink);
5972 VPValue *StartVPV = PhiR->getStartValue();
5979 NewRdxResult = MiddleBuilder.
createSelect(Cmp, VectorRegionExitingVal,
5989 AnyOfPhi->insertAfter(PhiR);
5996 OrVal, VectorRegionExitingVal, StartVPV, ExitDL);
6009 PhiR->hasUsesOutsideReductionChain());
6010 NewPhiR->insertBefore(PhiR);
6011 PhiR->replaceAllUsesWith(NewPhiR);
6012 PhiR->eraseFromParent();
6019struct ReductionExtend {
6020 Type *SrcType =
nullptr;
6021 ExtendKind Kind = ExtendKind::PR_None;
6027struct ExtendedReductionOperand {
6031 ReductionExtend ExtendA, ExtendB;
6039struct VPPartialReductionChain {
6042 VPWidenRecipe *ReductionBinOp =
nullptr;
6044 ExtendedReductionOperand ExtendedOp;
6051 unsigned AccumulatorOpIdx;
6052 unsigned ScaleFactor;
6065 if (!
Op->hasOneUse() ||
6071 auto *Trunc = Builder.createWidenCast(Instruction::CastOps::Trunc,
6072 Op->getOperand(1), NarrowTy);
6074 Op->setOperand(1, Builder.createWidenCast(ExtOpc, Trunc, WideTy));
6083 auto *
Sub =
Op->getOperand(0)->getDefiningRecipe();
6085 assert(Ext->getOpcode() ==
6087 "Expected both the LHS and RHS extends to be the same");
6088 bool IsSigned = Ext->getOpcode() == Instruction::SExt;
6091 auto *FreezeX = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
X}));
6092 auto *FreezeY = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
Y}));
6093 auto *
Max = Builder.insert(
6095 {FreezeX, FreezeY}, SrcTy));
6096 auto *Min = Builder.insert(
6098 {FreezeX, FreezeY}, SrcTy));
6101 return Builder.createWidenCast(Instruction::CastOps::ZExt, AbsDiff,
6114 if (!
Mul->hasOneUse() ||
6115 (Ext->getOpcode() != MulLHS->getOpcode() && MulLHS != MulRHS) ||
6116 MulLHS->getOpcode() != MulRHS->getOpcode())
6119 Mul->setOperand(0, Builder.createWidenCast(MulLHS->getOpcode(),
6120 MulLHS->getOperand(0),
6121 Ext->getResultType()));
6122 Mul->setOperand(1, MulLHS == MulRHS
6123 ?
Mul->getOperand(0)
6124 : Builder.createWidenCast(MulRHS->getOpcode(),
6125 MulRHS->getOperand(0),
6126 Ext->getResultType()));
6135static void transformToPartialReduction(
const VPPartialReductionChain &Chain,
6143 WidenRecipe->
getOperand(1 - Chain.AccumulatorOpIdx));
6159 if (WidenRecipe->
getOpcode() == Instruction::Sub &&
6167 Builder.insert(NegRecipe);
6168 ExtendedOp = NegRecipe;
6172 ExtendedOp = optimizeExtendsForPartialReduction(ExtendedOp, TypeInfo);
6182 assert((!ExitValue || IsLastInChain) &&
6183 "if we found ExitValue, it must match RdxPhi's backedge value");
6194 PartialRed->insertBefore(WidenRecipe);
6211 auto *NewScaleFactor = Plan.
getConstantInt(32, Chain.ScaleFactor);
6212 StartInst->setOperand(2, NewScaleFactor);
6220 VPValue *OldStartValue = StartInst->getOperand(0);
6221 StartInst->setOperand(0, StartInst->getOperand(1));
6225 assert(RdxResult &&
"Could not find reduction result");
6228 constexpr unsigned SubOpc = Instruction::BinaryOps::Sub;
6234 [&NewResult](
VPUser &U,
unsigned Idx) {
return &
U != NewResult; });
6240 const VPPartialReductionChain &Link,
6243 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6244 std::optional<unsigned> BinOpc = std::nullopt;
6246 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6247 BinOpc = ExtendedOp.ExtendsUser->
getOpcode();
6249 std::optional<llvm::FastMathFlags>
Flags;
6254 ? (unsigned)Instruction::Add
6257 Opcode, ExtendedOp.ExtendA.SrcType, ExtendedOp.ExtendB.SrcType, RdxType,
6258 VF, ExtendedOp.ExtendA.Kind, ExtendedOp.ExtendB.Kind, BinOpc,
6281static std::optional<ExtendedReductionOperand>
6285 "Op should be operand of UpdateR");
6293 if (
Op->hasOneUse() &&
6303 if (LHSInputType != RHSInputType ||
6304 LHSExt->getOpcode() != RHSExt->getOpcode())
6305 return std::nullopt;
6308 return ExtendedReductionOperand{
6310 {LHSInputType, getPartialReductionExtendKind(LHSExt)},
6314 std::optional<TTI::PartialReductionExtendKind> OuterExtKind;
6317 VPValue *CastSource = CastRecipe->getOperand(0);
6318 OuterExtKind = getPartialReductionExtendKind(CastRecipe);
6327 }
else if (UpdateR->
getOpcode() == Instruction::Add ||
6328 UpdateR->
getOpcode() == Instruction::FAdd) {
6332 return ExtendedReductionOperand{
6339 if (!
Op->hasOneUse())
6340 return std::nullopt;
6349 return std::nullopt;
6359 return std::nullopt;
6363 ExtendKind LHSExtendKind = getPartialReductionExtendKind(LHSCast);
6366 const APInt *RHSConst =
nullptr;
6372 return std::nullopt;
6376 if (Cast && OuterExtKind &&
6377 getPartialReductionExtendKind(Cast) != OuterExtKind)
6378 return std::nullopt;
6380 Type *RHSInputType = LHSInputType;
6381 ExtendKind RHSExtendKind = LHSExtendKind;
6384 RHSExtendKind = getPartialReductionExtendKind(RHSCast);
6387 return ExtendedReductionOperand{
6388 BinOp, {LHSInputType, LHSExtendKind}, {RHSInputType, RHSExtendKind}};
6395static std::optional<SmallVector<VPPartialReductionChain>>
6403 return std::nullopt;
6414 VPValue *CurrentValue = ExitValue;
6415 while (CurrentValue != RedPhiR) {
6418 return std::nullopt;
6425 std::optional<ExtendedReductionOperand> ExtendedOp =
6426 matchExtendedReductionOperand(UpdateR,
Op, TypeInfo);
6428 ExtendedOp = matchExtendedReductionOperand(UpdateR, PrevValue, TypeInfo);
6430 return std::nullopt;
6434 Type *ExtSrcType = ExtendedOp->ExtendA.SrcType;
6437 return std::nullopt;
6442 VPPartialReductionChain Link(
6443 {UpdateR, *ExtendedOp, RK,
6447 CurrentValue = PrevValue;
6452 std::reverse(Chain.
begin(), Chain.
end());
6471 if (
auto Chains = getScaledReductions(RedPhiR, CostCtx,
Range))
6472 ChainsByPhi.
try_emplace(RedPhiR, std::move(*Chains));
6475 if (ChainsByPhi.
empty())
6482 for (
const auto &[
_, Chains] : ChainsByPhi)
6483 for (
const VPPartialReductionChain &Chain : Chains) {
6484 PartialReductionOps.
insert(Chain.ExtendedOp.ExtendsUser);
6485 ScaledReductionMap[Chain.ReductionBinOp] = Chain.ScaleFactor;
6491 auto ExtendUsersValid = [&](
VPValue *Ext) {
6493 return PartialReductionOps.contains(cast<VPRecipeBase>(U));
6497 auto IsProfitablePartialReductionChainForVF =
6504 for (
const VPPartialReductionChain &Link : Chain) {
6505 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6506 InstructionCost LinkCost = getPartialReductionLinkCost(CostCtx, Link, VF);
6510 PartialCost += LinkCost;
6511 RegularCost += Link.ReductionBinOp->
computeCost(VF, CostCtx);
6513 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6514 RegularCost += ExtendedOp.ExtendsUser->
computeCost(VF, CostCtx);
6517 RegularCost += Extend->computeCost(VF, CostCtx);
6519 return PartialCost.
isValid() && PartialCost <= RegularCost;
6527 for (
auto &[RedPhiR, Chains] : ChainsByPhi) {
6528 for (
const VPPartialReductionChain &Chain : Chains) {
6529 if (!
all_of(Chain.ExtendedOp.ExtendsUser->operands(), ExtendUsersValid)) {
6533 auto UseIsValid = [&, RedPhiR = RedPhiR](
VPUser *U) {
6535 return PhiR == RedPhiR;
6537 return Chain.ScaleFactor == ScaledReductionMap.
lookup_or(R, 0) ||
6543 if (!
all_of(Chain.ReductionBinOp->users(), UseIsValid)) {
6552 auto *RepR = dyn_cast<VPReplicateRecipe>(U);
6553 return RepR && isa<StoreInst>(RepR->getUnderlyingInstr());
6564 return IsProfitablePartialReductionChainForVF(Chains, VF);
6570 for (
auto &[Phi, Chains] : ChainsByPhi)
6571 for (
const VPPartialReductionChain &Chain : Chains)
6572 transformToPartialReduction(Chain, CostCtx.
Types, Plan, Phi);
6586 if (VPI && VPI->getUnderlyingValue() &&
6600 New->insertBefore(VPI);
6601 if (VPI->getOpcode() == Instruction::Load)
6602 VPI->replaceAllUsesWith(New->getVPSingleValue());
6603 VPI->eraseFromParent();
6608 FinalRedStoresBuilder))
6617 ReplaceWith(Histogram);
6625 ReplaceWith(Recipe);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const X86InstrFMA3Group Groups[]
static const uint32_t IV[8]
Helper for extra no-alias checks via known-safe recipe and SCEV.
SinkStoreInfo(const SmallPtrSetImpl< VPRecipeBase * > &ExcludeRecipes, VPReplicateRecipe &GroupLeader, PredicatedScalarEvolution &PSE, const Loop &L, VPTypeAnalysis &TypeInfo)
bool shouldSkip(VPRecipeBase &R) const
Return true if R should be skipped during alias checking, either because it's in the exclude set or b...
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt abs() const
Get the absolute value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
An arbitrary precision integer that knows its signedness.
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
@ NoAlias
The two locations do not alias at all.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
back - Get the last element.
const T & front() const
front - Get the first element.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
This class represents a range of values.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
ValueT lookup_or(const_arg_type_t< KeyT > Val, U &&Default) const
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
Utility class for floating point operations which can have information about relaxed accuracy require...
Convenience struct for specifying and reasoning about fast-math flags.
Represents flags for the getelementptr instruction/expression.
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
uint32_t getNumMembers() const
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
std::pair< iterator, bool > try_emplace(const KeyT &Key, Ts &&...Args)
ValueT lookup(const KeyT &Key) const
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
Post-order traversal of a graph.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyze scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(SCEVUse SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUDivExpr(SCEVUse LHS, SCEVUse RHS)
Get a canonical unsigned division expression, or something simpler if possible.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
static LLVM_ABI bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias)
static LLVM_ABI AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
const VPRecipeBase & front() const
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
const VPBlocksTy & getPredecessors() const
const std::string & getName() const
void clearSuccessors()
Remove all the successors of this block.
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
static SmallVector< VPBasicBlock * > blocksInSingleSuccessorChainBetween(VPBasicBlock *FirstBB, VPBasicBlock *LastBB)
Returns the blocks between FirstBB and LastBB, where FirstBB to LastBB forms a single-successor chain.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createAnyOfReduction(VPValue *ChainOp, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown())
Create an AnyOf reduction pattern: or-reduce ChainOp, freeze the result, then select between TrueVal ...
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
Canonical scalar induction phi of the vector loop.
A recipe for generating the phi node tracking the current scalar iteration index.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
A special type of VPBasicBlock that wraps an existing IR basic block.
Class to record and manage LLVM IR flags.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ FirstOrderRecurrenceSplice
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPRegionBlock * getRegion()
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Helper class to create VPRecipies from IR instructions.
VPHistogramRecipe * widenIfHistogram(VPInstruction *VPI)
If VPI represents a histogram operation (as determined by LoopVectorizationLegality) make that safe f...
void setRecipe(Instruction *I, VPRecipeBase *R)
Set the recipe created for given ingredient.
VPRecipeBase * tryToWidenMemory(VPInstruction *VPI, VFRange &Range)
Check if the load or store instruction VPI should widened for Range.Start and potentially masked.
bool replaceWithFinalIfReductionStore(VPInstruction *VPI, VPBuilder &FinalRedStoresBuilder)
If VPI is a store of a reduction into an invariant address, delete it.
VPReplicateRecipe * handleReplication(VPInstruction *VPI, VFRange &Range)
Build a VPReplicateRecipe for VPI.
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe for handling reduction phis.
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
bool isPredicated() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
unsigned getNumOperands() const
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A Recipe for widening the canonical induction variable of the vector loop.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
VPIRValue * getStartValue() const
Returns the start value of the induction.
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
VPWidenRecipe * clone() override
Clone the current recipe.
unsigned getOpcode() const
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
bool hasVF(ElementCount VF) const
const DataLayout & getDataLayout() const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
bool hasScalableVF() const
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPSymbolicValue & getVectorTripCount()
The vector trip count.
VPValue * getBackedgeTakenCount() const
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop, i.e.
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
constexpr ScalarTy getFixedValue() const
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows using arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_isa< To... > m_Isa()
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ?
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::FAdd, true > m_c_FAdd(const LHS &L, const RHS &R)
Matches FAdd with LHS and RHS in either order.
LogicalOp_match< LHS, RHS, Instruction::And, true > m_c_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R with LHS and RHS in either order.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
bool match(const SCEV *S, const Pattern &P)
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ComputeReductionResult, Op0_t > m_ComputeReductionResult(const Op0_t &Op0)
auto m_WidenAnyExtend(const Op0_t &Op0)
VPInstruction_match< VPInstruction::StepVector > m_StepVector()
auto m_VPPhi(const Op0_t &Op0, const Op1_t &Op1)
bind_ty< VPSingleDefRecipe > m_VPSingleDefRecipe(VPSingleDefRecipe *&V)
Match a VPSingleDefRecipe, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
auto m_WidenIntrinsic(const T &...Ops)
VPInstruction_match< VPInstruction::ExitingIVValue, Op0_t > m_ExitingIVValue(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
bind_ty< VPIRValue > m_VPIRValue(VPIRValue *&V)
Match a VPIRValue.
auto m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_True()
auto m_VPValue()
Match an arbitrary VPValue and ignore it.
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
auto m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPRecipeBase * findRecipe(VPValue *Start, PredT Pred)
Search Start's users for a recipe satisfying Pred, looking through recipes with definitions.
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) TODO: Int...
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms li...
void stable_sort(R &&Range)
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
UncountableExitStyle
Different methods of handling early exits.
@ ReadOnly
No side effects to worry about, so we can process any uncountable exits in the loop and branch either...
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector....
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result value is uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * NoAlias
The tag specifying the noalias scope.
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
This reduction is unordered with the partial result scaled down by some factor.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
TargetTransformInfo::TargetCostKind CostKind
const TargetTransformInfo & TTI
A recipe for handling first-order recurrence phis.
A VPValue representing a live-in from the input IR or a constant.
Type * getType() const
Returns the type of the underlying IR value.
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
bool isMaterialized() const
Returns true if this symbolic value has been materialized.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...