51 GetIntOrFpInductionDescriptor,
58 if (!VPBB->getParent())
61 auto EndIter = Term ? Term->getIterator() : VPBB->end();
66 VPValue *VPV = Ingredient.getVPSingleValue();
75 const auto *II = GetIntOrFpInductionDescriptor(Phi);
89 Phi, Start, Step, &Plan.getVF(), *II, Flags,
90 Ingredient.getDebugLoc());
98 *Load, Ingredient.getOperand(0), nullptr,
100 Ingredient.getDebugLoc());
103 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
104 nullptr, false, false, *VPI,
105 Ingredient.getDebugLoc());
108 Ingredient.getDebugLoc());
116 *VPI, CI->getDebugLoc());
119 *VPI, Ingredient.getDebugLoc());
122 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
126 *VPI, Ingredient.getDebugLoc());
135 "Only recpies with zero or one defined values expected");
136 Ingredient.eraseFromParent();
152 if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
157 return RepR && RepR->getOpcode() == Instruction::Alloca;
166 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
182 if (!ScalarVFOnly && RepR->isSingleScalar())
185 WorkList.insert({SinkTo, Candidate});
197 for (auto &Recipe : *VPBB)
199 InsertIfValidSinkCandidate(VPBB, Op);
203 for (unsigned I = 0; I != WorkList.size(); ++I) {
206 std::tie(SinkTo, SinkCandidate) = WorkList[I];
211 auto UsersOutsideSinkTo =
213 return cast<VPRecipeBase>(U)->getParent() != SinkTo;
215 if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) {
216 return !U->usesFirstLaneOnly(SinkCandidate);
219 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
221 if (NeedsDuplicating) {
225 if (auto *SinkCandidateRepR =
231 nullptr, *SinkCandidateRepR,
235 Clone = SinkCandidate->clone();
245 InsertIfValidSinkCandidate(SinkTo, Op);
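The sinking logic above walks its worklist by index (for (unsigned I = 0; I != WorkList.size(); ++I)) so that candidates discovered while processing can be appended without invalidating iterators. Below is a minimal, self-contained C++ sketch of that pattern; the Node type and collectBackwardSlice are hypothetical stand-ins, not VPlan types.

// Illustrative sketch only (plain C++, not LLVM): index-based worklist that
// grows while being iterated, so the bound is re-read every iteration.
#include <cstdio>
#include <unordered_set>
#include <vector>

struct Node {
  int Id;
  std::vector<Node *> Operands; // hypothetical toy IR node
};

// Collect Root and everything reachable through operands, in discovery order.
static std::vector<Node *> collectBackwardSlice(Node *Root) {
  std::vector<Node *> WorkList{Root};
  std::unordered_set<Node *> Seen{Root};
  for (unsigned I = 0; I != WorkList.size(); ++I) {
    for (Node *Op : WorkList[I]->Operands)
      if (Seen.insert(Op).second) // visit each node once
        WorkList.push_back(Op);
  }
  return WorkList;
}

int main() {
  Node A{0, {}}, B{1, {&A}}, C{2, {&A, &B}};
  for (Node *N : collectBackwardSlice(&C))
    std::printf("node %d\n", N->Id);
}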
255 if (!EntryBB || EntryBB->size() != 1 ||
265 if (EntryBB->getNumSuccessors() != 2)
270 if (!Succ0 || !Succ1)
273 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
275 if (Succ0->getSingleSuccessor() == Succ1)
277 if (Succ1->getSingleSuccessor() == Succ0)
294 if (!Region1->isReplicator())
296 auto *MiddleBasicBlock =
298 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
303 if (!Region2 || !Region2->isReplicator())
308 if (!Mask1 || Mask1 != Mask2)
311 assert(Mask1 && Mask2 && "both regions must have conditions");
317 if (TransformedRegions.contains(Region1))
324 if (!Then1 || !Then2)
344 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
350 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
351 Phi1ToMove.eraseFromParent();
354 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
368 TransformedRegions.insert(Region1);
371 return !TransformedRegions.empty();
378 std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
379 assert(Instr->getParent() && "Predicated instruction not in any basic block");
380 auto *BlockInMask = PredRecipe->getMask();
399 RecipeWithoutMask->getDebugLoc());
423 if (RepR->isPredicated())
442 if (ParentRegion && ParentRegion->getExiting() == CurrentBlock)
456 if (!VPBB->getParent())
460 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
469 R.moveBefore(*PredVPBB, PredVPBB->end());
471 auto *ParentRegion = VPBB->getParent();
472 if (ParentRegion && ParentRegion->getExiting() == VPBB)
473 ParentRegion->setExiting(PredVPBB);
474 for (auto *Succ : to_vector(VPBB->successors())) {
480 return !WorkList.empty();
487 bool ShouldSimplify = true;
488 while (ShouldSimplify) {
504 if (!IV || IV->getTruncInst())
519 for (auto *U : FindMyCast->users()) {
521 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
522 FoundUserCast = UserCast;
526 FindMyCast = FoundUserCast;
551 if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
564 WidenOriginalIV->dropPoisonGeneratingFlags();
577 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
579 if (IsConditionalAssume)
582 if (R.mayHaveSideEffects())
586 return all_of(R.definedValues(),
587 [](VPValue *V) { return V->getNumUsers() == 0; });
603 if (!PhiR || PhiR->getNumOperands() != 2)
605 VPUser *PhiUser = PhiR->getSingleUser();
609 if (PhiUser != Incoming->getDefiningRecipe() ||
612 PhiR->replaceAllUsesWith(PhiR->getOperand(0));
613 PhiR->eraseFromParent();
614 Incoming->getDefiningRecipe()->eraseFromParent();
629 Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
639 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy, DL);
645 if (ResultTy != StepTy) {
652 Builder.setInsertPoint(VecPreheader);
653 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy, DL);
655 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
661 for (unsigned I = 0; I != Users.size(); ++I) {
666 Users.insert_range(V->users());
668 return Users.takeVector();
702 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
703 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
711 Def->operands(), true,
713 Clone->insertAfter(Def);
714 Def->replaceAllUsesWith(Clone);
725 VPValue *StepV = PtrIV->getOperand(1);
728 nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);
730 VPValue *PtrAdd = Builder.createPtrAdd(PtrIV->getStartValue(), Steps,
740 if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
741 return U->usesScalars(WideIV);
747 Plan, ID.getKind(), ID.getInductionOpcode(),
749 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
750 WideIV->getDebugLoc(), Builder);
753 if (!HasOnlyVectorVFs)
754 WideIV->replaceAllUsesWith(Steps);
756 WideIV->replaceUsesWithIf(Steps, [WideIV](VPUser &U, unsigned) {
757 return U.usesScalars(WideIV);
772 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
777 if (!Def || Def->getNumOperands() != 2)
785 auto IsWideIVInc = [&]() {
786 auto &ID = WideIV->getInductionDescriptor();
789 VPValue *IVStep = WideIV->getStepValue();
790 switch (ID.getInductionOpcode()) {
791 case Instruction::Add:
793 case Instruction::FAdd:
796 case Instruction::FSub:
799 case Instruction::Sub: {
818 return IsWideIVInc() ? WideIV : nullptr;
838 if (WideIntOrFp && WideIntOrFp->getTruncInst())
851 FirstActiveLane = B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
852 FirstActiveLaneType, DL);
854 B.createNaryOp(Instruction::Add, {CanonicalIV, FirstActiveLane}, DL);
861 EndValue = B.createNaryOp(Instruction::Add, {EndValue, One}, DL);
864 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
866 VPValue *Start = WideIV->getStartValue();
867 VPValue *Step = WideIV->getStepValue();
868 EndValue = B.createDerivedIV(
870 Start, EndValue, Step);
890 assert(EndValue && "end value must have been pre-computed");
900 VPValue *Step = WideIV->getStepValue();
903 return B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
907 return B.createPtrAdd(EndValue,
908 B.createNaryOp(Instruction::Sub, {Zero, Step}),
912 const auto &ID = WideIV->getInductionDescriptor();
913 return B.createNaryOp(
914 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
917 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
932 for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) {
934 if (PredVPBB == MiddleVPBB)
936 ExitIRI->getOperand(Idx),
940 ExitIRI->getOperand(Idx), SE);
942 ExitIRI->setOperand(Idx, Escape);
959 const auto &[V, Inserted] = SCEV2VPV.try_emplace(ExpR->getSCEV(), ExpR);
962 ExpR->replaceAllUsesWith(V->second);
963 ExpR->eraseFromParent();
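The try_emplace call above implements a simple deduplication: the first expansion recorded for a given SCEV wins, and later duplicates are redirected to it and erased. A small stand-alone C++ sketch of the same pattern follows; Expansion and its string key are made-up stand-ins for the recipe and its SCEV.

// Illustrative sketch only: first-wins deduplication keyed on an expression.
#include <cstdio>
#include <map>
#include <string>
#include <vector>

struct Expansion {
  std::string Key;              // stands in for the SCEV being expanded
  Expansion *ReplacedBy = nullptr;
};

static void deduplicate(std::vector<Expansion *> &Expansions) {
  std::map<std::string, Expansion *> SeenKeys;
  for (Expansion *E : Expansions) {
    auto [It, Inserted] = SeenKeys.try_emplace(E->Key, E);
    if (!Inserted)              // key already expanded: reuse the earlier result
      E->ReplacedBy = It->second;
  }
}

int main() {
  Expansion A{"n*4"}, B{"n+1"}, C{"n*4"};
  std::vector<Expansion *> Exps{&A, &B, &C};
  deduplicate(Exps);
  std::printf("C replaced: %s\n", C.ReplacedBy == &A ? "yes" : "no");
}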
972 while (!WorkList.empty()) {
974 if (!Seen.insert(Cur).second)
982 R->eraseFromParent();
989 static std::optional<std::pair<bool, unsigned>>
992 std::optional<std::pair<bool, unsigned>>>(R)
995 [](auto *I) { return std::make_pair(false, I->getOpcode()); })
996 .Case<VPWidenIntrinsicRecipe>([](auto *I) {
997 return std::make_pair(true, I->getVectorIntrinsicID());
999 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {
1003 return std::make_pair(false,
1006 .Default([](auto *) { return std::nullopt; });
1022 if (!Op->isLiveIn() || !Op->getLiveInIRValue())
1024 Ops.push_back(Op->getLiveInIRValue());
1027 auto FoldToIRValue = [&]() -> Value * {
1029 if (OpcodeOrIID->first) {
1030 if (R.getNumOperands() != 2)
1032 unsigned ID = OpcodeOrIID->second;
1033 return Folder.FoldBinaryIntrinsic(ID, Ops[0], Ops[1],
1036 unsigned Opcode = OpcodeOrIID->second;
1045 return Folder.FoldSelect(Ops[0], Ops[1],
1048 return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
1050 case Instruction::Select:
1051 return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
1052 case Instruction::ICmp:
1053 case Instruction::FCmp:
1056 case Instruction::GetElementPtr: {
1059 return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0],
1069 case Instruction::ExtractElement:
1076 if (Value *V = FoldToIRValue())
1077 return R.getParent()->getPlan()->getOrAddLiveIn(V);
1083 VPlan *Plan = Def->getParent()->getPlan();
1090 return Def->replaceAllUsesWith(V);
1096 PredPHI->replaceAllUsesWith(Op);
1104 if (TruncTy == ATy) {
1105 Def->replaceAllUsesWith(A);
1114 : Instruction::ZExt;
1117 if (auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1119 Ext->setUnderlyingValue(UnderlyingExt);
1121 Def->replaceAllUsesWith(Ext);
1123 auto *Trunc = Builder.createWidenCast(Instruction::Trunc, A, TruncTy);
1124 Def->replaceAllUsesWith(Trunc);
1132 for (VPUser *U : A->users()) {
1134 for (VPValue *VPV : R->definedValues())
1148 Def->replaceAllUsesWith(X);
1149 Def->eraseFromParent();
1155 return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
1159 return Def->replaceAllUsesWith(X);
1163 return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
1167 return Def->replaceAllUsesWith(Def->getOperand(1));
1174 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1175 !Def->getOperand(1)->hasMoreThanOneUniqueUser()))
1176 return Def->replaceAllUsesWith(
1177 Builder.createLogicalAnd(X, Builder.createOr(Y, Z)));
1181 return Def->replaceAllUsesWith(Plan->getFalse());
1184 return Def->replaceAllUsesWith(X);
1189 Def->setOperand(0, C);
1190 Def->setOperand(1, Y);
1191 Def->setOperand(2, X);
1200 X->hasMoreThanOneUniqueUser())
1201 return Def->replaceAllUsesWith(
1202 Builder.createLogicalAnd(X, Builder.createLogicalAnd(Y, Z)));
1205 return Def->replaceAllUsesWith(A);
1208 return Def->replaceAllUsesWith(
1209 Def->getOperand(0) == A ? Def->getOperand(1) : Def->getOperand(0));
1213 return Def->replaceAllUsesWith(A);
1228 R->setOperand(1, Y);
1229 R->setOperand(2, X);
1233 R->replaceAllUsesWith(Cmp);
1238 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1239 Cmp->setDebugLoc(Def->getDebugLoc());
1251 if (Op->getNumUsers() > 1 ||
1255 } else if (!UnpairedCmp) {
1256 UnpairedCmp = Op->getDefiningRecipe();
1260 UnpairedCmp = nullptr;
1267 if (NewOps.size() < Def->getNumOperands()) {
1269 return Def->replaceAllUsesWith(NewAnyOf);
1281 return Def->replaceAllUsesWith(NewCmp);
1289 return Def->replaceAllUsesWith(Def->getOperand(1));
1295 X = Builder.createWidenCast(Instruction::Trunc, X, WideStepTy);
1296 Def->replaceAllUsesWith(X);
1306 Def->setOperand(1, Def->getOperand(0));
1307 Def->setOperand(0, Y);
1312 if (Phi->getOperand(0) == Phi->getOperand(1))
1313 Phi->replaceAllUsesWith(Phi->getOperand(0));
1321 Def->replaceAllUsesWith(
1322 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1330 Def->replaceAllUsesWith(
1331 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1338 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1343 Def->replaceAllUsesWith(
1353 "broadcast operand must be single-scalar");
1354 Def->setOperand(0, C);
1359 if (Phi->getNumOperands() == 1)
1360 Phi->replaceAllUsesWith(Phi->getOperand(0));
1373 if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) &&
1374 Phi->getSingleUser() == Def) {
1375 Phi->setOperand(0, Y);
1376 Def->replaceAllUsesWith(Phi);
1383 if (VecPtr->isFirstPart()) {
1384 VecPtr->replaceAllUsesWith(VecPtr->getOperand(0));
1393 Steps->replaceAllUsesWith(Steps->getOperand(0));
1401 Def->replaceUsesWithIf(StartV, [](const VPUser &U, unsigned Idx) {
1403 return PhiR && PhiR->isInLoop();
1411 Def->replaceAllUsesWith(A);
1421 [Def, A](VPUser *U) { return U->usesScalars(A) || Def == U; })) {
1422 return Def->replaceAllUsesWith(A);
1425 if (Plan->getUF() == 1 &&
1427 return Def->replaceAllUsesWith(
1457 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1464 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1465 true, nullptr, *RepR,
1466 *RepR, RepR->getDebugLoc());
1467 Clone->insertBefore(RepOrWidenR);
1468 unsigned ExtractOpc =
1472 auto *Ext = new VPInstruction(ExtractOpc, {Clone->getOperand(0)});
1473 Ext->insertBefore(Clone);
1474 Clone->setOperand(0, Ext);
1475 RepR->eraseFromParent();
1483 !all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) {
1484 if (auto *Store = dyn_cast<VPWidenStoreRecipe>(U)) {
1490 assert(RepOrWidenR != Store->getStoredValue() ||
1491 vputils::isSingleScalar(Store->getStoredValue()));
1497 unsigned Opcode = VPI->getOpcode();
1504 return U->usesScalars(RepOrWidenR);
1509 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1510 true, nullptr, *RepOrWidenR);
1511 Clone->insertBefore(RepOrWidenR);
1512 RepOrWidenR->replaceAllUsesWith(Clone);
1514 RepOrWidenR->eraseFromParent();
1550 if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
1551 UniqueValues.insert(Blend->getIncomingValue(0));
1552 for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
1554 UniqueValues.insert(Blend->getIncomingValue(I));
1556 if (UniqueValues.size() == 1) {
1557 Blend->replaceAllUsesWith(*UniqueValues.begin());
1558 Blend->eraseFromParent();
1562 if (Blend->isNormalized())
1568 unsigned StartIndex = 0;
1569 for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
1574 if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
1581 OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
1583 for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
1584 if (I == StartIndex)
1586 OperandsWithMask.push_back(Blend->getIncomingValue(I));
1587 OperandsWithMask.push_back(Blend->getMask(I));
1592 OperandsWithMask, Blend->getDebugLoc());
1593 NewBlend->insertBefore(&R);
1595 VPValue *DeadMask = Blend->getMask(StartIndex);
1597 Blend->eraseFromParent();
1602 if (NewBlend->getNumOperands() == 3 &&
1604 VPValue *Inc0 = NewBlend->getOperand(0);
1605 VPValue *Inc1 = NewBlend->getOperand(1);
1606 VPValue *OldMask = NewBlend->getOperand(2);
1607 NewBlend->setOperand(0, Inc1);
1608 NewBlend->setOperand(1, Inc0);
1609 NewBlend->setOperand(2, NewMask);
1636 APInt MaxVal = AlignedTC - 1;
1639 unsigned NewBitWidth =
1645 bool MadeChange = false;
1654 if (!WideIV || !WideIV->isCanonical() ||
1655 WideIV->hasMoreThanOneUniqueUser() ||
1656 NewIVTy == WideIV->getScalarType())
1661 VPUser *SingleUser = WideIV->getSingleUser();
1670 WideIV->setStartValue(NewStart);
1672 WideIV->setStepValue(NewStep);
1678 Cmp->setOperand(1, NewBTC);
1692 return any_of(Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
1694 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, SE);
1707 const SCEV *VectorTripCount =
1712 "Trip count SCEV must be computable");
1732 auto *Term = &ExitingVPBB->back();
1745 for (unsigned Part = 0; Part < UF; ++Part) {
1753 Extracts[Part] = Ext;
1754 Ext->insertAfter(ALM);
1765 match(Phi->getBackedgeValue(),
1767 assert(Index && "Expected index from ActiveLaneMask instruction");
1780 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
1787 "Expected incoming values of Phi to be ActiveLaneMasks");
1792 EntryALM->setOperand(2, ALMMultiplier);
1793 LoopALM->setOperand(2, ALMMultiplier);
1797 ExtractFromALM(EntryALM, EntryExtracts);
1802 ExtractFromALM(LoopALM, LoopExtracts);
1804 Not->setOperand(0, LoopExtracts[0]);
1807 for (unsigned Part = 0; Part < UF; ++Part) {
1808 Phis[Part]->setStartValue(EntryExtracts[Part]);
1809 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
1822 auto *Term = &ExitingVPBB->back();
1830 const SCEV *VectorTripCount =
1835 "Trip count SCEV must be computable");
1857 if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi))
1858 return R->isCanonical();
1859 return isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe,
1860 VPFirstOrderRecurrencePHIRecipe, VPPhi>(&Phi);
1866 R->getScalarType());
1868 HeaderR.eraseFromParent();
1872 HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));
1873 HeaderR.eraseFromParent();
1882 B->setParent(nullptr);
1891 {}, {}, Term->getDebugLoc());
1895 Term->eraseFromParent();
1922 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
1932 assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
1933 assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
1942 assert(Plan.getUF() == BestUF && "BestUF must match the Plan's UF");
1957 auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
1960 if (SinkCandidate == Previous)
1964 !Seen.insert(SinkCandidate).second ||
1977 for (unsigned I = 0; I != WorkList.size(); ++I) {
1980 "only recipes with a single defined value expected");
1995 if (SinkCandidate == FOR)
1998 SinkCandidate->moveAfter(Previous);
1999 Previous = SinkCandidate;
2017 for (VPUser *U : FOR->users()) {
2023 [&VPDT, HoistPoint](VPUser *U) {
2024 auto *R = cast<VPRecipeBase>(U);
2025 return HoistPoint == R ||
2026 VPDT.properlyDominates(HoistPoint, R);
2028 "HoistPoint must dominate all users of FOR");
2030 auto NeedsHoisting = [HoistPoint, &VPDT,
2032 VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
2033 if (!HoistCandidate)
2038 HoistCandidate->getRegion() == EnclosingLoopRegion) &&
2039 "CFG in VPlan should still be flat, without replicate regions");
2041 if (!Visited.insert(HoistCandidate).second)
2053 return HoistCandidate;
2062 for (unsigned I = 0; I != HoistCandidates.size(); ++I) {
2065 "only recipes with a single defined value expected");
2077 if (auto *R = NeedsHoisting(Op))
2089 HoistCandidate->moveBefore(*HoistPoint->getParent(),
2108 VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
2111 while (auto *PrevPhi =
2113 assert(PrevPhi->getParent() == FOR->getParent());
2115 Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
2133 {FOR, FOR->getBackedgeValue()});
2135 FOR->replaceAllUsesWith(RecurSplice);
2138 RecurSplice->setOperand(0, FOR);
2149 RecurKind RK = PhiR->getRecurrenceKind();
2156 RecWithFlags->dropPoisonGeneratingFlags();
2162 struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
2164 return Def == getEmptyKey() || Def == getTombstoneKey();
2175 return GEP->getSourceElementType();
2178 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2179 [](auto *I) { return I->getSourceElementType(); })
2180 .Default([](auto *) { return nullptr; });
2184 static bool canHandle(const VPSingleDefRecipe *Def) {
2193 if (!C || (!C->first && (C->second == Instruction::InsertValue ||
2194 C->second == Instruction::ExtractValue)))
2200 return !Def->mayReadFromMemory();
2204 static unsigned getHashValue(const VPSingleDefRecipe *Def) {
2205 const VPlan *Plan = Def->getParent()->getPlan();
2206 VPTypeAnalysis TypeInfo(*Plan);
2209 getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
2212 if (RFlags->hasPredicate())
2218 static bool isEqual(const VPSingleDefRecipe *L, const VPSingleDefRecipe *R) {
2221 if (L->getVPDefID() != R->getVPDefID() ||
2223 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2225 !equal(L->operands(), R->operands()))
2228 "must have valid opcode info for both recipes");
2230 if (LFlags->hasPredicate() &&
2231 LFlags->getPredicate() !=
2237 const VPRegionBlock *RegionL = L->getRegion();
2238 const VPRegionBlock *RegionR = R->getRegion();
2241 L->getParent() != R->getParent())
2243 const VPlan *Plan = L->getParent()->getPlan();
2244 VPTypeAnalysis TypeInfo(*Plan);
2245 return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
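VPCSEDenseMapInfo above supplies the hash and equality functions that let a DenseMap act as a value-numbering table: two recipes hash and compare equal when their kind, opcode, operands, and inferred type match, so the later one can be replaced by the earlier one. A self-contained sketch of the same idea using std::unordered_map follows; Inst, InstKey, and cse are hypothetical, not LLVM types.

// Illustrative sketch only: CSE keyed on (opcode, operands). Hash and
// equality must agree, just as getHashValue/isEqual must agree above.
#include <cstdio>
#include <functional>
#include <unordered_map>
#include <vector>

struct Inst {
  int Opcode;
  std::vector<const Inst *> Operands;
};

struct InstKey {
  const Inst *I;
  bool operator==(const InstKey &O) const {
    return I->Opcode == O.I->Opcode && I->Operands == O.I->Operands;
  }
};

struct InstKeyHash {
  size_t operator()(const InstKey &K) const {
    size_t H = std::hash<int>()(K.I->Opcode);
    for (const Inst *Op : K.I->Operands)
      H = H * 31 + std::hash<const Inst *>()(Op);
    return H;
  }
};

// Returns, for each instruction, the earliest equivalent one (or itself).
static std::vector<const Inst *> cse(const std::vector<Inst *> &Insts) {
  std::unordered_map<InstKey, const Inst *, InstKeyHash> Seen;
  std::vector<const Inst *> Leader;
  for (Inst *I : Insts) {
    auto It = Seen.try_emplace(InstKey{I}, I).first;
    Leader.push_back(It->second);
  }
  return Leader;
}

int main() {
  Inst A{1, {}}, B{2, {&A}}, C{2, {&A}}; // C is equivalent to B
  auto L = cse({&A, &B, &C});
  std::printf("C shares leader with B: %s\n", L[2] == &B ? "yes" : "no");
}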
2260 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2264 if (!VPDT.dominates(V->getParent(), VPBB))
2269 Def->replaceAllUsesWith(V);
2288 "Expected vector prehader's successor to be the vector loop region");
2295 return !Op->isDefinedOutsideLoopRegions();
2298 R.moveBefore(*Preheader, Preheader->end());
2322 VPValue *ResultVPV = R.getVPSingleValue();
2324 unsigned NewResSizeInBits = MinBWs.lookup(UI);
2325 if (!NewResSizeInBits)
2338 (void)OldResSizeInBits;
2346 VPW->dropPoisonGeneratingFlags();
2348 if (OldResSizeInBits != NewResSizeInBits &&
2353 Ext->insertAfter(&R);
2355 Ext->setOperand(0, ResultVPV);
2356 assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");
2359 "Only ICmps should not need extending the result.");
2368 for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
2369 auto *Op = R.getOperand(Idx);
2370 unsigned OpSizeInBits =
2372 if (OpSizeInBits == NewResSizeInBits)
2374 assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");
2375 auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.try_emplace(Op);
2377 R.setOperand(Idx, ProcessedIter->second);
2385 Builder.setInsertPoint(&R);
2387 Builder.createWidenCast(Instruction::Trunc, Op, NewResTy);
2388 ProcessedIter->second = NewOp;
2389 R.setOperand(Idx, NewOp);
2404 assert(VPBB->getNumSuccessors() == 2 &&
2405 "Two successors expected for BranchOnCond");
2406 unsigned RemovedIdx;
2417 "There must be a single edge between VPBB and its successor");
2426 VPBB->back().eraseFromParent();
2488 VPValue *StartV = CanonicalIVPHI->getStartValue();
2490 auto *CanonicalIVIncrement =
2494 CanonicalIVIncrement->dropPoisonGeneratingFlags();
2495 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
2505 VPValue *TripCount, *IncrementValue;
2510 IncrementValue = CanonicalIVIncrement;
2516 IncrementValue = CanonicalIVPHI;
2520 auto *EntryIncrement = Builder.createOverflowingOp(
2528 {EntryIncrement, TC, ALMMultiplier}, DL,
2529 "active.lane.mask.entry");
2535 LaneMaskPhi->insertAfter(CanonicalIVPHI);
2540 Builder.setInsertPoint(OriginalTerminator);
2541 auto *InLoopIncrement =
2543 {IncrementValue}, {false, false}, DL);
2545 {InLoopIncrement, TripCount, ALMMultiplier},
2546 DL, "active.lane.mask.next");
2551 auto *NotMask = Builder.createNot(ALM, DL);
2564 auto *FoundWidenCanonicalIVUser = find_if(
2568 "Must have at most one VPWideCanonicalIVRecipe");
2569 if (FoundWidenCanonicalIVUser !=
2571 auto *WideCanonicalIV =
2573 WideCanonicalIVs.push_back(WideCanonicalIV);
2581 if (WidenOriginalIV && WidenOriginalIV->isCanonical())
2582 WideCanonicalIVs.push_back(WidenOriginalIV);
2588 for (auto *Wide : WideCanonicalIVs) {
2594 assert(VPI->getOperand(0) == Wide &&
2595 "WidenCanonicalIV must be the first operand of the compare");
2596 assert(!HeaderMask && "Multiple header masks found?");
2604 VPlan &Plan, bool UseActiveLaneMaskForControlFlow,
2607 UseActiveLaneMaskForControlFlow) &&
2608 "DataAndControlFlowWithoutRuntimeCheck implies "
2609 "UseActiveLaneMaskForControlFlow");
2612 auto *FoundWidenCanonicalIVUser = find_if(
2614 assert(FoundWidenCanonicalIVUser &&
2615 "Must have widened canonical IV when tail folding!");
2617 auto *WideCanonicalIV =
2620 if (UseActiveLaneMaskForControlFlow) {
2630 nullptr, "active.lane.mask");
2646 template <typename OpTy> bool match(OpTy *V) const {
2657 template <typename Op0_t, typename Op1_t>
2675 VPValue *Addr, *Mask, *EndPtr;
2678 auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) {
2680 EVLEndPtr->insertBefore(&CurRecipe);
2681 EVLEndPtr->setOperand(1, &EVL);
2685 if (match(&CurRecipe,
2691 if (match(&CurRecipe,
2696 AdjustEndPtr(EndPtr), EVL, Mask);
2709 AdjustEndPtr(EndPtr), EVL, Mask);
2712 if (Rdx->isConditional() &&
2717 if (Interleave->getMask() &&
2722 if (match(&CurRecipe,
2731 Intrinsic::vp_merge, {Mask, LHS, RHS, &EVL},
2746 "User of VF that we can't transform to EVL.");
2752 [&LoopRegion, &Plan](VPUser *U) {
2754 m_c_Add(m_Specific(LoopRegion->getCanonicalIV()),
2755 m_Specific(&Plan.getVFxUF()))) ||
2756 isa<VPWidenPointerInductionRecipe>(U);
2758 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
2759 "increment of the canonical induction.");
2779 MaxEVL = Builder.createScalarZExtOrTrunc(
2783 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
2784 VPValue *PrevEVL = Builder.createScalarPhi(
2798 Intrinsic::experimental_vp_splice,
2799 {V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},
2803 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
2821 VPValue *EVLMask = Builder.createICmp(
2839 assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
2840 "New recipe must define the same number of values as the "
2845 for (unsigned I = 0; I < NumDefVal; ++I) {
2846 VPValue *CurVPV = CurRecipe->getVPValue(I);
2858 R->eraseFromParent();
2908 VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) {
2916 VPValue *StartV = CanonicalIVPHI->getStartValue();
2920 EVLPhi->insertAfter(CanonicalIVPHI);
2921 VPBuilder Builder(Header, Header->getFirstNonPhi());
2924 VPPhi *AVLPhi = Builder.createScalarPhi(
2928 if (MaxSafeElements) {
2938 auto *CanonicalIVIncrement =
2940 Builder.setInsertPoint(CanonicalIVIncrement);
2944 OpVPEVL = Builder.createScalarZExtOrTrunc(
2945 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
2947 auto *NextEVLIV = Builder.createOverflowingOp(
2948 Instruction::Add, {OpVPEVL, EVLPhi},
2949 {CanonicalIVIncrement->hasNoUnsignedWrap(),
2950 CanonicalIVIncrement->hasNoSignedWrap()},
2951 CanonicalIVIncrement->getDebugLoc(),
"index.evl.next");
2952 EVLPhi->addOperand(NextEVLIV);
2954 VPValue *NextAVL = Builder.createOverflowingOp(
2955 Instruction::Sub, {AVLPhi, OpVPEVL}, {true, false},
2963 CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
2964 CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
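The recipes built above form an explicit-vector-length (EVL) loop: each iteration starts from the remaining element count (the AVL phi), receives an EVL no larger than VF, advances the EVL-based IV by that amount, and subtracts it from the AVL. A scalar C++ model of that control structure, with made-up VF and data, is sketched below; it is illustrative only, not the generated code.

// Illustrative sketch only: scalar model of an EVL-controlled loop.
#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  const unsigned VF = 4;
  std::vector<int> Data(10, 1);
  unsigned AVL = static_cast<unsigned>(Data.size()); // remaining elements
  unsigned IV = 0;                                   // EVL-based induction variable
  while (AVL != 0) {
    unsigned EVL = std::min(VF, AVL);   // elements handled this iteration
    for (unsigned L = 0; L < EVL; ++L)  // stands in for one predicated vector op
      Data[IV + L] += 1;
    IV += EVL;   // index.evl.next
    AVL -= EVL;  // avl.next
  }
  std::printf("Data[9] = %d\n", Data[9]);
}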
2978 assert(!EVLPhi && "Found multiple EVL PHIs. Only one expected");
2989 [[maybe_unused]] bool FoundAVL =
2992 assert(FoundAVL && "Didn't find AVL?");
3000 [[maybe_unused]] bool FoundAVLNext =
3003 assert(FoundAVLNext && "Didn't find AVL backedge?");
3014 VPValue *Backedge = CanonicalIV->getIncomingValue(1);
3017 "Unexpected canonical iv");
3023 CanonicalIV->eraseFromParent();
3036 match(LatchExitingBr,
3039 "Unexpected terminator in EVL loop");
3046 LatchExitingBr->eraseFromParent();
3056 return R->getRegion() ||
3060 for (const SCEV *Stride : StridesMap.values()) {
3063 const APInt *StrideConst;
3064 if (!match(PSE.getSCEV(StrideV), m_scev_APInt(StrideConst)))
3080 unsigned BW = U->getType()->getScalarSizeInBits();
3086 RewriteMap[StrideV] = PSE.getSCEV(StrideV);
3093 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3096 if (NewSCEV != ScevExpr) {
3098 ExpSCEV->replaceAllUsesWith(NewExp);
3107 const std::function<bool(BasicBlock *)> &BlockNeedsPredication) {
3111 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) {
3116 while (!Worklist.empty()) {
3119 if (!Visited.insert(CurRec).second)
3141 RecWithFlags->isDisjoint()) {
3144 Instruction::Add, {A, B}, {false, false},
3145 RecWithFlags->getDebugLoc());
3146 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3147 RecWithFlags->replaceAllUsesWith(New);
3148 RecWithFlags->eraseFromParent();
3151 RecWithFlags->dropPoisonGeneratingFlags();
3156 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3157 "found instruction with poison generating flags not covered by "
3158 "VPRecipeWithIRFlags");
3163 if (VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3175 Instruction &UnderlyingInstr = WidenRec->getIngredient();
3176 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3177 if (AddrDef && WidenRec->isConsecutive() &&
3178 BlockNeedsPredication(UnderlyingInstr.getParent()))
3179 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3181 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3185 InterleaveRec->getInterleaveGroup();
3186 bool NeedPredication = false;
3188 I < NumMembers; ++I) {
3191 NeedPredication |= BlockNeedsPredication(Member->getParent());
3194 if (NeedPredication)
3195 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3207 if (InterleaveGroups.empty())
3214 for (const auto *IG : InterleaveGroups) {
3220 StoredValues.push_back(StoreR->getStoredValue());
3221 for (unsigned I = 1; I < IG->getFactor(); ++I) {
3228 StoredValues.push_back(StoreR->getStoredValue());
3232 bool NeedsMaskForGaps =
3233 (IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) ||
3234 (!StoredValues.empty() && !IG->isFull());
3246 VPValue *Addr = Start->getAddr();
3255 assert(IG->getIndex(IRInsertPos) != 0 &&
3256 "index of insert position shouldn't be zero");
3260 IG->getIndex(IRInsertPos),
3264 Addr = B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3270 if (IG->isReverse()) {
3273 -(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
3274 ReversePtr->insertBefore(InsertPos);
3278 InsertPos->getMask(), NeedsMaskForGaps,
3279 InterleaveMD, InsertPos->getDebugLoc());
3280 VPIG->insertBefore(InsertPos);
3283 for (unsigned i = 0; i < IG->getFactor(); ++i)
3286 if (!Member->getType()->isVoidTy()) {
3345 AddOp = Instruction::Add;
3346 MulOp = Instruction::Mul;
3348 AddOp = ID.getInductionOpcode();
3349 MulOp = Instruction::FMul;
3357 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty, DL);
3358 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty, DL);
3360 Flags.dropPoisonGeneratingFlags();
3369 Init = Builder.createWidenCast(Instruction::UIToFP, Init, StepTy);
3374 Init = Builder.createNaryOp(MulOp, {Init, SplatStep}, Flags);
3375 Init = Builder.createNaryOp(AddOp, {SplatStart, Init}, Flags,
3381 WidePHI->insertBefore(WidenIVR);
3392 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3396 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3399 VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
3402 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3409 auto *Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3412 WidePHI->addOperand(Next);
3440 VPlan *Plan = R->getParent()->getPlan();
3441 VPValue *Start = R->getStartValue();
3442 VPValue *Step = R->getStepValue();
3443 VPValue *VF = R->getVFValue();
3445 assert(R->getInductionDescriptor().getKind() ==
3447 "Not a pointer induction according to InductionDescriptor!");
3450 "Recipe should have been replaced");
3456 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start, DL, "pointer.phi");
3460 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3463 Offset = Builder.createOverflowingOp(Instruction::Mul, {Offset, Step});
3464 VPValue *PtrAdd = Builder.createNaryOp(
3466 R->replaceAllUsesWith(PtrAdd);
3471 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.inferScalarType(VF),
3473 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
3476 Builder.createPtrAdd(ScalarPtrPhi, Inc, DL, "ptr.ind");
3485 if (!R->isReplicator())
3489 R->dissolveToCFGLoop();
3514 for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
3515 Select = Builder.createSelect(Blend->getMask(I),
3516 Blend->getIncomingValue(I), Select,
3517 R.getDebugLoc(), "predphi");
3518 Blend->replaceAllUsesWith(Select);
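The loop above lowers a blend into a chain of selects: the first incoming value is the default, and each further (value, mask) pair wraps the running result in one more select. The following stand-alone C++ sketch models the same lowering over plain scalars; lowerBlend is a hypothetical helper, not part of VPlan.

// Illustrative sketch only: N-way blend lowered to a select chain.
#include <cstdio>
#include <vector>

static int lowerBlend(const std::vector<int> &Values,
                      const std::vector<bool> &Masks) {
  // Values[0] is the unmasked/default incoming value.
  int Select = Values[0];
  for (size_t I = 1; I != Values.size(); ++I)
    Select = Masks[I] ? Values[I] : Select; // one select per incoming value
  return Select;
}

int main() {
  std::printf("%d\n", lowerBlend({10, 20, 30}, {false, false, true})); // prints 30
}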
3538 ? Instruction::UIToFP
3539 : Instruction::Trunc;
3540 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
3546 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
3551 Flags = {VPI->getFastMathFlags()};
3556 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
3558 VPI->replaceAllUsesWith(VectorStep);
3564 R->eraseFromParent();
3577 "unsupported early exit VPBB");
3588 "Terminator must be be BranchOnCond");
3589 VPValue *CondOfEarlyExitingVPBB =
3591 auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
3592 ? CondOfEarlyExitingVPBB
3593 : Builder.createNot(CondOfEarlyExitingVPBB);
3610 VPBuilder EarlyExitB(VectorEarlyExitVPBB);
3615 unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
3616 if (ExitIRI->getNumOperands() != 1) {
3619 ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);
3622 VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
3623 if (!IncomingFromEarlyExit->isLiveIn()) {
3627 "first.active.lane");
3630 nullptr, "early.exit.value");
3631 ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
3641 "Unexpected terminator");
3642 auto *IsLatchExitTaken =
3644 LatchExitingBranch->getOperand(1));
3645 auto *AnyExitTaken = Builder.createNaryOp(
3646 Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken});
3648 LatchExitingBranch->eraseFromParent();
3658 Type *RedTy = Ctx.Types.inferScalarType(Red);
3659 VPValue *VecOp = Red->getVecOp();
3662 auto IsExtendedRedValidAndClampRange =
3679 ExtRedCost = Ctx.TTI.getPartialReductionCost(
3680 Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
3683 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
3684 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
3685 Red->getFastMathFlags(), CostKind);
3687 return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;
3695 IsExtendedRedValidAndClampRange(
3698 Ctx.Types.inferScalarType(A)))
3718 if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
3721 Type *RedTy = Ctx.Types.inferScalarType(Red);
3724 auto IsMulAccValidAndClampRange =
3731 Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
3734 if (IsPartialReduction) {
3736 Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) : nullptr;
3739 MulAccCost = Ctx.TTI.getPartialReductionCost(
3740 Opcode, SrcTy, SrcTy2, RedTy, VF,
3750 if (Ext0 && Ext1 && Ext0->getOpcode() != Ext1->getOpcode())
3754 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
3756 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
3764 ExtCost += Ext0->computeCost(VF, Ctx);
3766 ExtCost += Ext1->computeCost(VF, Ctx);
3768 ExtCost += OuterExt->computeCost(VF, Ctx);
3770 return MulAccCost.isValid() &&
3771 MulAccCost < ExtCost + MulCost + RedCost;
3776 VPValue *VecOp = Red->getVecOp();
3794 if (!ExtA || ExtB || !ValB->isLiveIn())
3810 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
3811 Type *WideTy = Ctx.Types.inferScalarType(ExtA);
3812 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
3813 Mul->setOperand(1, ExtB);
3823 ExtendAndReplaceConstantOp(RecipeA, RecipeB, B, Mul);
3828 IsMulAccValidAndClampRange(Mul, RecipeA, RecipeB, nullptr)) {
3835 if (!Sub && IsMulAccValidAndClampRange(Mul, nullptr, nullptr, nullptr))
3852 ExtendAndReplaceConstantOp(Ext0, Ext1, B, Mul);
3861 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
3862 Ext0->getOpcode() == Ext1->getOpcode() &&
3863 IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) {
3865 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), nullptr,
3866 *Ext0, *Ext0, Ext0->getDebugLoc());
3867 NewExt0->insertBefore(Ext0);
3872 Ext->getResultType(), nullptr, *Ext1,
3873 *Ext1, Ext1->getDebugLoc());
3876 Mul->setOperand(0, NewExt0);
3877 Mul->setOperand(1, NewExt1);
3878 Red->setOperand(1, Mul);
3891 auto IP = std::next(Red->getIterator());
3892 auto *VPBB = Red->getParent();
3902 Red->replaceAllUsesWith(AbstractR);
3932 for (VPValue *VPV : VPValues) {
3934 (VPV->isLiveIn() && VPV->getLiveInIRValue() &&
3942 if (User->usesScalars(VPV))
3945 HoistPoint = HoistBlock->begin();
3949 "All users must be in the vector preheader or dominated by it");
3954 VPV->replaceUsesWithIf(Broadcast,
3955 [VPV, Broadcast](VPUser &U, unsigned Idx) {
3956 return Broadcast != &U && !U.usesScalars(VPV);
3973 if (RepR->isPredicated() || !RepR->isSingleScalar() ||
3974 RepR->getOpcode() != Instruction::Load)
3977 VPValue *Addr = RepR->getOperand(0);
3980 if (!Loc.AATags.Scope)
3985 if (R.mayWriteToMemory()) {
3987 if (!Loc || !Loc->AATags.Scope || !Loc->AATags.NoAlias)
3995 for (auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
3999 const AAMDNodes &LoadAA = LoadLoc.AATags;
4012 assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
4013 assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
4047 auto *TCMO = Builder.createNaryOp(
4075 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
4077 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
4084 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
4094 DefR->replaceUsesWithIf(
4095 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
4097 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
4111 for (VPValue *Def : R.definedValues()) {
4124 auto IsCandidateUnpackUser = [Def](VPUser *U) {
4126 return U->usesScalars(Def) &&
4129 if (none_of(Def->users(), IsCandidateUnpackUser))
4136 Unpack->insertAfter(&R);
4137 Def->replaceUsesWithIf(Unpack,
4138 [&IsCandidateUnpackUser](VPUser &U, unsigned) {
4139 return IsCandidateUnpackUser(&U);
4149 bool RequiresScalarEpilogue) {
4151 assert(VectorTC.isLiveIn() && "vector-trip-count must be a live-in");
4170 if (TailByMasking) {
4171 TC = Builder.createNaryOp(
4173 {TC, Builder.createNaryOp(Instruction::Sub,
4184 Builder.createNaryOp(Instruction::URem, {TC, Step},
4193 if (RequiresScalarEpilogue) {
4195 "requiring scalar epilogue is not supported with fail folding");
4198 R = Builder.createSelect(IsZero, Step, R);
4201 VPValue *Res = Builder.createNaryOp(
4220 Builder.createElementCount(TCTy, VFEC * Plan.getUF());
4227 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
4231 BC, [&VF](VPUser &U, unsigned) { return !U.usesScalars(&VF); });
4236 VPValue *MulByUF = Builder.createNaryOp(Instruction::Mul, {RuntimeVF, UF});
4246 BasicBlock *EntryBB = Entry->getIRBasicBlock();
4254 const SCEV *Expr = ExpSCEV->getSCEV();
4257 ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
4262 ExpSCEV->eraseFromParent();
4265 "VPExpandSCEVRecipes must be at the beginning of the entry block, "
4266 "after any VPIRInstructions");
4269 auto EI = Entry->begin();
4279 return ExpandedSCEVs;
4295 return Member0Op == OpV;
4297 return !W->getMask() && Member0Op == OpV;
4299 return IR->getInterleaveGroup()->isFull() && IR->getVPValue(Idx) == OpV;
4310 if (!InterleaveR || InterleaveR->getMask())
4313 Type *GroupElementTy = nullptr;
4317 [&TypeInfo, GroupElementTy](VPValue *Op) {
4318 return TypeInfo.inferScalarType(Op) == GroupElementTy;
4325 [&TypeInfo, GroupElementTy](VPValue *Op) {
4326 return TypeInfo.inferScalarType(Op) == GroupElementTy;
4335 return IG->getFactor() == VFMin && IG->getNumMembers() == VFMin &&
4336 GroupSize == VectorRegWidth;
4344 return RepR && RepR->isSingleScalar();
4351 auto *R = V->getDefiningRecipe();
4359 for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx)
4360 WideMember0->setOperand(
4369 auto *LI = cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
4371 *LI, LoadGroup->getAddr(), LoadGroup->getMask(), true,
4372 false, {}, LoadGroup->getDebugLoc());
4373 L->insertBefore(LoadGroup);
4379 assert(RepR->isSingleScalar() &&
4381 "must be a single scalar load");
4382 NarrowedOps.insert(RepR);
4387 VPValue *PtrOp = WideLoad->getAddr();
4389 PtrOp = VecPtr->getOperand(0);
4394 nullptr, {}, *WideLoad);
4395 N->insertBefore(WideLoad);
4425 if (R.mayWriteToMemory() && !InterleaveR)
4447 if (InterleaveR->getStoredValues().empty())
4452 auto *Member0 = InterleaveR->getStoredValues()[0];
4454 all_of(InterleaveR->getStoredValues(),
4455 [Member0](VPValue *VPV) { return Member0 == VPV; })) {
4463 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
4466 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
4467 return IR && IR->getInterleaveGroup()->isFull() &&
4468 IR->getVPValue(Op.index()) == Op.value();
4480 for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) {
4482 if (!R || R->getOpcode() != WideMember0->getOpcode() ||
4483 R->getNumOperands() > 2)
4486 [WideMember0, Idx = I](const auto &P) {
4487 const auto &[OpIdx, OpV] = P;
4488 return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx);
4495 if (StoreGroups.empty())
4501 for (auto *StoreGroup : StoreGroups) {
4507 *SI, StoreGroup->getAddr(), Res, nullptr, true,
4508 false, {}, StoreGroup->getDebugLoc());
4509 S->insertBefore(StoreGroup);
4510 StoreGroup->eraseFromParent();
4528 Inc->setOperand(1, UF);
4547 "must have a BranchOnCond");
4550 if (VF.isScalable() && VScaleForTuning.has_value())
4551 VectorStep *= *VScaleForTuning;
4552 assert(VectorStep > 0 && "trip count should not be zero");
4556 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
4569 if (WideIntOrFp && WideIntOrFp->getTruncInst())
4576 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
4579 Start, VectorTC, Step);
4602 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
4612 IVEndValues[WideIVR] = EndValue;
4613 ResumePhiR->setOperand(0, EndValue);
4614 ResumePhiR->setName("bc.resume.val");
4621 "should only skip truncated wide inductions");
4629 auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
4631 "Cannot handle loops with uncountable early exits");
4635 "vector.recur.extract");
4636 ResumePhiR->setName(IsFOR ? "scalar.recur.init" : "bc.merge.rdx");
4637 ResumePhiR->setOperand(0, ResumeFromVectorLoop);
4646 VPBuilder ScalarPHBuilder(ScalarPHVPBB);
4647 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
4659 "Cannot handle loops with uncountable early exits");
4731 for (VPUser *U : FOR->users()) {
4745 {}, "vector.recur.extract.for.phi");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution &SE)
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Utility class for floating point operations which can have information about relaxed accuracy require...
Represents flags for the getelementptr instruction/expression.
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
uint32_t getNumMembers() const
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
ValueT lookup(const KeyT &Key) const
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyze scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
static LLVM_ABI bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias)
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize get(ScalarTy Quantity, bool Scalable)
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
size_t getNumPredecessors() const
const VPBlocksTy & getPredecessors() const
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={})
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
Canonical scalar induction phi of the vector loop.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A special type of VPBasicBlock that wraps an existing IR basic block.
BasicBlock * getIRBasicBlock() const
Class to record and manage LLVM IR flags.
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode; otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ FirstOrderRecurrenceSplice
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed-width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ CalculateTripCountMinusVF
void setName(StringRef NewName)
Set the symbolic name for the VPInstruction.
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPRegionBlock * getRegion()
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
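A minimal sketch of the recipe-movement helpers above (assumes R currently sits somewhere inside the region Region): hoist a recipe to the end of the region's preheader block.

static void hoistToPreheader(VPRecipeBase *R, VPRegionBlock *Region) {
  VPBasicBlock *Preheader = Region->getPreheaderVPBB();
  R->moveBefore(*Preheader, Preheader->end());
}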
Helper class to create VPRecipes from IR instructions.
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
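A minimal sketch of the type-inference queries above (it assumes VPTypeAnalysis can be constructed directly from the plan, which may differ between LLVM versions; Plan and V come from the surrounding transform):

VPTypeAnalysis TypeInfo(Plan);
Type *ScalarTy = TypeInfo.inferScalarType(V);   // scalar type assigned to V
LLVMContext &Ctx = TypeInfo.getContext();       // context used by the analysis
(void)ScalarTy; (void)Ctx;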
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
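A minimal sketch of the use-rewriting helpers above (assumes Old, New and VPBB come from the surrounding transform): redirect only the uses of Old that sit in recipes inside a given block, leaving all other users untouched.

Old->replaceUsesWithIf(New, [VPBB](VPUser &U, unsigned /*Idx*/) {
  return cast<VPRecipeBase>(&U)->getParent() == VPBB;
});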
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A Recipe for widening the canonical induction variable of the vector loop.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
PHINode * getPHINode() const
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
VPValue * getSplatVFValue()
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
bool hasVF(ElementCount VF) const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
VPValue & getVectorTripCount()
The vector trip count.
bool hasScalableVF() const
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPValue & getVF()
Returns the VF of the vector loop region.
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getTrue()
Return a VPValue wrapping i1 true.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given type and value.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPValue * getFalse()
Return a VPValue wrapping i1 false.
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
ArrayRef< VPValue * > getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
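A minimal sketch of the plan-level queries above (assumes VPlan &Plan): look up the vector loop region and its canonical induction, if the plan still has a vector loop.

if (VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion()) {
  VPCanonicalIVPHIRecipe *CanIV = LoopRegion->getCanonicalIV();
  Type *CanIVTy = LoopRegion->getCanonicalIVType();
  (void)CanIV; (void)CanIVTy; // e.g. feed these into a widening decision
}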
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
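A minimal sketch of how the ElementCount queries above behave, assuming only the standard llvm/Support/TypeSize.h API: a fixed count of 4 is exactly 4 lanes, while a scalable count of 4 means 4 * vscale lanes, known only at runtime.

#include "llvm/Support/TypeSize.h"
using namespace llvm;

void elementCountExample() {
  ElementCount Fixed = ElementCount::getFixed(4);       // exactly 4 lanes
  ElementCount Scalable = ElementCount::getScalable(4); // 4 * vscale lanes
  unsigned MinF = Fixed.getKnownMinValue();    // 4; Fixed.isFixed() is true
  unsigned MinS = Scalable.getKnownMinValue(); // 4; Scalable.isScalable() is true
  // Scaling the coefficient keeps the fixed/scalable property.
  ElementCount Doubled = Scalable.multiplyCoefficientBy(2); // 8 * vscale
  (void)MinF; (void)MinS; (void)Doubled;
}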
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
MatchFunctor< Val, Pattern > match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Opcode, Op0_t, Op1_t > m_c_Binary(const Op0_t &Op0, const Op1_t &Op1)
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
GEPLikeRecipe_match< Op0_t, Op1_t > m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastLanePerPart, Op0_t > m_ExtractLastLanePerPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExtractLastElement, Op0_t > m_ExtractLastElement(const Op0_t &Op0)
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPDerivedIV_match< Op0_t, Op1_t, Op2_t > m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
specific_intval< 1 > m_True()
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, without matching its operands, as the number of operands is not fixed.
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
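A minimal sketch of the VPlan matchers above (they live in the vectorizer's VPlanPatternMatch namespace and compose the same way IR PatternMatch does; V is assumed to come from the surrounding transform): test whether V is produced by an active-lane-mask computation, ignoring its three operands.

using namespace llvm::VPlanPatternMatch;
if (match(V, m_ActiveLaneMask(m_VPValue(), m_VPValue(), m_VPValue()))) {
  // V is an ActiveLaneMask VPInstruction.
}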
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPIRFlags getFlagsFromIndDesc(const InductionDescriptor &ID)
Extracts and returns NoWrap and FastMath flags from the induction binop in ID.
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
const SCEV * getSCEVExprForVPValue(const VPValue *V, ScalarEvolution &SE, const Loop *L=nullptr)
Return the SCEV expression for V.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
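A minimal sketch of make_early_inc_range applied to a VPBasicBlock (assumed to be *VPBB): erase single-def recipes whose value is no longer used while iterating; the early-increment range keeps the loop valid as recipes are unlinked. A real pass would also check the recipe has no side effects before erasing it.

for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
  if (auto *Def = dyn_cast<VPSingleDefRecipe>(&R))
    if (Def->getNumUsers() == 0)
      R.eraseFromParent();
}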
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
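A minimal sketch combining the traversal helpers above (assumes VPlan &Plan): visit every VPBasicBlock in the plan, including blocks nested inside replicate and loop regions, in depth-first order.

for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
         vp_depth_first_deep(Plan.getEntry()))) {
  // process the recipes in VPBB here
}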
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
iterator_range< po_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_post_order_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order while traversing through ...
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime over...
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result values are uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * NoAlias
The tag specifying the noalias scope.
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
A recipe for handling first-order recurrence phis.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening select instructions.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...