cl::desc("Enable use of wide get active lane mask instructions"));
GetIntOrFpInductionDescriptor,
if (!VPBB->getParent())
auto EndIter = Term ? Term->getIterator() : VPBB->end();
VPValue *VPV = Ingredient.getVPSingleValue();
const auto *II = GetIntOrFpInductionDescriptor(Phi);
Phi, Start, Step, &Plan.getVF(), *II, Ingredient.getDebugLoc());
"only VPInstructions expected here");
*Load, Ingredient.getOperand(0), nullptr /*Mask*/,
false /*Consecutive*/, false /*Reverse*/, Load->getAlign(),
*Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/,
Ingredient.getDebugLoc());
drop_end(Ingredient.operands()), CI->getType(),
CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), *CI);
"Only recipes with zero or one defined values expected");
Ingredient.eraseFromParent();
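// The fragment above lowers the initial, abstract VPInstructions into
// concrete recipes (widened phis, loads, stores, casts and calls) once the
// induction descriptors are known; each ingredient is erased after its
// single defined value has been replaced.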
if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
return RepR && RepR->getOpcode() == Instruction::Alloca;
auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
if (Candidate->getParent() == SinkTo || Candidate->mayHaveSideEffects() ||
    Candidate->mayReadOrWriteMemory())
if (!ScalarVFOnly && RepR->isSingleScalar())
WorkList.insert({SinkTo, Candidate});
for (auto &Recipe : *VPBB)
InsertIfValidSinkCandidate(VPBB, Op);
for (unsigned I = 0; I != WorkList.size(); ++I) {
std::tie(SinkTo, SinkCandidate) = WorkList[I];
auto UsersOutsideSinkTo =
return cast<VPRecipeBase>(U)->getParent() != SinkTo;
if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) {
      return !U->onlyFirstLaneUsed(SinkCandidate);
bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
if (NeedsDuplicating) {
if (auto *SinkCandidateRepR =
nullptr, *SinkCandidateRepR);
Clone = SinkCandidate->clone();
InsertIfValidSinkCandidate(SinkTo, Op);
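// Sinking: replicate recipes whose only out-of-block users need just the
// first lane are moved (or cloned, when users remain outside the target
// block) into the sink-to block, and their operands become new sink
// candidates, so the worklist grows as sinking proceeds.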
if (!EntryBB || EntryBB->size() != 1 ||
if (EntryBB->getNumSuccessors() != 2)
if (!Succ0 || !Succ1)
if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
if (Succ0->getSingleSuccessor() == Succ1)
if (Succ1->getSingleSuccessor() == Succ0)
if (!Region1->isReplicator())
auto *MiddleBasicBlock =
if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
if (!Region2 || !Region2->isReplicator())
if (!Mask1 || Mask1 != Mask2)
assert(Mask1 && Mask2 && "both regions must have conditions");
if (TransformedRegions.contains(Region1))
if (!Then1 || !Then2)
VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
Phi1ToMove.eraseFromParent();
Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
TransformedRegions.insert(Region1);
return !TransformedRegions.empty();
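// Two predicated (replicator) regions guarded by the same mask and
// separated only by an empty middle block are merged: phis from the first
// region's merge block move into the second region's merge block, and the
// first region becomes dead.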
std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
assert(Instr->getParent() && "Predicated instruction not in any basic block");
auto *BlockInMask = PredRecipe->getMask();
RecipeWithoutMask->getDebugLoc());
if (RepR->isPredicated())
if (ParentRegion && ParentRegion->getExiting() == CurrentBlock)
if (!VPBB->getParent())
if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
R.moveBefore(*PredVPBB, PredVPBB->end());
auto *ParentRegion = VPBB->getParent();
if (ParentRegion && ParentRegion->getExiting() == VPBB)
ParentRegion->setExiting(PredVPBB);
for (auto *Succ : to_vector(VPBB->successors())) {
return !WorkList.empty();
bool ShouldSimplify = true;
while (ShouldSimplify) {
if (!IV || IV->getTruncInst())
auto &Casts = IV->getInductionDescriptor().getCastInsts();
for (auto *U : FindMyCast->users()) {
if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
FoundUserCast = UserCast;
FindMyCast = FoundUserCast;
if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
bool IsConditionalAssume = RepR && RepR->isPredicated() &&
if (IsConditionalAssume)
if (R.mayHaveSideEffects())
return all_of(R.definedValues(),
              [](VPValue *V) { return V->getNumUsers() == 0; });
if (!PhiR || PhiR->getNumOperands() != 2 || PhiR->getNumUsers() != 1)
if (*PhiR->user_begin() != Incoming->getDefiningRecipe() ||
PhiR->replaceAllUsesWith(PhiR->getOperand(0));
PhiR->eraseFromParent();
Incoming->getDefiningRecipe()->eraseFromParent();
Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy, DL);
if (ResultTy != StepTy) {
Builder.setInsertPoint(VecPreheader);
Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy, DL);
return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
for (unsigned I = 0; I != Users.size(); ++I) {
Users.insert_range(V->users());
return Users.takeVector();
Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
(RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
Def->operands(), true);
Clone->insertAfter(Def);
Def->replaceAllUsesWith(Clone);
VPValue *StepV = PtrIV->getOperand(1);
nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);
VPValue *PtrAdd = Builder.createPtrAdd(PtrIV->getStartValue(), Steps,
if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
      return U->usesScalars(WideIV);
Plan, ID.getKind(), ID.getInductionOpcode(),
WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
WideIV->getDebugLoc(), Builder);
if (!HasOnlyVectorVFs)
WideIV->replaceAllUsesWith(Steps);
WideIV->replaceUsesWithIf(Steps, [WideIV](VPUser &U, unsigned) {
  return U.usesScalars(WideIV);
return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
if (!Def || Def->getNumOperands() != 2)
auto IsWideIVInc = [&]() {
auto &ID = WideIV->getInductionDescriptor();
VPValue *IVStep = WideIV->getStepValue();
switch (ID.getInductionOpcode()) {
case Instruction::Add:
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::Sub: {
return IsWideIVInc() ? WideIV : nullptr;
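// Wide inductions whose users only need scalar lanes are replaced by a
// VPScalarIVSteps chain built from the canonical IV; the switch above
// recognises the IV increment (add/sub, integer or FP) when matching a
// value against the widened induction update.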
if (WideIntOrFp && WideIntOrFp->getTruncInst())
FirstActiveLane = B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
                                            FirstActiveLaneType, DL);
B.createNaryOp(Instruction::Add, {CanonicalIV, FirstActiveLane}, DL);
EndValue = B.createNaryOp(Instruction::Add, {EndValue, One}, DL);
if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
VPValue *Start = WideIV->getStartValue();
VPValue *Step = WideIV->getStepValue();
EndValue = B.createDerivedIV(
    Start, EndValue, Step);
assert(EndValue && "end value must have been pre-computed");
VPValue *Step = WideIV->getStepValue();
return B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
return B.createPtrAdd(EndValue,
B.createNaryOp(Instruction::Sub, {Zero, Step}),
const auto &ID = WideIV->getInductionDescriptor();
return B.createNaryOp(
    ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
{EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) {
if (PredVPBB == MiddleVPBB)
ExitIRI->getOperand(Idx),
ExitIRI->getOperand(Idx), SE);
ExitIRI->setOperand(Idx, Escape);
const auto &[V, Inserted] = SCEV2VPV.try_emplace(ExpR->getSCEV(), ExpR);
ExpR->replaceAllUsesWith(V->second);
ExpR->eraseFromParent();
while (!WorkList.empty()) {
if (!Seen.insert(Cur).second)
R->eraseFromParent();
static std::optional<std::pair<bool, unsigned>>
std::optional<std::pair<bool, unsigned>>>(R)
[](auto *I) { return std::make_pair(false, I->getOpcode()); })
.Case<VPWidenIntrinsicRecipe>([](auto *I) {
  return std::make_pair(true, I->getVectorIntrinsicID());
.Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {
return std::make_pair(false,
.Default([](auto *) { return std::nullopt; });
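// The TypeSwitch collapses each recipe kind to an (IsIntrinsic,
// Opcode-or-IntrinsicID) pair so the folder below can treat widened
// instructions and widened vector intrinsics uniformly.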
if (!Op->isLiveIn() || !Op->getLiveInIRValue())
Ops.push_back(Op->getLiveInIRValue());
auto FoldToIRValue = [&]() -> Value * {
if (OpcodeOrIID->first) {
if (R.getNumOperands() != 2)
unsigned ID = OpcodeOrIID->second;
return Folder.FoldBinaryIntrinsic(ID, Ops[0], Ops[1],
unsigned Opcode = OpcodeOrIID->second;
return Folder.FoldSelect(Ops[0], Ops[1],
return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
case Instruction::Select:
  return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
case Instruction::ICmp:
case Instruction::FCmp:
case Instruction::GetElementPtr: {
  return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0],
case Instruction::ExtractElement:
if (Value *V = FoldToIRValue())
  return R.getParent()->getPlan()->getOrAddLiveIn(V);
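// Recipes whose operands are all live-in IR values are folded with
// InstSimplifyFolder; a successful fold is registered as a live-in of the
// plan, so later simplifications see a plain constant.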
VPlan *Plan = Def->getParent()->getPlan();
return Def->replaceAllUsesWith(V);
PredPHI->replaceAllUsesWith(Op);
if (TruncTy == ATy) {
Def->replaceAllUsesWith(A);
: Instruction::ZExt;
if (auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
Ext->setUnderlyingValue(UnderlyingExt);
Def->replaceAllUsesWith(Ext);
auto *Trunc = Builder.createWidenCast(Instruction::Trunc, A, TruncTy);
Def->replaceAllUsesWith(Trunc);
for (VPUser *U : A->users()) {
for (VPValue *VPV : R->definedValues())
Def->replaceAllUsesWith(X);
Def->eraseFromParent();
return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
return Def->replaceAllUsesWith(X);
return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
return Def->replaceAllUsesWith(Def->getOperand(1));
(!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
 !Def->getOperand(1)->hasMoreThanOneUniqueUser())
return Def->replaceAllUsesWith(
    Builder.createLogicalAnd(X, Builder.createOr(Y, Z)));
return Def->replaceAllUsesWith(Plan->getFalse());
return Def->replaceAllUsesWith(X);
Def->setOperand(0, C);
Def->setOperand(1, Y);
Def->setOperand(2, X);
X->hasMoreThanOneUniqueUser())
return Def->replaceAllUsesWith(
    Builder.createLogicalAnd(X, Builder.createLogicalAnd(Y, Z)));
return Def->replaceAllUsesWith(A);
return Def->replaceAllUsesWith(
    Def->getOperand(0) == A ? Def->getOperand(1) : Def->getOperand(0));
return Def->replaceAllUsesWith(A);
R->setOperand(1, Y);
R->setOperand(2, X);
R->replaceAllUsesWith(Cmp);
if (!Cmp->getDebugLoc() && Def->getDebugLoc())
Cmp->setDebugLoc(Def->getDebugLoc());
return Def->replaceAllUsesWith(Def->getOperand(1));
X = Builder.createWidenCast(Instruction::Trunc, X, WideStepTy);
Def->replaceAllUsesWith(X);
Def->setOperand(1, Def->getOperand(0));
Def->setOperand(0, Y);
if (Phi->getOperand(0) == Phi->getOperand(1))
Phi->replaceAllUsesWith(Phi->getOperand(0));
Def->replaceAllUsesWith(
    BuildVector->getOperand(BuildVector->getNumOperands() - 1));
Def->replaceAllUsesWith(
    BuildVector->getOperand(BuildVector->getNumOperands() - 2));
Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
Def->replaceAllUsesWith(
if (Phi->getNumOperands() == 1)
Phi->replaceAllUsesWith(Phi->getOperand(0));
if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) &&
    Phi->getNumUsers() == 1 && (*Phi->user_begin() == Def)) {
Phi->setOperand(0, Y);
Def->replaceAllUsesWith(Phi);
if (VecPtr->isFirstPart()) {
VecPtr->replaceAllUsesWith(VecPtr->getOperand(0));
Steps->replaceAllUsesWith(Steps->getOperand(0));
Def->replaceUsesWithIf(StartV, [](const VPUser &U, unsigned Idx) {
return PhiR && PhiR->isInLoop();
Def->replaceAllUsesWith(A);
[Def, A](VPUser *U) { return U->usesScalars(A) || Def == U; })) {
return Def->replaceAllUsesWith(A);
if (Plan->getUF() == 1 &&
return Def->replaceAllUsesWith(
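// The cases above are peephole simplifications: redundant cast chains,
// (X && Y) || (X && Z) -> X && (Y || Z), select/compare canonicalisation,
// extracts from BuildVector, and single-entry phis all reduce to an
// existing VPValue via replaceAllUsesWith.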
if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Metadata*/);
Clone->insertBefore(RepOrWidenR);
unsigned ExtractOpc =
auto *Ext = new VPInstruction(ExtractOpc, {Clone->getOperand(0)});
Ext->insertBefore(Clone);
Clone->setOperand(0, Ext);
RepR->eraseFromParent();
!all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) {
  return U->usesScalars(RepOrWidenR) ||
         match(cast<VPRecipeBase>(U),
               m_CombineOr(m_ExtractLastElement(m_VPValue()),
                           m_ExtractLastLanePerPart(m_VPValue())));
RepOrWidenR->operands(),
Clone->insertBefore(RepOrWidenR);
RepOrWidenR->replaceAllUsesWith(Clone);
RepOrWidenR->eraseFromParent();
if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
UniqueValues.insert(Blend->getIncomingValue(0));
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
UniqueValues.insert(Blend->getIncomingValue(I));
if (UniqueValues.size() == 1) {
Blend->replaceAllUsesWith(*UniqueValues.begin());
Blend->eraseFromParent();
if (Blend->isNormalized())
unsigned StartIndex = 0;
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
if (I == StartIndex)
OperandsWithMask.push_back(Blend->getIncomingValue(I));
OperandsWithMask.push_back(Blend->getMask(I));
OperandsWithMask, Blend->getDebugLoc());
NewBlend->insertBefore(&R);
VPValue *DeadMask = Blend->getMask(StartIndex);
Blend->eraseFromParent();
if (NewBlend->getNumOperands() == 3 &&
VPValue *Inc0 = NewBlend->getOperand(0);
VPValue *Inc1 = NewBlend->getOperand(1);
VPValue *OldMask = NewBlend->getOperand(2);
NewBlend->setOperand(0, Inc1);
NewBlend->setOperand(1, Inc0);
NewBlend->setOperand(2, NewMask);
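// Blend normalisation: an incoming value whose mask has no other users
// becomes the unmasked default (StartIndex), and a two-incoming blend with
// an inverted mask is flipped so the negation can be dropped.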
APInt MaxVal = AlignedTC - 1;
unsigned NewBitWidth =
bool MadeChange = false;
if (!WideIV || !WideIV->isCanonical() ||
    WideIV->hasMoreThanOneUniqueUser() ||
    NewIVTy == WideIV->getScalarType())
if (!match(*WideIV->user_begin(),
WideIV->setStartValue(NewStart);
WideIV->setStepValue(NewStep);
Cmp->setOperand(1, NewBTC);
return any_of(Cond->getDefiningRecipe()->operands(),
              [&Plan, BestVF, BestUF, &SE](VPValue *C) {
                return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, SE);
const SCEV *VectorTripCount =
"Trip count SCEV must be computable");
auto *Term = &ExitingVPBB->back();
for (unsigned Part = 0; Part < UF; ++Part) {
Extracts[Part] = Ext;
Ext->insertAfter(ALM);
match(Phi->getBackedgeValue(),
assert(Index && "Expected index from ActiveLaneMask instruction");
"Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
"Expected incoming values of Phi to be ActiveLaneMasks");
EntryALM->setOperand(2, ALMMultiplier);
LoopALM->setOperand(2, ALMMultiplier);
ExtractFromALM(EntryALM, EntryExtracts);
ExtractFromALM(LoopALM, LoopExtracts);
Not->setOperand(0, LoopExtracts[0]);
for (unsigned Part = 0; Part < UF; ++Part) {
Phis[Part]->setStartValue(EntryExtracts[Part]);
Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
auto *Term = &ExitingVPBB->back();
const SCEV *TripCount =
"Trip count SCEV must be computable");
if (TripCount->isZero() ||
if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi))
  return R->isCanonical();
return isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe,
           VPFirstOrderRecurrencePHIRecipe, VPPhi>(&Phi);
R->getScalarType());
HeaderR.eraseFromParent();
HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));
HeaderR.eraseFromParent();
B->setParent(nullptr);
Term->getDebugLoc());
Term->eraseFromParent();
assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
assert(Plan.getUF() == BestUF && "BestUF must match the Plan's UF");
auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
if (SinkCandidate == Previous)
!Seen.insert(SinkCandidate).second ||
for (unsigned I = 0; I != WorkList.size(); ++I) {
"only recipes with a single defined value expected");
if (SinkCandidate == FOR)
SinkCandidate->moveAfter(Previous);
Previous = SinkCandidate;
for (VPUser *U : FOR->users()) {
[&VPDT, HoistPoint](VPUser *U) {
  auto *R = cast<VPRecipeBase>(U);
  return HoistPoint == R ||
         VPDT.properlyDominates(HoistPoint, R);
"HoistPoint must dominate all users of FOR");
auto NeedsHoisting = [HoistPoint, &VPDT,
VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
if (!HoistCandidate)
HoistCandidate->getRegion() == EnclosingLoopRegion) &&
"CFG in VPlan should still be flat, without replicate regions");
if (!Visited.insert(HoistCandidate).second)
return HoistCandidate;
for (unsigned I = 0; I != HoistCandidates.size(); ++I) {
"only recipes with a single defined value expected");
if (auto *R = NeedsHoisting(Op))
HoistCandidate->moveBefore(*HoistPoint->getParent(),
VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
while (auto *PrevPhi =
assert(PrevPhi->getParent() == FOR->getParent());
Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
{FOR, FOR->getBackedgeValue()});
FOR->replaceAllUsesWith(RecurSplice);
RecurSplice->setOperand(0, FOR);
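// First-order recurrences: users are sunk after (or operands hoisted
// above) a single point, then the phi is combined with its backedge value
// into a FirstOrderRecurrenceSplice. replaceAllUsesWith above also rewrote
// the splice's own first operand, so it is reset to FOR afterwards.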
RecurKind RK = PhiR->getRecurrenceKind();
RecWithFlags->dropPoisonGeneratingFlags();
struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
return Def == getEmptyKey() || Def == getTombstoneKey();
return GEP->getSourceElementType();
.Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
    [](auto *I) { return I->getSourceElementType(); })
.Default([](auto *) { return nullptr; });
static bool canHandle(const VPSingleDefRecipe *Def) {
if (!C || (!C->first && (C->second == Instruction::InsertValue ||
                         C->second == Instruction::ExtractValue)))
return !Def->mayReadFromMemory();
static unsigned getHashValue(const VPSingleDefRecipe *Def) {
const VPlan *Plan = Def->getParent()->getPlan();
VPTypeAnalysis TypeInfo(*Plan);
getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
if (RFlags->hasPredicate())
static bool isEqual(const VPSingleDefRecipe *L, const VPSingleDefRecipe *R) {
if (L->getVPDefID() != R->getVPDefID() ||
    getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
    !equal(L->operands(), R->operands()))
"must have valid opcode info for both recipes");
if (LFlags->hasPredicate() &&
    LFlags->getPredicate() !=
const VPRegionBlock *RegionL = L->getRegion();
const VPRegionBlock *RegionR = R->getRegion();
L->getParent() != R->getParent())
const VPlan *Plan = L->getParent()->getPlan();
VPTypeAnalysis TypeInfo(*Plan);
return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
if (!VPDT.dominates(V->getParent(), VPBB))
Def->replaceAllUsesWith(V);
"Expected vector preheader's successor to be the vector loop region");
return !Op->isDefinedOutsideLoopRegions();
R.moveBefore(*Preheader, Preheader->end());
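// CSE keeps a dominance-checked map keyed by VPCSEDenseMapInfo (opcode,
// operands, flags and inferred type), and loop-invariant recipes, whose
// operands are all defined outside the loop regions, are hoisted into the
// vector preheader.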
VPValue *ResultVPV = R.getVPSingleValue();
unsigned NewResSizeInBits = MinBWs.lookup(UI);
if (!NewResSizeInBits)
(void)OldResSizeInBits;
VPW->dropPoisonGeneratingFlags();
if (OldResSizeInBits != NewResSizeInBits &&
Ext->insertAfter(&R);
Ext->setOperand(0, ResultVPV);
assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");
"Only ICmps should not need extending the result.");
for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
auto *Op = R.getOperand(Idx);
unsigned OpSizeInBits =
if (OpSizeInBits == NewResSizeInBits)
assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");
auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.try_emplace(Op);
R.setOperand(Idx, ProcessedIter->second);
Builder.setInsertPoint(&R);
Builder.createWidenCast(Instruction::Trunc, Op, NewResTy);
ProcessedIter->second = NewOp;
R.setOperand(Idx, NewOp);
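// Minimal-bitwidth narrowing: each operand is truncated to the width
// recorded in MinBWs (sharing truncs through ProcessedTruncs) and the
// result is extended back, so the recipe computes in the narrow type.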
assert(VPBB->getNumSuccessors() == 2 &&
       "Two successors expected for BranchOnCond");
unsigned RemovedIdx;
"There must be a single edge between VPBB and its successor");
VPBB->back().eraseFromParent();
VPValue *StartV = CanonicalIVPHI->getStartValue();
auto *CanonicalIVIncrement =
CanonicalIVIncrement->dropPoisonGeneratingFlags();
DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
VPValue *TripCount, *IncrementValue;
IncrementValue = CanonicalIVIncrement;
IncrementValue = CanonicalIVPHI;
auto *EntryIncrement = Builder.createOverflowingOp(
{EntryIncrement, TC, ALMMultiplier}, DL,
"active.lane.mask.entry");
LaneMaskPhi->insertAfter(CanonicalIVPHI);
Builder.setInsertPoint(OriginalTerminator);
auto *InLoopIncrement =
{IncrementValue}, {false, false}, DL);
{InLoopIncrement, TripCount, ALMMultiplier},
DL, "active.lane.mask.next");
auto *NotMask = Builder.createNot(ALM, DL);
auto *FoundWidenCanonicalIVUser = find_if(
"Must have at most one VPWideCanonicalIVRecipe");
if (FoundWidenCanonicalIVUser !=
auto *WideCanonicalIV =
WideCanonicalIVs.push_back(WideCanonicalIV);
if (WidenOriginalIV && WidenOriginalIV->isCanonical())
WideCanonicalIVs.push_back(WidenOriginalIV);
for (auto *Wide : WideCanonicalIVs) {
assert(VPI->getOperand(0) == Wide &&
       "WidenCanonicalIV must be the first operand of the compare");
assert(!HeaderMask && "Multiple header masks found?");
VPlan &Plan, bool UseActiveLaneMaskForControlFlow,
UseActiveLaneMaskForControlFlow) &&
"DataAndControlFlowWithoutRuntimeCheck implies "
"UseActiveLaneMaskForControlFlow");
auto *FoundWidenCanonicalIVUser = find_if(
assert(FoundWidenCanonicalIVUser &&
       "Must have widened canonical IV when tail folding!");
auto *WideCanonicalIV =
if (UseActiveLaneMaskForControlFlow) {
nullptr, "active.lane.mask");
template <typename OpTy> bool match(OpTy *V) const {
template <typename Op0_t, typename Op1_t>
VPValue *Addr, *Mask, *EndPtr;
auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) {
EVLEndPtr->insertBefore(&CurRecipe);
EVLEndPtr->setOperand(1, &EVL);
if (match(&CurRecipe,
if (match(&CurRecipe,
AdjustEndPtr(EndPtr), EVL, Mask);
AdjustEndPtr(EndPtr), EVL, Mask);
if (Rdx->isConditional() &&
if (Interleave->getMask() &&
if (match(&CurRecipe,
"User of VF that we can't transform to EVL.");
[&LoopRegion, &Plan](VPUser *U) {
m_c_Add(m_Specific(LoopRegion->getCanonicalIV()),
        m_Specific(&Plan.getVFxUF()))) ||
isa<VPWidenPointerInductionRecipe>(U);
"Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
"increment of the canonical induction.");
MaxEVL = Builder.createScalarZExtOrTrunc(
Builder.setInsertPoint(Header, Header->getFirstNonPhi());
VPValue *PrevEVL = Builder.createScalarPhi(
Intrinsic::experimental_vp_splice,
{V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},
R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
VPValue *EVLMask = Builder.createICmp(
assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
       "New recipe must define the same number of values as the "
for (unsigned I = 0; I < NumDefVal; ++I) {
VPValue *CurVPV = CurRecipe->getVPValue(I);
R->eraseFromParent();
2787 VPlan &Plan,
const std::optional<unsigned> &MaxSafeElements) {
2795 VPValue *StartV = CanonicalIVPHI->getStartValue();
2799 EVLPhi->insertAfter(CanonicalIVPHI);
2800 VPBuilder Builder(Header, Header->getFirstNonPhi());
2803 VPPhi *AVLPhi = Builder.createScalarPhi(
2807 if (MaxSafeElements) {
2817 auto *CanonicalIVIncrement =
2819 Builder.setInsertPoint(CanonicalIVIncrement);
2823 OpVPEVL = Builder.createScalarZExtOrTrunc(
2824 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
2826 auto *NextEVLIV = Builder.createOverflowingOp(
2827 Instruction::Add, {OpVPEVL, EVLPhi},
2828 {CanonicalIVIncrement->hasNoUnsignedWrap(),
2829 CanonicalIVIncrement->hasNoSignedWrap()},
2830 CanonicalIVIncrement->getDebugLoc(),
"index.evl.next");
2831 EVLPhi->addOperand(NextEVLIV);
2833 VPValue *NextAVL = Builder.createOverflowingOp(
2834 Instruction::Sub, {AVLPhi, OpVPEVL}, {
true,
false},
2842 CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
2843 CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
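// EVL-based IV, conceptual loop structure after the transform:
//   %evl.phi = phi [ 0, ph ], [ %index.evl.next, loop ]
//   %avl.phi = phi [ %TC, ph ], [ %avl.next, loop ]
//   %evl     = explicit-vector-length(%avl.phi)
//   ...
//   %index.evl.next = add %evl, %evl.phi
//   %avl.next       = sub nuw %avl.phi, %evl
// The canonical IV keeps stepping by VF * UF for the exit test, while
// memory recipes are predicated by EVL instead of a mask.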
assert(!EVLPhi && "Found multiple EVL PHIs. Only one expected");
[[maybe_unused]] bool FoundAVL =
assert(FoundAVL && "Didn't find AVL?");
[[maybe_unused]] bool FoundAVLNext =
assert(FoundAVLNext && "Didn't find AVL backedge?");
VPValue *Backedge = CanonicalIV->getIncomingValue(1);
"Unexpected canonical iv");
CanonicalIV->eraseFromParent();
match(LatchExitingBr,
"Unexpected terminator in EVL loop");
LatchExitingBr->eraseFromParent();
return R->getRegion() ||
for (const SCEV *Stride : StridesMap.values()) {
const APInt *StrideConst;
if (!match(PSE.getSCEV(StrideV), m_scev_APInt(StrideConst)))
unsigned BW = U->getType()->getScalarSizeInBits();
RewriteMap[StrideV] = PSE.getSCEV(StrideV);
const SCEV *ScevExpr = ExpSCEV->getSCEV();
if (NewSCEV != ScevExpr) {
ExpSCEV->replaceAllUsesWith(NewExp);
const std::function<bool(BasicBlock *)> &BlockNeedsPredication) {
auto CollectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) {
while (!Worklist.empty()) {
if (!Visited.insert(CurRec).second)
RecWithFlags->isDisjoint()) {
Instruction::Add, {A, B}, {false, false},
RecWithFlags->getDebugLoc());
New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
RecWithFlags->replaceAllUsesWith(New);
RecWithFlags->eraseFromParent();
RecWithFlags->dropPoisonGeneratingFlags();
assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
       "found instruction with poison generating flags not covered by "
       "VPRecipeWithIRFlags");
if (VPRecipeBase *OpDef = Operand->getDefiningRecipe())
Instruction &UnderlyingInstr = WidenRec->getIngredient();
VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
if (AddrDef && WidenRec->isConsecutive() &&
    BlockNeedsPredication(UnderlyingInstr.getParent()))
CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
InterleaveRec->getInterleaveGroup();
bool NeedPredication = false;
I < NumMembers; ++I) {
NeedPredication |= BlockNeedsPredication(Member->getParent());
if (NeedPredication)
CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
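// Address computations feeding masked consecutive or interleaved memory
// recipes must not generate poison for lanes the mask disables, so
// poison-generating flags are dropped from the entire backward slice of
// each such address; a disjoint-or is rewritten as a plain add first.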
if (InterleaveGroups.empty())
for (const auto *IG : InterleaveGroups) {
StoredValues.push_back(StoreR->getStoredValue());
for (unsigned I = 1; I < IG->getFactor(); ++I) {
StoredValues.push_back(StoreR->getStoredValue());
bool NeedsMaskForGaps =
    (IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) ||
    (!StoredValues.empty() && !IG->isFull());
VPValue *Addr = Start->getAddr();
assert(IG->getIndex(IRInsertPos) != 0 &&
       "index of insert position shouldn't be zero");
IG->getIndex(IRInsertPos),
Addr = B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
if (IG->isReverse()) {
-(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
ReversePtr->insertBefore(InsertPos);
InsertPos->getMask(), NeedsMaskForGaps,
InterleaveMD, InsertPos->getDebugLoc());
VPIG->insertBefore(InsertPos);
for (unsigned i = 0; i < IG->getFactor(); ++i)
if (!Member->getType()->isVoidTy()) {
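// Each interleave group becomes one VPInterleaveRecipe at the insert
// position: the address is rebased to the group's first member (with a
// negative offset for reverse groups), and a gap mask is requested when
// the group is not full or needs a scalar epilogue that isn't allowed.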
AddOp = Instruction::Add;
MulOp = Instruction::Mul;
AddOp = ID.getInductionOpcode();
MulOp = Instruction::FMul;
Flags = ID.getInductionBinOp()->getFastMathFlags();
Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty, DL);
Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty, DL);
Init = Builder.createWidenCast(Instruction::UIToFP, Init, StepTy);
Init = Builder.createNaryOp(MulOp, {Init, SplatStep}, Flags);
Init = Builder.createNaryOp(AddOp, {SplatStart, Init}, Flags,
WidePHI->insertBefore(WidenIVR);
Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
auto *Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
WidePHI->addOperand(Next);
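// Widened IV expansion: the vector phi starts at
// splat(Start) + {0,1,...,VF-1} * splat(Step) and advances each iteration
// by VF * Step, using FMul/FAdd with the descriptor's fast-math flags for
// floating-point inductions.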
VPlan *Plan = R->getParent()->getPlan();
VPValue *Start = R->getStartValue();
VPValue *Step = R->getStepValue();
VPValue *VF = R->getVFValue();
assert(R->getInductionDescriptor().getKind() ==
"Not a pointer induction according to InductionDescriptor!");
"Recipe should have been replaced");
VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start, DL, "pointer.phi");
Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
Offset = Builder.createNaryOp(Instruction::Mul, {Offset, Step});
VPValue *PtrAdd = Builder.createNaryOp(
R->replaceAllUsesWith(PtrAdd);
VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.inferScalarType(VF),
VPValue *Inc = Builder.createNaryOp(Instruction::Mul, {Step, VF});
Builder.createPtrAdd(ScalarPtrPhi, Inc, DL, "ptr.ind");
if (!R->isReplicator())
R->dissolveToCFGLoop();
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
Select = Builder.createSelect(Blend->getMask(I), Blend->getIncomingValue(I),
                              Select, R.getDebugLoc(), "predphi");
Blend->replaceAllUsesWith(Select);
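// Blends lower to a select chain: starting from incoming value 0, each
// masked incoming value selects itself over the accumulated result
// ("predphi"), reproducing the phi's semantics without a dedicated recipe.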
? Instruction::UIToFP
: Instruction::Trunc;
VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
Flags = {VPI->getFastMathFlags()};
MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
VPI->replaceAllUsesWith(VectorStep);
R->eraseFromParent();
"unsupported early exit VPBB");
"Terminator must be BranchOnCond");
VPValue *CondOfEarlyExitingVPBB =
auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
                            ? CondOfEarlyExitingVPBB
                            : Builder.createNot(CondOfEarlyExitingVPBB);
VPBuilder EarlyExitB(VectorEarlyExitVPBB);
unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
if (ExitIRI->getNumOperands() != 1) {
ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);
VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
if (!IncomingFromEarlyExit->isLiveIn()) {
"first.active.lane");
nullptr, "early.exit.value");
ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
"Unexpected terminator");
auto *IsLatchExitTaken =
LatchExitingBranch->getOperand(1));
auto *AnyExitTaken = Builder.createNaryOp(
    Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken});
LatchExitingBranch->eraseFromParent();
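// Uncountable early exits: the vector latch branches on the OR of "any
// early-exit lane active" and the countable latch condition, while exit
// values incoming from the early exit are recomputed from the first active
// lane ("first.active.lane" / "early.exit.value").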
Type *RedTy = Ctx.Types.inferScalarType(Red);
VPValue *VecOp = Red->getVecOp();
auto IsExtendedRedValidAndClampRange =
ExtRedCost = Ctx.TTI.getPartialReductionCost(
    Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
ExtRedCost = Ctx.TTI.getExtendedReductionCost(
    Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
    Red->getFastMathFlags(), CostKind);
return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;
IsExtendedRedValidAndClampRange(
Ctx.Types.inferScalarType(A)))
if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
Type *RedTy = Ctx.Types.inferScalarType(Red);
auto IsMulAccValidAndClampRange =
Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
if (IsPartialReduction) {
Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) : nullptr;
MulAccCost = Ctx.TTI.getPartialReductionCost(
    Opcode, SrcTy, SrcTy2, RedTy, VF,
if (Ext0 && Ext1 && Ext0->getOpcode() != Ext1->getOpcode())
!Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
ExtCost += Ext0->computeCost(VF, Ctx);
ExtCost += Ext1->computeCost(VF, Ctx);
ExtCost += OuterExt->computeCost(VF, Ctx);
return MulAccCost.isValid() &&
       MulAccCost < ExtCost + MulCost + RedCost;
VPValue *VecOp = Red->getVecOp();
if (!ExtA || ExtB || !ValB->isLiveIn())
Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
Type *WideTy = Ctx.Types.inferScalarType(ExtA);
ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
Mul->setOperand(1, ExtB);
ExtendAndReplaceConstantOp(RecipeA, RecipeB, B, Mul);
IsMulAccValidAndClampRange(Mul, RecipeA, RecipeB, nullptr)) {
if (!Sub && IsMulAccValidAndClampRange(Mul, nullptr, nullptr, nullptr))
ExtendAndReplaceConstantOp(Ext0, Ext1, B, Mul);
(Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
Ext0->getOpcode() == Ext1->getOpcode() &&
IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) {
Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), *Ext0,
*Ext0, Ext0->getDebugLoc());
NewExt0->insertBefore(Ext0);
Ext->getResultType(), *Ext1, *Ext1,
Ext1->getDebugLoc());
Mul->setOperand(0, NewExt0);
Mul->setOperand(1, NewExt1);
Red->setOperand(1, Mul);
auto IP = std::next(Red->getIterator());
auto *VPBB = Red->getParent();
Red->replaceAllUsesWith(AbstractR);
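// ext(A) * ext(B) feeding an add/sub reduction is bundled into an abstract
// mul-accumulate expression recipe only when TTI reports the fused form
// (getMulAccReductionCost / getPartialReductionCost) cheaper than the
// separate extends, multiply and reduction.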
for (VPValue *VPV : VPValues) {
(VPV->isLiveIn() && VPV->getLiveInIRValue() &&
if (User->usesScalars(VPV))
HoistPoint = HoistBlock->begin();
"All users must be in the vector preheader or dominated by it");
VPV->replaceUsesWithIf(Broadcast,
                       [VPV, Broadcast](VPUser &U, unsigned Idx) {
                         return Broadcast != &U && !U.usesScalars(VPV);
assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
auto *TCMO = Builder.createNaryOp(
auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
DefR->replaceUsesWithIf(
    BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
for (VPValue *Def : R.definedValues()) {
auto IsCandidateUnpackUser = [Def](VPUser *U) {
return U->usesScalars(Def) &&
if (none_of(Def->users(), IsCandidateUnpackUser))
Unpack->insertAfter(&R);
Def->replaceUsesWithIf(Unpack,
                       [&IsCandidateUnpackUser](VPUser &U, unsigned) {
                         return IsCandidateUnpackUser(&U);
bool RequiresScalarEpilogue) {
assert(VectorTC.isLiveIn() && "vector-trip-count must be a live-in");
if (TailByMasking) {
TC = Builder.createNaryOp(
{TC, Builder.createNaryOp(Instruction::Sub,
Builder.createNaryOp(Instruction::URem, {TC, Step},
if (RequiresScalarEpilogue) {
"requiring scalar epilogue is not supported with tail folding");
R = Builder.createSelect(IsZero, Step, R);
VPValue *Res = Builder.createNaryOp(
Builder.createElementCount(TCTy, VFEC * Plan.getUF());
VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
BC, [&VF](VPUser &U, unsigned) { return !U.usesScalars(&VF); });
VPValue *MulByUF = Builder.createNaryOp(Instruction::Mul, {RuntimeVF, UF});
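// Vector trip count: TC is rounded to a multiple of Step = VF * UF (rounded
// up first when folding the tail) via URem/Sub; with a required scalar
// epilogue, a select forces a full final vector step whenever TC would
// otherwise divide evenly, leaving iterations for the scalar loop.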
BasicBlock *EntryBB = Entry->getIRBasicBlock();
const SCEV *Expr = ExpSCEV->getSCEV();
ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
ExpSCEV->eraseFromParent();
"VPExpandSCEVRecipes must be at the beginning of the entry block, "
"after any VPIRInstructions");
auto EI = Entry->begin();
return ExpandedSCEVs;
return IR->getInterleaveGroup()->isFull() &&
       IR->getVPValue(Idx) == OpV;
unsigned VectorRegWidth) {
if (!InterleaveR || InterleaveR->getMask())
Type *GroupElementTy = nullptr;
[&TypeInfo, GroupElementTy](VPValue *Op) {
  return TypeInfo.inferScalarType(Op) == GroupElementTy;
[&TypeInfo, GroupElementTy](VPValue *Op) {
  return TypeInfo.inferScalarType(Op) == GroupElementTy;
return IG->getFactor() == VF && IG->getNumMembers() == VF &&
       GroupSize == VectorRegWidth;
return RepR && RepR->isSingleScalar();
unsigned VectorRegWidth) {
if (R.mayWriteToMemory() && !InterleaveR)
if (InterleaveR->getStoredValues().empty())
auto *Member0 = InterleaveR->getStoredValues()[0];
all_of(InterleaveR->getStoredValues(),
       [Member0](VPValue *VPV) { return Member0 == VPV; })) {
VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
return IR && IR->getInterleaveGroup()->isFull() &&
       IR->getVPValue(Op.index()) == Op.value();
InterleaveR->getStoredValues()[0]->getDefiningRecipe());
for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) {
if (!R || R->getOpcode() != WideMember0->getOpcode() ||
    R->getNumOperands() > 2)
[WideMember0, Idx = I](const auto &P) {
  const auto &[OpIdx, OpV] = P;
  return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx);
if (StoreGroups.empty())
auto *R = V->getDefiningRecipe();
*LI, LoadGroup->getAddr(), LoadGroup->getMask(), true,
false, LI->getAlign(), {}, LoadGroup->getDebugLoc());
L->insertBefore(LoadGroup);
assert(RepR->isSingleScalar() &&
       "must be a single scalar load");
NarrowedOps.insert(RepR);
VPValue *PtrOp = WideLoad->getAddr();
PtrOp = VecPtr->getOperand(0);
nullptr, *WideLoad);
N->insertBefore(WideLoad);
for (auto *StoreGroup : StoreGroups) {
VPValue *Member0 = StoreGroup->getStoredValues()[0];
} else if (auto *WideMember0 =
for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx)
  WideMember0->setOperand(Idx, NarrowOp(WideMember0->getOperand(Idx)));
Res = NarrowOp(Member0);
*SI, StoreGroup->getAddr(), Res, nullptr, true,
false, SI->getAlign(), {}, StoreGroup->getDebugLoc());
S->insertBefore(StoreGroup);
StoreGroup->eraseFromParent();
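// Interleave-group narrowing: when every member of a full, unmasked group
// stores the same value (or mirrors member 0's operation on narrowed
// operands), the wide interleaved load/store pair is replaced by a plain
// consecutive VPWidenLoad/VPWidenStore.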
Inc->setOperand(1, UF);
"must have a BranchOnCond");
if (VF.isScalable() && VScaleForTuning.has_value())
VectorStep *= *VScaleForTuning;
assert(VectorStep > 0 && "trip count should not be zero");
MiddleTerm->addMetadata(LLVMContext::MD_prof, BranchWeights);
if (WideIntOrFp && WideIntOrFp->getTruncInst())
if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
Start, VectorTC, Step);
{EndValue, Start}, WideIV->getDebugLoc(), "bc.resume.val");
return ResumePhiRecipe;
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
IVEndValues[WideIVR] = ResumePhi->getOperand(0);
ScalarPhiIRI->addOperand(ResumePhi);
"should only skip truncated wide inductions");
auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
"Cannot handle loops with uncountable early exits");
"vector.recur.extract");
StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx";
{ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name);
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
"Cannot handle loops with uncountable early exits");
for (VPUser *U : FOR->users()) {
{}, "vector.recur.extract.for.phi");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution &SE)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Utility class for floating point operations which can have information about relaxed accuracy require...
Represents flags for the getelementptr instruction/expression.
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
uint32_t getNumMembers() const
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
ValueT lookup(const KeyT &Key) const
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyze scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
size_t getNumPredecessors() const
const VPBlocksTy & getPredecessors() const
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={})
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
Canonical scalar induction phi of the vector loop.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A special type of VPBasicBlock that wraps an existing IR basic block.
BasicBlock * getIRBasicBlock() const
Class to record and manage LLVM IR flags.
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ FirstOrderRecurrenceSplice
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ CalculateTripCountMinusVF
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of loads or stores into one wide l...
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPRegionBlock * getRegion()
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
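A sketch of recipe movement and removal, assuming R and Dead are recipes in the same VPlan and Pred is a VPBasicBlock:
  // Unlink R from its current block and append it at the end of Pred.
  R->moveBefore(*Pred, Pred->end());
  // Erase a single-def recipe once its value has no users; eraseFromParent
  // unlinks and deletes in one step.
  if (Dead->getVPSingleValue()->getNumUsers() == 0)
    Dead->eraseFromParent();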
Helper class to create VPRecipes from IR instructions.
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
bool isReplicator() const
An indicator of whether this region is to generate multiple replicated instances of output IR correspond...
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
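A sketch combining the VPRegionBlock queries above, for a hypothetical Region:
  if (!Region->isReplicator()) {
    // Loop regions carry a canonical IV; replicate regions do not.
    VPCanonicalIVPHIRecipe *CanIV = Region->getCanonicalIV();
    Type *CanIVTy = Region->getCanonicalIVType();
    (void)CanIV;
    (void)CanIVTy;
  }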
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDef is a base class for recipes modeling a sequence of one or more output IR that define...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
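A sketch of type inference, assuming TypeInfo is a VPTypeAnalysis already constructed for the plan and Def is any VPValue:
  Type *ScalarTy = TypeInfo.inferScalarType(Def);
  // The analysis also hands out the LLVMContext, e.g. for building types.
  IntegerType *I1Ty = IntegerType::get(TypeInfo.getContext(), 1);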
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
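A sketch of selective use replacement, assuming Old and New are VPValues and VPBB is a VPBasicBlock; only uses outside VPBB are redirected:
  Old->replaceUsesWithIf(New, [VPBB](VPUser &U, unsigned) {
    return cast<VPRecipeBase>(&U)->getParent() != VPBB;
  });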
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A Recipe for widening the canonical induction variable of the vector loop.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
PHINode * getPHINode() const
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
VPValue * getSplatVFValue()
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
bool hasVF(ElementCount VF) const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
VPValue & getVectorTripCount()
The vector trip count.
bool hasScalableVF() const
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPValue & getVF()
Returns the VF of the vector loop region.
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getTrue()
Return a VPValue wrapping i1 true.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given type and value.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan has already been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPValue * getFalse()
Return a VPValue wrapping i1 false.
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
ArrayRef< VPValue * > getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
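A sketch of common VPlan queries, for a hypothetical Plan:
  if (Plan.hasScalarVFOnly())
    return; // nothing will be widened for scalar-only VFs
  VPValue *TC = Plan.getTripCount();
  VPValue *BTC = Plan.getOrCreateBackedgeTakenCount(); // trip count - 1
  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
  VPBasicBlock *Preheader = Plan.getVectorPreheader();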
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
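A worked example of the ElementCount queries above:
  ElementCount VF = ElementCount::getScalable(4); // "vscale x 4" lanes
  assert(VF.isScalable() && !VF.isFixed());
  assert(VF.getKnownMinValue() == 4);             // at least 4 lanes
  ElementCount VF2 = VF.multiplyCoefficientBy(2); // "vscale x 8"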
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A divided by B using unsigned division, rounded by the given rounding mode.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
MatchFunctor< Val, Pattern > match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
AllRecipe_commutative_match< Opcode, Op0_t, Op1_t > m_c_Binary(const Op0_t &Op0, const Op1_t &Op1)
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
GEPLikeRecipe_match< Op0_t, Op1_t > m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastLanePerPart, Op0_t > m_ExtractLastLanePerPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExtractLastElement, Op0_t > m_ExtractLastElement(const Op0_t &Op0)
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPDerivedIV_match< Op0_t, Op1_t, Op2_t > m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
specific_intval< 1 > m_True()
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, without matching its operands, as the number of operands is not fixed.
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
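A hedged sketch of VPlan pattern matching, assuming Term is the terminator recipe of some block, Plan is the enclosing VPlan, and the one-operand m_BranchOnCond overload is available:
  using namespace llvm::VPlanPatternMatch;
  VPValue *Cond = nullptr;
  if (match(Term, m_BranchOnCond(m_VPValue(Cond))) &&
      Cond == Plan.getTrue()) {
    // The branch condition is constant true; the false edge is dead.
  }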
initializer< Ty > init(const Ty &Val)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
const SCEV * getSCEVExprForVPValue(const VPValue *V, ScalarEvolution &SE, const Loop *L=nullptr)
Return the SCEV expression for V.
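A sketch of the vputils predicates above, for a hypothetical Def:
  // Keeping Def scalar is safe if every user reads only lane 0 and the
  // value is invariant across VF lanes and UF parts.
  bool KeepScalar = vputils::onlyFirstLaneUsed(Def) &&
                    vputils::isUniformAcrossVFsAndUFs(Def);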
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
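A sketch of a deep traversal that erases recipes while iterating; isDead is a hypothetical predicate:
  for (VPBlockBase *Block : vp_depth_first_deep(Plan.getEntry())) {
    auto *VPBB = dyn_cast<VPBasicBlock>(Block);
    if (!VPBB)
      continue; // region blocks themselves hold no recipes
    // make_early_inc_range keeps iteration valid across eraseFromParent.
    for (VPRecipeBase &R : make_early_inc_range(*VPBB))
      if (isDead(R))
        R.eraseFromParent();
  }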
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
iterator_range< po_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_post_order_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order while traversing through ...
void sort(IteratorTy Start, IteratorTy End)
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime over...
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result values are uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
A recipe for handling first-order recurrence phis.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening select instructions.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...