if (!VPBB->getParent())
auto EndIter = Term ? Term->getIterator() : VPBB->end();
VPValue *VPV = Ingredient.getVPSingleValue();

*Load, Ingredient.getOperand(0), nullptr,
Ingredient.getDebugLoc());
*Store, Ingredient.getOperand(1), Ingredient.getOperand(0), nullptr,
false, false, *VPI,
Ingredient.getDebugLoc());
Ingredient.getDebugLoc());
*VPI, CI->getDebugLoc());
CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
*VPI, Ingredient.getDebugLoc());
"inductions must be created earlier");
"Only recipes with zero or one defined values expected");
Ingredient.eraseFromParent();
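// Helper (apparently used by the predicated-store sinking further below):
// prove two scalar stores cannot alias by checking that the constant
// distance between their address SCEVs is at least the widest store size,
// for every vector factor in the plan.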
if (A->getOpcode() != Instruction::Store ||
B->getOpcode() != Instruction::Store)
const APInt *Distance;
Type *TyA = TypeInfo.inferScalarType(A->getOperand(0));
Type *TyB = TypeInfo.inferScalarType(B->getOperand(0));
uint64_t MaxStoreSize = std::max(SizeA, SizeB);
auto VFs = B->getParent()->getPlan()->vectorFactors();
return Distance->abs().uge(

: ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), PSE(PSE),
L(L), TypeInfo(TypeInfo) {}
return ExcludeRecipes.contains(&R) ||
(Store && isNoAliasViaDistance(Store, &GroupLeader));
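// Walk recipes in the single-successor block chain, looking for memory
// accesses that would block code motion; an optional SinkStoreInfo lets
// recipes already proven safe be skipped.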
std::optional<SinkStoreInfo> SinkInfo = {}) {
bool CheckReads = SinkInfo.has_value();
"Expected at most one successor in block chain");
if (SinkInfo && SinkInfo->shouldSkip(R))
if (!R.mayWriteToMemory() && !(CheckReads && R.mayReadFromMemory()))
if (CheckReads && R.mayReadFromMemory() &&
Loc->AATags.NoAlias))

if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
return RepR && RepR->getOpcode() == Instruction::Alloca;
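// sinkScalarOperands: sink single-scalar recipes towards their uses,
// duplicating a candidate when it still has users outside the sink target
// that only require its first lane.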
auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
if (!ScalarVFOnly && RepR->isSingleScalar())
WorkList.insert({SinkTo, Candidate});
for (auto &Recipe : *VPBB)
InsertIfValidSinkCandidate(VPBB, Op);
for (unsigned I = 0; I != WorkList.size(); ++I) {
std::tie(SinkTo, SinkCandidate) = WorkList[I];
auto UsersOutsideSinkTo =
return cast<VPRecipeBase>(U)->getParent() != SinkTo;
if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) {
return !U->usesFirstLaneOnly(SinkCandidate);
bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
if (NeedsDuplicating) {
if (auto *SinkCandidateRepR =
nullptr, *SinkCandidateRepR,
Clone = SinkCandidate->clone();
InsertIfValidSinkCandidate(SinkTo, Op);
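// mergeReplicateRegions: merge adjacent replicate regions guarded by the
// same mask, moving the phis of the first merge block into the second and
// erasing recipes that became dead.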
if (!EntryBB || EntryBB->size() != 1 ||
if (EntryBB->getNumSuccessors() != 2)
if (!Succ0 || !Succ1)
if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
if (Succ0->getSingleSuccessor() == Succ1)
if (Succ1->getSingleSuccessor() == Succ0)
if (!Region1->isReplicator())
auto *MiddleBasicBlock =
if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
if (!Region2 || !Region2->isReplicator())
if (!Mask1 || Mask1 != Mask2)
assert(Mask1 && Mask2 && "both regions must have conditions");
if (TransformedRegions.contains(Region1))
if (!Then1 || !Then2)
VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
Phi1ToMove.eraseFromParent();
Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
TransformedRegions.insert(Region1);
return !TransformedRegions.empty();
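// createReplicateRegion: wrap a predicated instruction in a "pred.<opcode>"
// region made of an entry block branching on the mask, a "then" block with
// the replicated recipe, and a merge block.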
std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
assert(Instr->getParent() && "Predicated instruction not in any basic block");
auto *BlockInMask = PredRecipe->getMask();
RecipeWithoutMask->getDebugLoc());
if (RepR->isPredicated())
if (ParentRegion && ParentRegion->getExiting() == CurrentBlock)
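// mergeBlocksIntoPredecessors: fold a VPBasicBlock into its single
// predecessor, updating region exiting blocks and successor edges.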
if (!VPBB->getParent())
if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
R.moveBefore(*PredVPBB, PredVPBB->end());
auto *ParentRegion = VPBB->getParent();
if (ParentRegion && ParentRegion->getExiting() == VPBB)
ParentRegion->setExiting(PredVPBB);
for (auto *Succ : to_vector(VPBB->successors())) {
return !WorkList.empty();

bool ShouldSimplify = true;
while (ShouldSimplify) {

if (!IV || IV->getTruncInst())
for (auto *U : FindMyCast->users()) {
if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
FoundUserCast = UserCast;
FindMyCast = FoundUserCast;
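// Redundant-cast and canonical-IV cleanup, removal of dead recipes (with
// conditional assumes treated as dead), and creation of scalar IV steps
// derived from the canonical induction.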
if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
WidenOriginalIV->dropPoisonGeneratingFlags();

bool IsConditionalAssume = RepR && RepR->isPredicated() &&
if (IsConditionalAssume)
if (R.mayHaveSideEffects())
return all_of(R.definedValues(),
[](VPValue *V) { return V->getNumUsers() == 0; });

if (!PhiR || PhiR->getNumOperands() != 2)
VPUser *PhiUser = PhiR->getSingleUser();
if (PhiUser != Incoming->getDefiningRecipe() ||
PhiR->replaceAllUsesWith(PhiR->getOperand(0));
PhiR->eraseFromParent();
Incoming->getDefiningRecipe()->eraseFromParent();

Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy, DL);
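// legalizeAndOptimizeInductions: replace wide integer/FP and pointer
// inductions with scalar IV steps when only scalar values are demanded.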
if (ResultTy != StepTy) {
Builder.setInsertPoint(VecPreheader);
Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy, DL);
return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,

for (unsigned I = 0; I != Users.size(); ++I) {
Users.insert_range(V->users());
return Users.takeVector();

nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);
Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
(RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
Def->operands(), true,
Clone->insertAfter(Def);
Def->replaceAllUsesWith(Clone);
PtrIV->replaceAllUsesWith(PtrAdd);
if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
return U->usesScalars(WideIV);
Plan, ID.getKind(), ID.getInductionOpcode(),
WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
WideIV->getDebugLoc(), Builder);
if (!HasOnlyVectorVFs) {
"plans containing a scalar VF cannot also include scalable VFs");
WideIV->replaceAllUsesWith(Steps);
WideIV->replaceUsesWithIf(Steps,
[WideIV, HasScalableVF](VPUser &U, unsigned) {
return U.usesFirstLaneOnly(WideIV);
return U.usesScalars(WideIV);
return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
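// Optimizing induction users in exit blocks: recognize the IV increment
// feeding the exit value and compute the end value directly, inverting the
// final step for Sub/FSub and pointer inductions.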
if (!Def || Def->getNumOperands() != 2)
auto IsWideIVInc = [&]() {
auto &ID = WideIV->getInductionDescriptor();
VPValue *IVStep = WideIV->getStepValue();
switch (ID.getInductionOpcode()) {
case Instruction::Add:
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::Sub: {
return IsWideIVInc() ? WideIV : nullptr;

if (WideIntOrFp && WideIntOrFp->getTruncInst())
FirstActiveLane = B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
FirstActiveLaneType, DL);
B.createNaryOp(Instruction::Add, {CanonicalIV, FirstActiveLane}, DL);
EndValue = B.createNaryOp(Instruction::Add, {EndValue, One}, DL);
if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
VPIRValue *Start = WideIV->getStartValue();
VPValue *Step = WideIV->getStepValue();
EndValue = B.createDerivedIV(
Start, EndValue, Step);

assert(EndValue && "end value must have been pre-computed");
VPValue *Step = WideIV->getStepValue();
return B.createNaryOp(Instruction::Sub, {EndValue, Step},
return B.createPtrAdd(EndValue,
B.createNaryOp(Instruction::Sub, {Zero, Step}),
const auto &ID = WideIV->getInductionDescriptor();
return B.createNaryOp(
ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
: Instruction::FAdd,
{EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) {
if (PredVPBB == MiddleVPBB)
ExitIRI->getOperand(Idx),
Plan, TypeInfo, PredVPBB, ExitIRI->getOperand(Idx), PSE);
ExitIRI->setOperand(Idx, Escape);

const auto &[V, Inserted] = SCEV2VPV.try_emplace(ExpR->getSCEV(), ExpR);
ExpR->replaceAllUsesWith(V->second);
ExpR->eraseFromParent();
while (!WorkList.empty()) {
if (!Seen.insert(Cur).second)
R->eraseFromParent();
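// Constant folding of recipes to live-in IR values: a TypeSwitch extracts
// the opcode or vector intrinsic ID, and InstSimplifyFolder folds the
// operation when all operands have underlying IR values.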
static std::optional<std::pair<bool, unsigned>>
std::optional<std::pair<bool, unsigned>>>(R)
[](auto *I) { return std::make_pair(false, I->getOpcode()); })
return std::make_pair(true, I->getVectorIntrinsicID());
.Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {
return std::make_pair(false,
.Default([](auto *) { return std::nullopt; });

Value *V = Op->getUnderlyingValue();
auto FoldToIRValue = [&]() -> Value * {
if (OpcodeOrIID->first) {
if (R.getNumOperands() != 2)
unsigned ID = OpcodeOrIID->second;
return Folder.FoldBinaryIntrinsic(ID, Ops[0], Ops[1],
unsigned Opcode = OpcodeOrIID->second;
return Folder.FoldSelect(Ops[0], Ops[1],
return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
case Instruction::Select:
return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
case Instruction::ICmp:
case Instruction::FCmp:
case Instruction::GetElementPtr: {
return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0],
case Instruction::ExtractElement:
if (Value *V = FoldToIRValue())
return R.getParent()->getPlan()->getOrAddLiveIn(V);
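// simplifyRecipe: pattern-based peephole rewrites over recipes, e.g.
// folding redundant truncate/extend pairs, logical and/or identities,
// select and blend simplifications, and strength-reducing multiplies by
// powers of two into shifts.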
VPlan *Plan = Def->getParent()->getPlan();
return Def->replaceAllUsesWith(V);
PredPHI->replaceAllUsesWith(Op);

if (TruncTy == ATy) {
Def->replaceAllUsesWith(A);
: Instruction::ZExt;
if (auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
Ext->setUnderlyingValue(UnderlyingExt);
Def->replaceAllUsesWith(Ext);
auto *Trunc = Builder.createWidenCast(Instruction::Trunc, A, TruncTy);
Def->replaceAllUsesWith(Trunc);

for (VPUser *U : A->users()) {
for (VPValue *VPV : R->definedValues())

Def->replaceAllUsesWith(X);
Def->eraseFromParent();
return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
return Def->replaceAllUsesWith(X);
return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
return Def->replaceAllUsesWith(Def->getOperand(1));
(!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
!Def->getOperand(1)->hasMoreThanOneUniqueUser()))
return Def->replaceAllUsesWith(
Builder.createLogicalAnd(X, Builder.createOr(Y, Z)));
return Def->replaceAllUsesWith(Plan->getFalse());
return Def->replaceAllUsesWith(X);
return Def->replaceAllUsesWith(Builder.createNot(C));
Def->setOperand(0, C);
Def->setOperand(1, Y);
Def->setOperand(2, X);
X->hasMoreThanOneUniqueUser())
return Def->replaceAllUsesWith(
Builder.createLogicalAnd(X, Builder.createLogicalAnd(Y, Z)));
return Def->replaceAllUsesWith(A);
return Def->replaceAllUsesWith(A);
return Def->replaceAllUsesWith(
Def->getOperand(0) == A ? Def->getOperand(1) : Def->getOperand(0));
return Def->replaceAllUsesWith(Builder.createNaryOp(
{A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},

const VPRegionBlock *ParentRegion = Def->getParent()->getParent();
bool IsInReplicateRegion = ParentRegion && ParentRegion->isReplicator();
return Def->replaceAllUsesWith(Builder.createNaryOp(
{A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())}, {},
Def->getDebugLoc()));
return Def->replaceAllUsesWith(A);

R->setOperand(1, Y);
R->setOperand(2, X);
R->replaceAllUsesWith(Cmp);
if (!Cmp->getDebugLoc() && Def->getDebugLoc())
Cmp->setDebugLoc(Def->getDebugLoc());
if (Op->getNumUsers() > 1 ||
} else if (!UnpairedCmp) {
UnpairedCmp = Op->getDefiningRecipe();
UnpairedCmp = nullptr;
if (NewOps.size() < Def->getNumOperands()) {
return Def->replaceAllUsesWith(NewAnyOf);
return Def->replaceAllUsesWith(NewCmp);
return Def->replaceAllUsesWith(Def->getOperand(1));
X = Builder.createWidenCast(Instruction::Trunc, X, WideStepTy);
Def->replaceAllUsesWith(X);
Def->setOperand(1, Def->getOperand(0));
Def->setOperand(0, Y);
if (Phi->getOperand(0) == Phi->getOperand(1))
Phi->replaceAllUsesWith(Phi->getOperand(0));
Def->replaceAllUsesWith(
BuildVector->getOperand(BuildVector->getNumOperands() - 1));
return Def->replaceAllUsesWith(A);
Def->replaceAllUsesWith(
BuildVector->getOperand(BuildVector->getNumOperands() - 2));
Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
Def->replaceAllUsesWith(
"broadcast operand must be single-scalar");
Def->setOperand(0, C);
if (Phi->getNumOperands() == 1)
Phi->replaceAllUsesWith(Phi->getOperand(0));
if (Def->getNumOperands() == 1 &&
return Def->replaceAllUsesWith(IRV);
return Def->replaceAllUsesWith(A);
Def->replaceAllUsesWith(Builder.createNaryOp(
Instruction::ExtractElement, {A, LaneToExtract}, Def->getDebugLoc()));
if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) &&
Phi->getSingleUser() == Def) {
Phi->setOperand(0, Y);
Def->replaceAllUsesWith(Phi);
Steps->replaceAllUsesWith(Steps->getOperand(0));
Def->replaceUsesWithIf(StartV, [](const VPUser &U, unsigned Idx) {
return PhiR && PhiR->isInLoop();
Def->replaceAllUsesWith(A);
[Def, A](VPUser *U) { return U->usesScalars(A) || Def == U; })) {
return Def->replaceAllUsesWith(A);
return Def->replaceAllUsesWith(A);
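// narrowToSingleScalarRecipes: rewrite widened recipes whose results are
// only used per-lane (or whose operands are uniform) into single-scalar
// replicate recipes, turning non-consecutive wide stores into an extract
// plus a scalar store.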
if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
!WidenStoreR->isConsecutive()) {
assert(!WidenStoreR->isReverse() &&
"Non-consecutive memory recipes shouldn't be reversed");
VPValue *Mask = WidenStoreR->getMask();
{WidenStoreR->getOperand(1)});
&WidenStoreR->getIngredient(), {Extract, WidenStoreR->getAddr()},
true, nullptr, {},
ScalarStore->insertBefore(WidenStoreR);
WidenStoreR->eraseFromParent();

RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
true, nullptr, *RepR,
*RepR, RepR->getDebugLoc());
Clone->insertBefore(RepOrWidenR);
VPValue *ExtractOp = Clone->getOperand(0);
Clone->setOperand(0, ExtractOp);
RepR->eraseFromParent();

if (!all_of(RepOrWidenR->users(),
[RepOrWidenR](const VPUser *U) {
if (auto *VPI = dyn_cast<VPInstruction>(U)) {
unsigned Opcode = VPI->getOpcode();
if (Opcode == VPInstruction::ExtractLastLane ||
Opcode == VPInstruction::ExtractLastPart ||
Opcode == VPInstruction::ExtractPenultimateElement)
return U->usesScalars(RepOrWidenR);
if (Op->getSingleUser() != RepOrWidenR)
auto *IRV = dyn_cast<VPIRValue>(Op);
bool LiveInNeedsBroadcast = IRV && !isa<Constant>(IRV->getValue());
auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
true, nullptr, *RepOrWidenR);
Clone->insertBefore(RepOrWidenR);
RepOrWidenR->replaceAllUsesWith(Clone);
RepOrWidenR->eraseFromParent();
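// simplifyBlends: de-duplicate blend incoming values, pick a start index
// whose mask can be dropped, and normalize the remaining operands into a
// new blend (swapping incoming values when the mask is inverted).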
if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
UniqueValues.insert(Blend->getIncomingValue(0));
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
UniqueValues.insert(Blend->getIncomingValue(I));
if (UniqueValues.size() == 1) {
Blend->replaceAllUsesWith(*UniqueValues.begin());
Blend->eraseFromParent();
if (Blend->isNormalized())
unsigned StartIndex = 0;
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
if (I == StartIndex)
OperandsWithMask.push_back(Blend->getIncomingValue(I));
OperandsWithMask.push_back(Blend->getMask(I));
OperandsWithMask, Blend->getDebugLoc());
NewBlend->insertBefore(&R);
VPValue *DeadMask = Blend->getMask(StartIndex);
Blend->eraseFromParent();
if (NewBlend->getNumOperands() == 3 &&
VPValue *Inc0 = NewBlend->getOperand(0);
VPValue *Inc1 = NewBlend->getOperand(1);
VPValue *OldMask = NewBlend->getOperand(2);
NewBlend->setOperand(0, Inc1);
NewBlend->setOperand(1, Inc0);
NewBlend->setOperand(2, NewMask);
APInt MaxVal = AlignedTC - 1;
unsigned NewBitWidth =
bool MadeChange = false;
if (!WideIV || !WideIV->isCanonical() ||
WideIV->hasMoreThanOneUniqueUser() ||
NewIVTy == WideIV->getScalarType())
VPUser *SingleUser = WideIV->getSingleUser();
WideIV->setStartValue(NewStart);
WideIV->setStepValue(NewStep);
Cmp->setOperand(1, NewBTC);
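// optimizeForVFAndUF: with a known trip count, fold the branch-on-count
// away and simplify active-lane-mask phis per unroll part; BestVF/BestUF
// must already be fixed in the plan.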
return any_of(Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, PSE);
const SCEV *VectorTripCount =
"Trip count SCEV must be computable");
auto *Term = &ExitingVPBB->back();
for (unsigned Part = 0; Part < UF; ++Part) {
Extracts[Part] = Ext;
match(Phi->getBackedgeValue(),
assert(Index && "Expected index from ActiveLaneMask instruction");
"Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
"Expected incoming values of Phi to be ActiveLaneMasks");
EntryALM->setOperand(2, ALMMultiplier);
LoopALM->setOperand(2, ALMMultiplier);
ExtractFromALM(EntryALM, EntryExtracts);
ExtractFromALM(LoopALM, LoopExtracts);
Not->setOperand(0, LoopExtracts[0]);
for (unsigned Part = 0; Part < UF; ++Part) {
Phis[Part]->setStartValue(EntryExtracts[Part]);
Phis[Part]->setBackedgeValue(LoopExtracts[Part]);

auto *Term = &ExitingVPBB->back();
const SCEV *VectorTripCount =
"Trip count SCEV must be computable");
if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi))
return R->isCanonical();
return isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe,
VPFirstOrderRecurrencePHIRecipe, VPPhi>(&Phi);
R->getScalarType());
HeaderR.eraseFromParent();
HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));
HeaderR.eraseFromParent();
B->setParent(nullptr);
if (Exits.size() != 1) {
"BranchOnTwoConds needs 2 remaining exits");
Term->getOperand(0));
Term->setOperand(1, Plan.getTrue());
{}, {}, Term->getDebugLoc());
Term->eraseFromParent();
R.getVPSingleValue()->replaceAllUsesWith(Trunc);
assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
assert(Plan.getUF() == BestUF && "BestUF must match the Plan's UF");
auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
if (SinkCandidate == Previous)
!Seen.insert(SinkCandidate).second ||
for (unsigned I = 0; I != WorkList.size(); ++I) {
"only recipes with a single defined value expected");
if (SinkCandidate == FOR)
SinkCandidate->moveAfter(Previous);
Previous = SinkCandidate;

for (VPUser *U : FOR->users()) {
[&VPDT, HoistPoint](VPUser *U) {
auto *R = cast<VPRecipeBase>(U);
return HoistPoint == R ||
VPDT.properlyDominates(HoistPoint, R);
"HoistPoint must dominate all users of FOR");
auto NeedsHoisting = [HoistPoint, &VPDT,
VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
if (!HoistCandidate)
HoistCandidate->getRegion() == EnclosingLoopRegion) &&
"CFG in VPlan should still be flat, without replicate regions");
if (!Visited.insert(HoistCandidate).second)
return HoistCandidate;
for (unsigned I = 0; I != HoistCandidates.size(); ++I) {
"only recipes with a single defined value expected");
if (auto *R = NeedsHoisting(Op)) {
if (R->getNumDefinedValues() != 1)
HoistCandidate->moveBefore(*HoistPoint->getParent(),

VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
while (auto *PrevPhi =
assert(PrevPhi->getParent() == FOR->getParent());
Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
{FOR, FOR->getBackedgeValue()});
FOR->replaceAllUsesWith(RecurSplice);
RecurSplice->setOperand(0, FOR);
for (VPUser *U : RecurSplice->users()) {
B.createNaryOp(Instruction::Sub, {LastActiveLane, One});
VPValue *PenultimateLastIter =
{PenultimateIndex, FOR->getBackedgeValue()});
VPValue *Sel = B.createSelect(Cmp, LastPrevIter, PenultimateLastIter);
RecurKind RK = PhiR->getRecurrenceKind();
RecWithFlags->dropPoisonGeneratingFlags();

struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
return Def == getEmptyKey() || Def == getTombstoneKey();
return GEP->getSourceElementType();
.Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
[](auto *I) { return I->getSourceElementType(); })
.Default([](auto *) { return nullptr; });
static bool canHandle(const VPSingleDefRecipe *Def) {
if (!C || (!C->first && (C->second == Instruction::InsertValue ||
C->second == Instruction::ExtractValue)))
return !Def->mayReadFromMemory();
static unsigned getHashValue(const VPSingleDefRecipe *Def) {
const VPlan *Plan = Def->getParent()->getPlan();
VPTypeAnalysis TypeInfo(*Plan);
getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
if (RFlags->hasPredicate())
static bool isEqual(const VPSingleDefRecipe *L, const VPSingleDefRecipe *R) {
if (L->getVPRecipeID() != R->getVPRecipeID() ||
getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
!equal(L->operands(), R->operands()))
"must have valid opcode info for both recipes");
if (LFlags->hasPredicate() &&
LFlags->getPredicate() !=
const VPRegionBlock *RegionL = L->getRegion();
const VPRegionBlock *RegionR = R->getRegion();
L->getParent() != R->getParent())
const VPlan *Plan = L->getParent()->getPlan();
VPTypeAnalysis TypeInfo(*Plan);
return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
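// cse, licm, and truncateToMinimalBitwidths: replace dominated duplicate
// recipes, hoist loop-invariant recipes into the preheader, and shrink
// recipes to the bit widths computed by the MinBWs analysis, inserting
// truncates and extends around them.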
if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
if (!VPDT.dominates(V->getParent(), VPBB))
Def->replaceAllUsesWith(V);

"Expected vector preheader's successor to be the vector loop region");
return !Op->isDefinedOutsideLoopRegions();
R.moveBefore(*Preheader, Preheader->end());

VPValue *ResultVPV = R.getVPSingleValue();
unsigned NewResSizeInBits = MinBWs.lookup(UI);
if (!NewResSizeInBits)
(void)OldResSizeInBits;
VPW->dropPoisonGeneratingFlags();
if (OldResSizeInBits != NewResSizeInBits &&
Ext->insertAfter(&R);
Ext->setOperand(0, ResultVPV);
assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");
"Only ICmps should not need extending the result.");
for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
auto *Op = R.getOperand(Idx);
unsigned OpSizeInBits =
if (OpSizeInBits == NewResSizeInBits)
assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");
auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.try_emplace(Op);
R.setOperand(Idx, ProcessedIter->second);
Builder.setInsertPoint(&R);
Builder.createWidenCast(Instruction::Trunc, Op, NewResTy);
ProcessedIter->second = NewOp;
R.setOperand(Idx, NewOp);

assert(VPBB->getNumSuccessors() == 2 &&
"Two successors expected for BranchOnCond");
unsigned RemovedIdx;
"There must be a single edge between VPBB and its successor");
VPBB->back().eraseFromParent();
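// addActiveLaneMask: materialize an active-lane-mask phi driven by entry
// and in-loop increments of the canonical IV, and replace the header mask
// (an ICmp against a wide canonical IV) with it.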
VPValue *StartV = CanonicalIVPHI->getStartValue();
auto *CanonicalIVIncrement =
CanonicalIVIncrement->dropPoisonGeneratingFlags();
DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
VPValue *TripCount, *IncrementValue;
IncrementValue = CanonicalIVIncrement;
IncrementValue = CanonicalIVPHI;
auto *EntryIncrement = Builder.createOverflowingOp(
{EntryIncrement, TC, ALMMultiplier}, DL,
"active.lane.mask.entry");
LaneMaskPhi->insertAfter(CanonicalIVPHI);
Builder.setInsertPoint(OriginalTerminator);
auto *InLoopIncrement =
{IncrementValue}, {false, false}, DL);
{InLoopIncrement, TripCount, ALMMultiplier},
DL, "active.lane.mask.next");
auto *NotMask = Builder.createNot(ALM, DL);

auto *FoundWidenCanonicalIVUser = find_if(
"Must have at most one VPWideCanonicalIVRecipe");
if (FoundWidenCanonicalIVUser !=
auto *WideCanonicalIV =
WideCanonicalIVs.push_back(WideCanonicalIV);
if (WidenOriginalIV && WidenOriginalIV->isCanonical())
WideCanonicalIVs.push_back(WidenOriginalIV);
for (auto *Wide : WideCanonicalIVs) {
for (VPUser *U : Wide->users()) {
assert(VPI->getOperand(0) == Wide &&
"WidenCanonicalIV must be the first operand of the compare");
assert(!HeaderMask && "Multiple header masks found?");

VPlan &Plan, bool UseActiveLaneMaskForControlFlow,
UseActiveLaneMaskForControlFlow) &&
"DataAndControlFlowWithoutRuntimeCheck implies "
"UseActiveLaneMaskForControlFlow");
auto *FoundWidenCanonicalIVUser = find_if(
assert(FoundWidenCanonicalIVUser &&
"Must have widened canonical IV when tail folding!");
auto *WideCanonicalIV =
if (UseActiveLaneMaskForControlFlow) {
nullptr, "active.lane.mask");
template <typename OpTy> bool match(OpTy *V) const {
template <typename Op0_t, typename Op1_t>

VPValue *Addr, *Mask, *EndPtr;
auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) {
EVLEndPtr->insertBefore(&CurRecipe);
EVLEndPtr->setOperand(1, &EVL);
if (match(&CurRecipe,
LoadR->insertBefore(&CurRecipe);
Intrinsic::experimental_vp_reverse, {LoadR, Plan->getTrue(), &EVL},
StoredVal, EVL, Mask);
if (match(&CurRecipe,
Intrinsic::experimental_vp_reverse,
{ReversedVal, Plan->getTrue(), &EVL},
AdjustEndPtr(EndPtr), NewReverse, EVL,
if (Rdx->isConditional() &&
if (Interleave->getMask() &&
if (match(&CurRecipe,
Intrinsic::vp_merge, {Mask, LHS, RHS, &EVL},

VPValue *HeaderMask = nullptr, *EVL = nullptr;
HeaderMask = R.getVPSingleValue();
NewR->insertBefore(R);
for (auto [Old, New] :
zip_equal(R->definedValues(), NewR->definedValues()))
Old->replaceAllUsesWith(New);
R->eraseFromParent();
"User of VF that we can't transform to EVL.");
[&LoopRegion, &Plan](VPUser *U) {
m_c_Add(m_Specific(LoopRegion->getCanonicalIV()),
m_Specific(&Plan.getVFxUF()))) ||
isa<VPWidenPointerInductionRecipe>(U);
"Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
"increment of the canonical induction.");
MaxEVL = Builder.createScalarZExtOrTrunc(
Builder.setInsertPoint(Header, Header->getFirstNonPhi());
VPValue *PrevEVL = Builder.createScalarPhi(
Intrinsic::experimental_vp_splice,
{V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},
R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
VPValue *EVLMask = Builder.createICmp(

VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) {
VPValue *StartV = CanonicalIVPHI->getStartValue();
EVLPhi->insertAfter(CanonicalIVPHI);
VPBuilder Builder(Header, Header->getFirstNonPhi());
VPPhi *AVLPhi = Builder.createScalarPhi(
if (MaxSafeElements) {
auto *CanonicalIVIncrement =
Builder.setInsertPoint(CanonicalIVIncrement);
OpVPEVL = Builder.createScalarZExtOrTrunc(
OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
auto *NextEVLIV = Builder.createOverflowingOp(
Instruction::Add, {OpVPEVL, EVLPhi},
{CanonicalIVIncrement->hasNoUnsignedWrap(),
CanonicalIVIncrement->hasNoSignedWrap()},
CanonicalIVIncrement->getDebugLoc(), "index.evl.next");
EVLPhi->addOperand(NextEVLIV);
VPValue *NextAVL = Builder.createOverflowingOp(
Instruction::Sub, {AVLPhi, OpVPEVL}, {true, false},
CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);

assert(!EVLPhi && "Found multiple EVL PHIs. Only one expected");
[[maybe_unused]] bool FoundAVL =
assert(FoundAVL && "Didn't find AVL?");
[[maybe_unused]] bool FoundAVLNext =
assert(FoundAVLNext && "Didn't find AVL backedge?");
VPValue *Backedge = CanonicalIV->getIncomingValue(1);
"Unexpected canonical iv");
CanonicalIV->eraseFromParent();
"Expected BranchOnCond with ICmp comparing EVL increment with vector "
LatchExitingBr->setOperand(0,
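// Stride versioning (rewriting strides to constants via PSE) and
// dropPoisonGeneratingRecipes: walk the backward slice of address
// computations of predicated memory accesses and strip poison-generating
// flags, replacing disjoint-or with a plain add where needed.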
return R->getRegion() ||
for (const SCEV *Stride : StridesMap.values()) {
const APInt *StrideConst;
RewriteMap[StrideV] = PSE.getSCEV(StrideV);
const SCEV *ScevExpr = ExpSCEV->getSCEV();
if (NewSCEV != ScevExpr) {
ExpSCEV->replaceAllUsesWith(NewExp);

const std::function<bool(BasicBlock *)> &BlockNeedsPredication) {
auto CollectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) {
while (!Worklist.empty()) {
if (!Visited.insert(CurRec).second)
RecWithFlags->isDisjoint()) {
Instruction::Add, {A, B}, {false, false},
RecWithFlags->getDebugLoc());
New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
RecWithFlags->replaceAllUsesWith(New);
RecWithFlags->eraseFromParent();
RecWithFlags->dropPoisonGeneratingFlags();
assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
"found instruction with poison generating flags not covered by "
"VPRecipeWithIRFlags");
if (VPRecipeBase *OpDef = Operand->getDefiningRecipe())

Instruction &UnderlyingInstr = WidenRec->getIngredient();
VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
if (AddrDef && WidenRec->isConsecutive() &&
BlockNeedsPredication(UnderlyingInstr.getParent()))
CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
InterleaveRec->getInterleaveGroup();
bool NeedPredication = false;
I < NumMembers; ++I) {
NeedPredication |= BlockNeedsPredication(Member->getParent());
if (NeedPredication)
CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
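// createInterleaveGroups: build VPInterleaveRecipes, collecting stored
// values per member, computing an offset from the insert position's
// address, and reversing the pointer for reverse groups.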
if (InterleaveGroups.empty())
for (const auto *IG : InterleaveGroups) {
StoredValues.push_back(StoreR->getStoredValue());
for (unsigned I = 1; I < IG->getFactor(); ++I) {
StoredValues.push_back(StoreR->getStoredValue());
bool NeedsMaskForGaps =
(IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) ||
(!StoredValues.empty() && !IG->isFull());
VPValue *Addr = Start->getAddr();
assert(IG->getIndex(IRInsertPos) != 0 &&
"index of insert position shouldn't be zero");
IG->getIndex(IRInsertPos),
Addr = B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
if (IG->isReverse()) {
-(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
ReversePtr->insertBefore(InsertPos);
InsertPos->getMask(), NeedsMaskForGaps,
InterleaveMD, InsertPos->getDebugLoc());
VPIG->insertBefore(InsertPos);
for (unsigned i = 0; i < IG->getFactor(); ++i)
if (!Member->getType()->isVoidTy()) {
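// Expansion of VPWidenIntOrFpInductionRecipe into a stepped vector phi
// (Init = SplatStart + {0..VF-1} * SplatStep, incremented by Step * VF per
// iteration); pointer inductions get a scalar pointer phi plus per-part
// offsets.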
AddOp = Instruction::Add;
MulOp = Instruction::Mul;
AddOp = ID.getInductionOpcode();
MulOp = Instruction::FMul;
Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty, DL);
Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty, DL);
Flags.dropPoisonGeneratingFlags();
Init = Builder.createWidenCast(Instruction::UIToFP, Init, StepTy);
Init = Builder.createNaryOp(MulOp, {Init, SplatStep}, Flags);
Init = Builder.createNaryOp(AddOp, {SplatStart, Init}, Flags,
WidePHI->insertBefore(WidenIVR);
Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
auto *Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
WidePHI->addOperand(Next);

VPlan *Plan = R->getParent()->getPlan();
VPValue *Start = R->getStartValue();
VPValue *Step = R->getStepValue();
VPValue *VF = R->getVFValue();
assert(R->getInductionDescriptor().getKind() ==
"Not a pointer induction according to InductionDescriptor!");
"Recipe should have been replaced");
VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start, DL, "pointer.phi");
Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
Offset = Builder.createOverflowingOp(Instruction::Mul, {Offset, Step});
VPValue *PtrAdd = Builder.createNaryOp(
R->replaceAllUsesWith(PtrAdd);
VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.inferScalarType(VF),
VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
Builder.createPtrAdd(ScalarPtrPhi, Inc, DL, "ptr.ind");
if (!R->isReplicator())
R->dissolveToCFGLoop();
assert(Br->getNumOperands() == 2 &&
"BranchOnTwoConds must have exactly 2 conditions");
assert(Successors.size() == 3 &&
"BranchOnTwoConds must have exactly 3 successors");
VPValue *Cond0 = Br->getOperand(0);
VPValue *Cond1 = Br->getOperand(1);
!BrOnTwoCondsBB->getParent() &&
"regions must already be dissolved");
Br->eraseFromParent();
WidenIVR->replaceAllUsesWith(PtrAdd);

for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
Select = Builder.createSelect(Blend->getMask(I),
Blend->getIncomingValue(I), Select,
R.getDebugLoc(), "predphi");
Blend->replaceAllUsesWith(Select);

for (VPValue *Op : LastActiveL->operands()) {
VPValue *NotMask = Builder.createNot(Op, LastActiveL->getDebugLoc());
VPValue *FirstInactiveLane = Builder.createNaryOp(
LastActiveL->getDebugLoc(), "first.inactive.lane");
VPValue *LastLane = Builder.createNaryOp(
Instruction::Sub, {FirstInactiveLane, One},
LastActiveL->getDebugLoc(), "last.active.lane");
DebugLoc DL = BranchOnCountInst->getDebugLoc();
ToRemove.push_back(BranchOnCountInst);

? Instruction::UIToFP
: Instruction::Trunc;
VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
Flags = {VPI->getFastMathFlags()};
MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
VPI->replaceAllUsesWith(VectorStep);
R->eraseFromParent();

"unsupported early exit VPBB");
"Terminator must be BranchOnCond");
VPValue *CondOfEarlyExitingVPBB =
auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
? CondOfEarlyExitingVPBB
: Builder.createNot(CondOfEarlyExitingVPBB);
VPBuilder EarlyExitB(VectorEarlyExitVPBB);
unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
if (ExitIRI->getNumOperands() != 1) {
ExitIRI->extractLastLaneOfLastPartOfFirstOperand(MiddleBuilder);
VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
"Unexpected terminator");
auto *IsLatchExitTaken =
LatchExitingBranch->getOperand(1));
DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
LatchExitingBranch->eraseFromParent();
Builder.setInsertPoint(LatchVPBB);
{IsEarlyExitTaken, IsLatchExitTaken}, LatchDL);
LatchVPBB->setSuccessors({VectorEarlyExitVPBB, MiddleVPBB, HeaderVPBB});
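// Bundling reductions with extends and multiplies: cost-compare an
// extended/mul-accumulate (or partial) reduction against the unbundled
// recipes, clamping the VF range to where the bundled form is cheaper.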
Type *RedTy = Ctx.Types.inferScalarType(Red);
VPValue *VecOp = Red->getVecOp();
auto IsExtendedRedValidAndClampRange =
if (Red->isPartialReduction()) {
ExtRedCost = Ctx.TTI.getPartialReductionCost(
Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
? std::optional{Red->getFastMathFlags()}
ExtRedCost = Ctx.TTI.getExtendedReductionCost(
Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
Red->getFastMathFlags(), CostKind);
return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;
IsExtendedRedValidAndClampRange(
Ctx.Types.inferScalarType(A)))

if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
Opcode != Instruction::FAdd)
Type *RedTy = Ctx.Types.inferScalarType(Red);
auto IsMulAccValidAndClampRange =
Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
if (Red->isPartialReduction()) {
Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) : nullptr;
MulAccCost = Ctx.TTI.getPartialReductionCost(
Opcode, SrcTy, SrcTy2, RedTy, VF,
? std::optional{Red->getFastMathFlags()}
(Ext0->getOpcode() != Ext1->getOpcode() ||
Ext0->getOpcode() == Instruction::CastOps::FPExt))
!Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
ExtCost += Ext0->computeCost(VF, Ctx);
ExtCost += Ext1->computeCost(VF, Ctx);
ExtCost += OuterExt->computeCost(VF, Ctx);
return MulAccCost.isValid() &&
MulAccCost < ExtCost + MulCost + RedCost;
VPValue *VecOp = Red->getVecOp();
assert(Opcode == Instruction::FAdd &&
"MulAccumulateReduction from an FMul must accumulate into an FAdd "
if (RecipeA && RecipeB &&
IsMulAccValidAndClampRange(FMul, RecipeA, RecipeB, nullptr)) {
Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
Type *WideTy = Ctx.Types.inferScalarType(ExtA);
ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
Mul->setOperand(1, ExtB);
ExtendAndReplaceConstantOp(RecipeA, RecipeB, B, Mul);
IsMulAccValidAndClampRange(Mul, RecipeA, RecipeB, nullptr)) {
if (!Sub && IsMulAccValidAndClampRange(Mul, nullptr, nullptr, nullptr))
ExtendAndReplaceConstantOp(Ext0, Ext1, B, Mul);
(Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
Ext0->getOpcode() == Ext1->getOpcode() &&
IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) {
Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), nullptr,
*Ext0, *Ext0, Ext0->getDebugLoc());
NewExt0->insertBefore(Ext0);
Ext->getResultType(), nullptr, *Ext1,
*Ext1, Ext1->getDebugLoc());
Mul->setOperand(0, NewExt0);
Mul->setOperand(1, NewExt1);
Red->setOperand(1, Mul);
auto IP = std::next(Red->getIterator());
auto *VPBB = Red->getParent();
Red->replaceAllUsesWith(AbstractR);
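// materializeBroadcasts: insert a single broadcast for values with vector
// users and redirect those users to it.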
for (VPValue *VPV : VPValues) {
if (User->usesScalars(VPV))
HoistPoint = HoistBlock->begin();
"All users must be in the vector preheader or dominated by it");
VPV->replaceUsesWithIf(Broadcast,
[VPV, Broadcast](VPUser &U, unsigned Idx) {
return Broadcast != &U && !U.usesScalars(VPV);
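// Grouping of predicated loads/stores by address SCEV (apparently for
// hoisting loads and sinking stores with complementary masks): loads are
// replaced by one unpredicated load; stores become a select chain over the
// stored values feeding one unpredicated store, guarded by scoped-noalias
// metadata and the SinkStoreInfo distance check above.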
if (RepR->isPredicated() || !RepR->isSingleScalar() ||
RepR->getOpcode() != Instruction::Load)
VPValue *Addr = RepR->getOperand(0);
if (!Loc.AATags.Scope)
if (R.mayWriteToMemory()) {
if (!Loc || !Loc->AATags.Scope || !Loc->AATags.NoAlias)
for (auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
const AAMDNodes &LoadAA = LoadLoc.AATags;
return CommonMetadata;

template <unsigned Opcode>
static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
"Only Load and Store opcodes supported");
constexpr bool IsLoad = (Opcode == Instruction::Load);
if (!RepR || RepR->getOpcode() != Opcode || !RepR->isPredicated())
VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
RecipesByAddress[AddrSCEV].push_back(RepR);
return TypeInfo.inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
for (auto &[Addr, Recipes] : RecipesByAddress) {
if (Recipes.size() < 2)
VPValue *MaskI = RecipeI->getMask();
Type *TypeI = GetLoadStoreValueType(RecipeI);
bool HasComplementaryMask = false;
VPValue *MaskJ = RecipeJ->getMask();
Type *TypeJ = GetLoadStoreValueType(RecipeJ);
if (TypeI == TypeJ) {
if (HasComplementaryMask) {
assert(Group.size() >= 2 && "must have at least 2 entries");

template <typename InstType>
for (auto &Group : Groups) {
LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
false, nullptr, *EarliestLoad,
UnpredicatedLoad->insertBefore(EarliestLoad);
Load->replaceAllUsesWith(UnpredicatedLoad);
Load->eraseFromParent();

if (!StoreLoc || !StoreLoc->AATags.Scope)
StoresToSink.end());
SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], PSE, L, TypeInfo);
for (auto &Group : Groups) {
VPValue *SelectedValue = Group[0]->getOperand(0);
for (unsigned I = 1; I < Group.size(); ++I) {
VPValue *Mask = Group[I]->getMask();
SelectedValue = Builder.createSelect(Mask, Value, SelectedValue,
auto *UnpredicatedStore =
{SelectedValue, LastStore->getOperand(1)},
nullptr, *LastStore, CommonMetadata);
UnpredicatedStore->insertBefore(*InsertBB, LastStore->getIterator());
Store->eraseFromParent();
assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
auto *TCMO = Builder.createNaryOp(

auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
DefR->replaceUsesWithIf(
BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
for (VPValue *Def : R.definedValues()) {
auto IsCandidateUnpackUser = [Def](VPUser *U) {
return U->usesScalars(Def) &&
if (none_of(Def->users(), IsCandidateUnpackUser))
Unpack->insertAfter(&R);
Def->replaceUsesWithIf(Unpack,
[&IsCandidateUnpackUser](VPUser &U, unsigned) {
return IsCandidateUnpackUser(&U);

bool RequiresScalarEpilogue) {
if (TailByMasking) {
TC = Builder.createNaryOp(
{TC, Builder.createNaryOp(Instruction::Sub,
Builder.createNaryOp(Instruction::URem, {TC, Step},
if (RequiresScalarEpilogue) {
"requiring scalar epilogue is not supported with tail folding");
R = Builder.createSelect(IsZero, Step, R);
VPValue *Res = Builder.createNaryOp(
Builder.createElementCount(TCTy, VFEC * Plan.getUF());
VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
BC, [&VF](VPUser &U, unsigned) { return !U.usesScalars(&VF); });
VPValue *MulByUF = Builder.createOverflowingOp(
Instruction::Mul, {RuntimeVF, UF}, {true, false});

BasicBlock *EntryBB = Entry->getIRBasicBlock();
const SCEV *Expr = ExpSCEV->getSCEV();
ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
ExpSCEV->eraseFromParent();
"VPExpandSCEVRecipes must be at the beginning of the entry block, "
"before any VPIRInstructions");
auto EI = Entry->begin();
return ExpandedSCEVs;
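// narrowInterleaveGroups: when load and store interleave groups are full
// and every lane computes the same expression, replace them with plain
// wide loads and stores; related fragments adjust induction increments and
// add branch weights to the middle-block terminator.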
return Member0Op == OpV;
return !W->getMask() && Member0Op == OpV;
return IR->getInterleaveGroup()->isFull() && IR->getVPValue(Idx) == OpV;
if (!InterleaveR || InterleaveR->getMask())
Type *GroupElementTy = nullptr;
[&TypeInfo, GroupElementTy](VPValue *Op) {
return TypeInfo.inferScalarType(Op) == GroupElementTy;
[&TypeInfo, GroupElementTy](VPValue *Op) {
return TypeInfo.inferScalarType(Op) == GroupElementTy;
return IG->getFactor() == VFMin && IG->getNumMembers() == VFMin &&
GroupSize == VectorRegWidth;
return RepR && RepR->isSingleScalar();
auto *R = V->getDefiningRecipe();
for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx)
WideMember0->setOperand(
auto *LI = cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
*LI, LoadGroup->getAddr(), LoadGroup->getMask(), true,
false, {}, LoadGroup->getDebugLoc());
L->insertBefore(LoadGroup);
assert(RepR->isSingleScalar() && "must be a single scalar load");
NarrowedOps.insert(RepR);
VPValue *PtrOp = WideLoad->getAddr();
PtrOp = VecPtr->getOperand(0);
nullptr, {}, *WideLoad);
N->insertBefore(WideLoad);
if (R.mayWriteToMemory() && !InterleaveR)
if (InterleaveR->getStoredValues().empty())
auto *Member0 = InterleaveR->getStoredValues()[0];
VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
return IR && IR->getInterleaveGroup()->isFull() &&
IR->getVPValue(Op.index()) == Op.value();
for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) {
if (!R || R->getOpcode() != WideMember0->getOpcode() ||
R->getNumOperands() > 2)
[WideMember0, Idx = I](const auto &P) {
const auto &[OpIdx, OpV] = P;
return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx);
if (StoreGroups.empty())
for (auto *StoreGroup : StoreGroups) {
*SI, StoreGroup->getAddr(), Res, nullptr, true,
false, {}, StoreGroup->getDebugLoc());
S->insertBefore(StoreGroup);
StoreGroup->eraseFromParent();

Instruction::Mul, {VScale, UF}, {true, false});
Inc->setOperand(1, UF);
"must have a BranchOnCond");
if (VF.isScalable() && VScaleForTuning.has_value())
VectorStep *= *VScaleForTuning;
assert(VectorStep > 0 && "trip count should not be zero");
MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
if (WideIntOrFp && WideIntOrFp->getTruncInst())
if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
Start, VectorTC, Step);
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
IVEndValues[WideIVR] = EndValue;
ResumePhiR->setOperand(0, EndValue);
ResumePhiR->setName("bc.resume.val");
"should only skip truncated wide inductions");
auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
"Cannot handle loops with uncountable early exits");
"vector.recur.extract");
ResumePhiR->setName(IsFOR ? "scalar.recur.init" : "bc.merge.rdx");
ResumePhiR->setOperand(0, ResumeFromVectorLoop);
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
"Cannot handle loops with uncountable early exits");
make_range(MiddleVPBB->getFirstNonPhi(), MiddleVPBB->end()))) {
"vector.recur.extract.for.phi");
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
size_t getNumPredecessors() const
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
const VPBlocksTy & getPredecessors() const
const std::string & getName() const
void clearSuccessors()
Remove all the successors of this block.
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
void setParent(VPRegionBlock *P)
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={})
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
Canonical scalar induction phi of the vector loop.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A special type of VPBasicBlock that wraps an existing IR basic block.
BasicBlock * getIRBasicBlock() const
Class to record and manage LLVM IR flags.
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ FirstOrderRecurrenceSplice
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ CalculateTripCountMinusVF
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPRegionBlock * getRegion()
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Helper class to create VPRecipies from IR instructions.
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A Recipe for widening the canonical induction variable of the vector loop.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
VPIRValue * getStartValue() const
Returns the start value of the induction.
PHINode * getPHINode() const
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
bool hasVF(ElementCount VF) const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
bool hasScalableVF() const
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPValue & getVF()
Returns the VF of the vector loop region.
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPSymbolicValue & getVectorTripCount()
The vector trip count.
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ?
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
bool match(const SCEV *S, const Pattern &P)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Opcode, Op0_t, Op1_t > m_c_Binary(const Op0_t &Op0, const Op1_t &Op1)
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ComputeReductionResult, Op0_t > m_ComputeReductionResult(const Op0_t &Op0)
VPInstruction_match< VPInstruction::StepVector > m_StepVector()
GEPLikeRecipe_match< Op0_t, Op1_t > m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPDerivedIV_match< Op0_t, Op1_t, Op2_t > m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
bind_ty< VPIRValue > m_VPIRValue(VPIRValue *&V)
Match a VPIRValue.
specific_intval< 1 > m_True()
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector is matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
iterator_range< po_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_post_order_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order while traversing through ...
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
FunctionAddr VTableAddr Next
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime over...
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result values are uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * NoAlias
The tag specifying the noalias scope.
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
A recipe for handling first-order recurrence phis.
A VPValue representing a live-in from the input IR or a constant.
Type * getType() const
Returns the type of the underlying IR value.
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...