57 if (!VPBB->getParent())
60 auto EndIter = Term ? Term->getIterator() : VPBB->end();
65 VPValue *VPV = Ingredient.getVPSingleValue();
81 *Load, Ingredient.getOperand(0),
nullptr ,
82 false , *VPI, Ingredient.getDebugLoc());
85 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
86 nullptr ,
false , *VPI,
87 Ingredient.getDebugLoc());
90 Ingredient.operands(), *VPI,
91 Ingredient.getDebugLoc(),
GEP);
103 if (VectorID == Intrinsic::experimental_noalias_scope_decl)
108 if (VectorID == Intrinsic::assume ||
109 VectorID == Intrinsic::lifetime_end ||
110 VectorID == Intrinsic::lifetime_start ||
111 VectorID == Intrinsic::sideeffect ||
112 VectorID == Intrinsic::pseudoprobe) {
117 const bool IsSingleScalar = VectorID != Intrinsic::assume &&
118 VectorID != Intrinsic::pseudoprobe;
122 Ingredient.getDebugLoc());
125 *CI, VectorID,
drop_end(Ingredient.operands()), CI->getType(),
126 VPIRFlags(*CI), *VPI, CI->getDebugLoc());
130 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
134 *VPI, Ingredient.getDebugLoc());
138 "inductions must be created earlier");
147 "Only recpies with zero or one defined values expected");
148 Ingredient.eraseFromParent();
164 if (
A->getOpcode() != Instruction::Store ||
165 B->getOpcode() != Instruction::Store)
175 const APInt *Distance;
181 Type *TyA =
A->getOperand(0)->getScalarType();
183 Type *TyB =
B->getOperand(0)->getScalarType();
189 uint64_t MaxStoreSize = std::max(SizeA, SizeB);
191 auto VFs =
B->getParent()->getPlan()->vectorFactors();
195 return Distance->
abs().
uge(
203 : ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), PSE(PSE),
211 return ExcludeRecipes.contains(&R) ||
212 (Store && isNoAliasViaDistance(Store, &GroupLeader));
225 std::optional<SinkStoreInfo> SinkInfo = {}) {
226 bool CheckReads = SinkInfo.has_value();
233 if (SinkInfo && SinkInfo->shouldSkip(R))
237 if (!
R.mayWriteToMemory() && !(CheckReads &&
R.mayReadFromMemory()))
262template <
unsigned Opcode>
267 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
268 "Only Load and Store opcodes supported");
269 constexpr bool IsLoad = (Opcode == Instruction::Load);
272 RecipesByAddressAndType;
277 if (!RepR || RepR->getOpcode() != Opcode || !FilterFn(RepR))
281 VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
285 RecipesByAddressAndType[{AddrSCEV, LoadStoreTy}].push_back(RepR);
290 for (
auto &Group :
Groups) {
305 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
317 if (Candidate->getParent() == SinkTo ||
322 if (!ScalarVFOnly && RepR->isSingleScalar())
325 WorkList.
insert({SinkTo, Candidate});
337 for (
auto &Recipe : *VPBB)
339 InsertIfValidSinkCandidate(VPBB,
Op);
343 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
346 std::tie(SinkTo, SinkCandidate) = WorkList[
I];
351 auto UsersOutsideSinkTo =
353 return cast<VPRecipeBase>(U)->getParent() != SinkTo;
355 if (
any_of(UsersOutsideSinkTo, [SinkCandidate](
VPUser *U) {
356 return !U->usesFirstLaneOnly(SinkCandidate);
359 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
361 if (NeedsDuplicating) {
365 if (
auto *SinkCandidateRepR =
371 nullptr , *SinkCandidateRepR,
375 Clone = SinkCandidate->
clone();
385 InsertIfValidSinkCandidate(SinkTo,
Op);
395 if (!EntryBB || EntryBB->size() != 1 ||
405 if (EntryBB->getNumSuccessors() != 2)
410 if (!Succ0 || !Succ1)
413 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
415 if (Succ0->getSingleSuccessor() == Succ1)
417 if (Succ1->getSingleSuccessor() == Succ0)
434 if (!Region1->isReplicator())
436 auto *MiddleBasicBlock =
438 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
443 if (!Region2 || !Region2->isReplicator())
448 if (!Mask1 || Mask1 != Mask2)
451 assert(Mask1 && Mask2 &&
"both region must have conditions");
457 if (TransformedRegions.
contains(Region1))
464 if (!Then1 || !Then2)
484 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
490 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
491 Phi1ToMove.eraseFromParent();
494 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
508 TransformedRegions.
insert(Region1);
511 return !TransformedRegions.
empty();
519 std::string RegionName = (
Twine(
"pred.") + Instr->getOpcodeName()).str();
520 assert(Instr->getParent() &&
"Predicated instruction not in any basic block");
521 auto *BlockInMask = PredRecipe->
getMask();
542 Region->setParent(ParentRegion);
548 RecipeWithoutMask->getDebugLoc());
549 Exiting->appendRecipe(PHIRecipe);
562 if (RepR->isPredicated())
581 if (ParentRegion && ParentRegion->
getExiting() == CurrentBlock)
593 if (!VPBB->getParent())
597 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
606 R.moveBefore(*PredVPBB, PredVPBB->
end());
608 auto *ParentRegion = VPBB->getParent();
609 if (ParentRegion && ParentRegion->getExiting() == VPBB)
610 ParentRegion->setExiting(PredVPBB);
614 return !WorkList.
empty();
621 bool ShouldSimplify =
true;
622 while (ShouldSimplify) {
638 if (!
IV ||
IV->getTruncInst())
653 for (
auto *U : FindMyCast->
users()) {
655 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
656 FoundUserCast = UserCast;
663 FindMyCast = FoundUserCast;
665 if (FindMyCast !=
IV)
680 Builder.createDerivedIV(Kind, FPBinOp, StartV, CanonicalIV, Step);
689 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy,
DL);
695 if (ResultTy != StepTy) {
702 Builder.setInsertPoint(VecPreheader);
703 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy,
DL);
705 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
731 WideCanIV->getDebugLoc(), Builder));
732 WideCanIV->eraseFromParent();
749 WideCanIV->replaceAllUsesWith(WidenIV);
750 WideCanIV->eraseFromParent();
759 if (PHICost > BroadcastCost)
768 unsigned RegClass =
TTI.getRegisterClassForType(
true, VecTy);
780 WideCanIV->getNoWrapFlags(), WideCanIV->getDebugLoc());
781 NewWideIV->insertBefore(&*Header->getFirstNonPhi());
782 WideCanIV->replaceAllUsesWith(NewWideIV);
783 WideCanIV->eraseFromParent();
791 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
793 if (IsConditionalAssume)
796 if (R.mayHaveSideEffects())
800 return all_of(R.definedValues(),
801 [](
VPValue *V) { return V->getNumUsers() == 0; });
821 VPUser *PhiUser = PhiR->getSingleUser();
827 PhiR->replaceAllUsesWith(Start);
828 PhiR->eraseFromParent();
836 for (
unsigned I = 0;
I !=
Users.size(); ++
I) {
839 Users.insert_range(V->users());
841 return Users.takeVector();
855 nullptr, StartV, StepV, PtrIV->
getDebugLoc(), Builder);
892 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
893 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
906 Def->operands(),
true,
908 Clone->insertAfter(Def);
909 Def->replaceAllUsesWith(Clone);
920 PtrIV->replaceAllUsesWith(PtrAdd);
927 if (HasOnlyVectorVFs &&
none_of(WideIV->users(), [WideIV](
VPUser *U) {
928 return U->usesScalars(WideIV);
934 Plan,
ID.getKind(),
ID.getInductionOpcode(),
936 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
937 WideIV->getDebugLoc(), Builder);
940 if (!HasOnlyVectorVFs) {
942 "plans containing a scalar VF cannot also include scalable VFs");
943 WideIV->replaceAllUsesWith(Steps);
946 WideIV->replaceUsesWithIf(Steps,
947 [WideIV, HasScalableVF](
VPUser &U,
unsigned) {
949 return U.usesFirstLaneOnly(WideIV);
950 return U.usesScalars(WideIV);
966 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
971 if (!Def || Def->getNumOperands() != 2)
979 auto IsWideIVInc = [&]() {
980 auto &
ID = WideIV->getInductionDescriptor();
983 VPValue *IVStep = WideIV->getStepValue();
984 switch (
ID.getInductionOpcode()) {
985 case Instruction::Add:
987 case Instruction::FAdd:
989 case Instruction::FSub:
992 case Instruction::Sub: {
1012 return IsWideIVInc() ? WideIV :
nullptr;
1029 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1040 VPValue *FirstActiveLane =
B.createFirstActiveLane(Mask,
DL);
1041 FirstActiveLane =
B.createScalarZExtOrTrunc(
1042 FirstActiveLane, CanonicalIVType, FirstActiveLane->
getScalarType(),
DL);
1043 VPValue *EndValue =
B.createAdd(CanonicalIV, FirstActiveLane,
DL);
1048 if (Incoming != WideIV) {
1050 EndValue =
B.createAdd(EndValue, One,
DL);
1055 VPIRValue *Start = WideIV->getStartValue();
1056 VPValue *Step = WideIV->getStepValue();
1057 EndValue =
B.createDerivedIV(
1059 Start, EndValue, Step);
1073 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1083 Start, VectorTC, Step);
1113 assert(EndValue &&
"Must have computed the end value up front");
1118 if (Incoming != WideIV)
1130 auto *Zero = Plan.
getZero(StepTy);
1131 return B.createPtrAdd(EndValue,
B.createSub(Zero, Step),
1136 return B.createNaryOp(
1137 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
1139 : Instruction::FAdd,
1140 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
1151 VPBuilder VectorPHBuilder(VectorPH, VectorPH->begin());
1161 EndValues[WideIV] = EndValue;
1171 R.getVPSingleValue()->replaceAllUsesWith(EndValue);
1172 R.eraseFromParent();
1181 for (
auto [Idx, PredVPBB] :
enumerate(ExitVPBB->getPredecessors())) {
1183 if (PredVPBB == MiddleVPBB)
1185 Plan, ExitIRI->getOperand(Idx), EndValues, PSE);
1188 Plan, ExitIRI->getOperand(Idx), PSE);
1190 ExitIRI->setOperand(Idx, Escape);
1207 const auto &[V, Inserted] = SCEV2VPV.
try_emplace(ExpR->getSCEV(), ExpR);
1211 ExpR->replaceAllUsesWith(V->second);
1215 ExpR->eraseFromParent();
1224 while (!WorkList.
empty()) {
1226 if (!Seen.
insert(Cur).second)
1234 R->eraseFromParent();
1241static std::optional<std::pair<bool, unsigned>>
1244 std::optional<std::pair<bool, unsigned>>>(R)
1247 [](
auto *
I) {
return std::make_pair(
false,
I->getOpcode()); })
1249 return std::make_pair(
true,
I->getVectorIntrinsicID());
1251 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe, VPScalarIVStepsRecipe>(
1257 I->getVPRecipeID());
1259 .
Default([](
auto *) {
return std::nullopt; });
1276 Value *V =
Op->getUnderlyingValue();
1282 auto FoldToIRValue = [&]() ->
Value * {
1284 if (OpcodeOrIID->first) {
1285 if (R.getNumOperands() != 2)
1287 unsigned ID = OpcodeOrIID->second;
1288 return Folder.FoldBinaryIntrinsic(
ID,
Ops[0],
Ops[1], R.getScalarType());
1290 unsigned Opcode = OpcodeOrIID->second;
1296 R.getVPSingleValue()->getScalarType());
1299 return Folder.FoldSelect(
Ops[0],
Ops[1],
1302 return Folder.FoldBinOp(Instruction::BinaryOps::Xor,
Ops[0],
1304 case Instruction::Select:
1305 return Folder.FoldSelect(
Ops[0],
Ops[1],
Ops[2]);
1306 case Instruction::ICmp:
1307 case Instruction::FCmp:
1310 case Instruction::GetElementPtr: {
1313 return Folder.FoldGEP(
GEP->getSourceElementType(),
Ops[0],
1322 case Instruction::ExtractElement:
1329 if (
Value *V = FoldToIRValue())
1330 return R.getParent()->getPlan()->getOrAddLiveIn(V);
1336 bool CanCreateNewRecipe) {
1337 VPlan *Plan = Def->getParent()->getPlan();
1347 Def->replaceAllUsesWith(
X);
1348 Def->eraseFromParent();
1360 Def->replaceAllUsesWith(
X);
1372 Def->replaceAllUsesWith(Plan->
getZero(Def->getScalarType()));
1378 Def->replaceAllUsesWith(
X);
1384 Def->replaceAllUsesWith(Plan->
getFalse());
1390 Def->replaceAllUsesWith(
X);
1395 if (CanCreateNewRecipe &&
1400 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1401 !Def->getOperand(1)->hasMoreThanOneUniqueUser())) {
1402 Def->replaceAllUsesWith(
1403 Builder.createLogicalAnd(
X, Builder.createOr(
Y, Z)));
1410 Def->replaceAllUsesWith(Def->getOperand(1));
1417 Def->replaceAllUsesWith(Builder.createLogicalAnd(
X,
Y));
1423 Def->replaceAllUsesWith(Plan->
getFalse());
1428 Def->replaceAllUsesWith(
X);
1434 if (CanCreateNewRecipe &&
1436 Def->replaceAllUsesWith(Builder.createNot(
C));
1442 Def->setOperand(0,
C);
1443 Def->setOperand(1,
Y);
1444 Def->setOperand(2,
X);
1449 if (CanCreateNewRecipe &&
1453 Y->getScalarType()->isIntegerTy(1)) {
1454 Def->replaceAllUsesWith(
1455 Builder.createOr(
Y, Builder.createLogicalAnd(
X, Z)));
1464 VPlan *Plan = Def->getParent()->getPlan();
1471 return Def->replaceAllUsesWith(V);
1477 PredPHI->replaceAllUsesWith(
Op);
1490 bool CanCreateNewRecipe =
1495 Type *TruncTy = Def->getScalarType();
1496 Type *ATy =
A->getScalarType();
1497 if (TruncTy == ATy) {
1498 Def->replaceAllUsesWith(
A);
1507 : Instruction::ZExt;
1510 if (
auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1512 Ext->setUnderlyingValue(UnderlyingExt);
1514 Def->replaceAllUsesWith(Ext);
1516 auto *Trunc = Builder.createWidenCast(Instruction::Trunc,
A, TruncTy);
1517 Def->replaceAllUsesWith(Trunc);
1527 return Def->replaceAllUsesWith(
A);
1530 return Def->replaceAllUsesWith(
A);
1533 return Def->replaceAllUsesWith(Plan->
getZero(Def->getScalarType()));
1539 return Def->replaceAllUsesWith(Builder.createSub(
1540 Plan->
getZero(
A->getScalarType()),
A, Def->getDebugLoc(),
"", NW));
1543 if (CanCreateNewRecipe &&
1551 ->hasNoSignedWrap()};
1552 return Def->replaceAllUsesWith(
1553 Builder.createSub(
X,
Y, Def->getDebugLoc(),
"", NW));
1559 return Def->replaceAllUsesWith(Builder.createNaryOp(
1561 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1566 return Def->replaceAllUsesWith(Builder.createNaryOp(
1568 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1573 return Def->replaceAllUsesWith(
A);
1588 R->setOperand(1,
Y);
1589 R->setOperand(2,
X);
1593 R->replaceAllUsesWith(Cmp);
1598 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1599 Cmp->setDebugLoc(Def->getDebugLoc());
1611 if (
Op->getNumUsers() > 1 ||
1615 }
else if (!UnpairedCmp) {
1616 UnpairedCmp =
Op->getDefiningRecipe();
1620 UnpairedCmp =
nullptr;
1627 if (NewOps.
size() < Def->getNumOperands()) {
1629 return Def->replaceAllUsesWith(NewAnyOf);
1636 if (CanCreateNewRecipe &&
1642 return Def->replaceAllUsesWith(NewCmp);
1648 Def->getOperand(1)->getScalarType() == Def->getScalarType())
1649 return Def->replaceAllUsesWith(Def->getOperand(1));
1653 Type *WideStepTy = Def->getScalarType();
1654 if (
X->getScalarType() != WideStepTy)
1655 X = Builder.createWidenCast(Instruction::Trunc,
X, WideStepTy);
1656 Def->replaceAllUsesWith(
X);
1665 Def->getScalarType()->isIntegerTy(1)) {
1666 Def->setOperand(1, Def->getOperand(0));
1667 Def->setOperand(0,
Y);
1674 return Def->replaceAllUsesWith(Def->getOperand(0));
1680 Def->replaceAllUsesWith(
1681 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1686 return Def->replaceAllUsesWith(
X);
1689 return Def->replaceAllUsesWith(
A);
1692 return Def->replaceAllUsesWith(
A);
1698 Def->replaceAllUsesWith(
1699 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1706 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1711 Def->replaceAllUsesWith(
1721 "broadcast operand must be single-scalar");
1722 Def->setOperand(0,
C);
1727 return Def->replaceUsesWithIf(
1728 X, [Def](
const VPUser &U,
unsigned) {
return U.usesScalars(Def); });
1731 if (Def->getNumOperands() == 1) {
1732 Def->replaceAllUsesWith(Def->getOperand(0));
1737 Phi->replaceAllUsesWith(Phi->getOperand(0));
1743 if (Def->getNumOperands() == 1 &&
1745 return Def->replaceAllUsesWith(IRV);
1758 return Def->replaceAllUsesWith(
A);
1765 return Def->replaceAllUsesWith(WidenIV->getRegion()->getCanonicalIV());
1768 Def->replaceAllUsesWith(Builder.createNaryOp(
1769 Instruction::ExtractElement, {A, LaneToExtract}, Def->getDebugLoc()));
1783 auto *IVInc = Def->getOperand(0);
1784 if (IVInc->getNumUsers() == 2) {
1789 if (Phi->getNumUsers() == 1 || (Phi->getNumUsers() == 2 && Inc)) {
1790 Def->replaceAllUsesWith(IVInc);
1792 Inc->replaceAllUsesWith(Phi);
1793 Phi->setOperand(0,
Y);
1809 Steps->replaceAllUsesWith(Steps->getOperand(0));
1817 Def->replaceUsesWithIf(StartV, [](
const VPUser &U,
unsigned Idx) {
1819 return PhiR && PhiR->isInLoop();
1825 return Def->replaceAllUsesWith(
A);
1851 while (!Worklist.
empty()) {
1860 R->replaceAllUsesWith(
1861 Builder.createLogicalAnd(HeaderMask, Builder.createLogicalAnd(
X,
Y)));
1865static std::optional<Instruction::BinaryOps>
1868 case Intrinsic::masked_udiv:
1869 return Instruction::UDiv;
1870 case Intrinsic::masked_sdiv:
1871 return Instruction::SDiv;
1872 case Intrinsic::masked_urem:
1873 return Instruction::URem;
1874 case Intrinsic::masked_srem:
1875 return Instruction::SRem;
1892 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1896 if (RepR && RepR->getOpcode() == Instruction::Store &&
1899 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1900 true ,
nullptr , *RepR ,
1901 *RepR , RepR->getDebugLoc());
1902 Clone->insertBefore(RepOrWidenR);
1904 VPValue *ExtractOp = Clone->getOperand(0);
1910 Clone->setOperand(0, ExtractOp);
1911 RepR->eraseFromParent();
1923 VPValue *SafeDivisor = Builder.createSelect(
1924 IntrR->getOperand(2), IntrR->getOperand(1),
1926 VPValue *Clone = Builder.createNaryOp(
1927 *
Opc, {IntrR->getOperand(0), SafeDivisor},
1930 IntrR->eraseFromParent();
1939 auto IntroducesBCastOf = [](
const VPValue *
Op) {
1948 return !U->usesScalars(
Op);
1952 if (
any_of(RepOrWidenR->users(), IntroducesBCastOf(RepOrWidenR)) &&
1955 make_filter_range(Op->users(), not_equal_to(RepOrWidenR)),
1956 IntroducesBCastOf(Op)))
1960 auto *IRV = dyn_cast<VPIRValue>(Op);
1961 bool LiveInNeedsBroadcast = IRV && !isa<Constant>(IRV->getValue());
1962 auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
1963 return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
1968 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1969 true ,
nullptr, *RepOrWidenR);
1970 Clone->insertBefore(RepOrWidenR);
1971 RepOrWidenR->replaceAllUsesWith(Clone);
1973 RepOrWidenR->eraseFromParent();
2009 if (Blend->isNormalized() || !
match(Blend->getMask(0),
m_False()))
2010 UniqueValues.
insert(Blend->getIncomingValue(0));
2011 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
2013 UniqueValues.
insert(Blend->getIncomingValue(
I));
2015 if (UniqueValues.
size() == 1) {
2016 Blend->replaceAllUsesWith(*UniqueValues.
begin());
2017 Blend->eraseFromParent();
2021 if (Blend->isNormalized())
2027 unsigned StartIndex = 0;
2028 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
2033 if (Mask->getNumUsers() == 1 && !
match(Mask,
m_False())) {
2040 OperandsWithMask.
push_back(Blend->getIncomingValue(StartIndex));
2042 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
2043 if (
I == StartIndex)
2045 OperandsWithMask.
push_back(Blend->getIncomingValue(
I));
2046 OperandsWithMask.
push_back(Blend->getMask(
I));
2051 OperandsWithMask, *Blend, Blend->getDebugLoc());
2052 NewBlend->insertBefore(&R);
2054 VPValue *DeadMask = Blend->getMask(StartIndex);
2056 Blend->eraseFromParent();
2061 if (NewBlend->getNumOperands() == 3 &&
2063 VPValue *Inc0 = NewBlend->getOperand(0);
2064 VPValue *Inc1 = NewBlend->getOperand(1);
2065 VPValue *OldMask = NewBlend->getOperand(2);
2066 NewBlend->setOperand(0, Inc1);
2067 NewBlend->setOperand(1, Inc0);
2068 NewBlend->setOperand(2, NewMask);
2095 APInt MaxVal = AlignedTC - 1;
2098 unsigned NewBitWidth =
2104 bool MadeChange =
false;
2129 "canonical IV is not expected to have a truncation");
2134 NewWideIV->insertBefore(WideIV);
2141 Cmp->replaceAllUsesWith(
2142 VPBuilder(Cmp).createICmp(Cmp->getPredicate(), NewWideIV, NewBTC));
2156 return any_of(
Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
2158 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, PSE);
2172 const SCEV *VectorTripCount =
2177 "Trip count SCEV must be computable");
2198 auto *Term = &ExitingVPBB->
back();
2211 for (
unsigned Part = 0; Part < UF; ++Part) {
2217 Extracts[Part] = Ext;
2229 match(Phi->getBackedgeValue(),
2231 assert(Index &&
"Expected index from ActiveLaneMask instruction");
2248 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
2255 "Expected incoming values of Phi to be ActiveLaneMasks");
2260 EntryALM->setOperand(2, ALMMultiplier);
2261 LoopALM->setOperand(2, ALMMultiplier);
2265 ExtractFromALM(EntryALM, EntryExtracts);
2270 ExtractFromALM(LoopALM, LoopExtracts);
2272 Not->setOperand(0, LoopExtracts[0]);
2275 for (
unsigned Part = 0; Part < UF; ++Part) {
2276 Phis[Part]->setStartValue(EntryExtracts[Part]);
2277 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
2290 auto *Term = &ExitingVPBB->
back();
2302 const SCEV *VectorTripCount =
2308 "Trip count SCEV must be computable");
2327 Term->setOperand(1, Plan.
getTrue());
2332 {}, Term->getDebugLoc());
2334 Term->eraseFromParent();
2368 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
2378 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
2379 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
2397 RecurKind RK = PhiR->getRecurrenceKind();
2404 RecWithFlags->dropPoisonGeneratingFlags();
2410struct VPCSEDenseMapInfo :
public DenseMapInfo<VPSingleDefRecipe *> {
2412 return Def == getEmptyKey();
2423 return GEP->getSourceElementType();
2426 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2427 [](
auto *
I) {
return I->getSourceElementType(); })
2428 .
Default([](
auto *) {
return nullptr; });
2432 static bool canHandle(
const VPSingleDefRecipe *Def) {
2441 if (!
C || (!
C->first && (
C->second == Instruction::InsertValue ||
2442 C->second == Instruction::ExtractValue)))
2448 return !
Def->mayReadFromMemory();
2452 static unsigned getHashValue(
const VPSingleDefRecipe *Def) {
2455 getGEPSourceElementType(Def),
Def->getScalarType(),
2458 if (RFlags->hasPredicate())
2461 return hash_combine(Result, SIVSteps->getInductionOpcode());
2466 static bool isEqual(
const VPSingleDefRecipe *L,
const VPSingleDefRecipe *R) {
2469 if (
L->getVPRecipeID() !=
R->getVPRecipeID() ||
2471 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2473 !
equal(
L->operands(),
R->operands()))
2476 "must have valid opcode info for both recipes");
2478 if (LFlags->hasPredicate() &&
2479 LFlags->getPredicate() !=
2483 if (LSIV->getInductionOpcode() !=
2489 const VPRegionBlock *RegionL =
L->getRegion();
2490 const VPRegionBlock *RegionR =
R->getRegion();
2493 L->getParent() !=
R->getParent())
2495 return L->getScalarType() ==
R->getScalarType();
2511 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2515 if (!VPDT.
dominates(V->getParent(), VPBB))
2520 Def->replaceAllUsesWith(V);
2551 "Expected vector prehader's successor to be the vector loop region");
2559 return !Op->isDefinedOutsideLoopRegions();
2562 R.moveBefore(*Preheader, Preheader->
end());
2580 assert(!RepR->isPredicated() &&
2581 "Expected prior transformation of predicated replicates to "
2582 "replicate regions");
2587 if (!RepR->isSingleScalar())
2599 if (
any_of(Def->users(), [&SinkBB, &LoopRegion](
VPUser *U) {
2600 auto *UserR = cast<VPRecipeBase>(U);
2601 VPBasicBlock *Parent = UserR->getParent();
2603 if (SinkBB && SinkBB != Parent)
2608 return UserR->isPhi() || Parent->getEnclosingLoopRegion() ||
2609 Parent->getSinglePredecessor() != LoopRegion;
2619 "Defining block must dominate sink block");
2644 VPValue *ResultVPV = R.getVPSingleValue();
2646 unsigned NewResSizeInBits = MinBWs.
lookup(UI);
2647 if (!NewResSizeInBits)
2660 (void)OldResSizeInBits;
2668 VPW->dropPoisonGeneratingFlags();
2670 assert((OldResSizeInBits != NewResSizeInBits ||
2672 "Only ICmps should not need extending the result.");
2678 if (OldResSizeInBits != NewResSizeInBits) {
2680 Instruction::ZExt, ResultVPV, OldResTy);
2682 Ext->setOperand(0, ResultVPV);
2692 unsigned OpSizeInBits =
Op->getScalarType()->getScalarSizeInBits();
2693 if (OpSizeInBits == NewResSizeInBits)
2695 assert(OpSizeInBits > NewResSizeInBits &&
"nothing to truncate");
2696 auto [ProcessedIter, Inserted] = ProcessedTruncs.
try_emplace(
Op);
2702 Builder.setInsertPoint(&R);
2703 ProcessedIter->second =
2704 Builder.createWidenCast(Instruction::Trunc,
Op, NewResTy);
2706 Op = ProcessedIter->second;
2710 NWR->insertBefore(&R);
2714 VPValue *Replacement = NWR->getVPSingleValue();
2715 if (OldResSizeInBits != NewResSizeInBits)
2721 R.eraseFromParent();
2727 std::optional<VPDominatorTree> VPDT;
2744 assert(VPBB->getNumSuccessors() == 2 &&
2745 "Two successors expected for BranchOnCond");
2746 unsigned RemovedIdx;
2757 "There must be a single edge between VPBB and its successor");
2765 VPBB->back().eraseFromParent();
2777 if (Reachable.contains(
B))
2788 for (
VPValue *Def : R.definedValues())
2789 Def->replaceAllUsesWith(&Tmp);
2790 R.eraseFromParent();
2847 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
2858 auto *EntryIncrement = Builder.createOverflowingOp(
2860 DL,
"index.part.next");
2866 {EntryIncrement, TC, ALMMultiplier},
DL,
2867 "active.lane.mask.entry");
2874 LaneMaskPhi->insertBefore(*HeaderVPBB, HeaderVPBB->begin());
2879 Builder.setInsertPoint(OriginalTerminator);
2880 auto *InLoopIncrement = Builder.createOverflowingOp(
2882 {CanonicalIVIncrement, &Plan.
getVF()}, {
false,
false},
DL);
2884 {InLoopIncrement, TC, ALMMultiplier},
DL,
2885 "active.lane.mask.next");
2890 auto *NotMask = Builder.createNot(ALM,
DL);
2897 bool UseActiveLaneMaskForControlFlow) {
2899 auto *WideCanonicalIV =
2901 assert(WideCanonicalIV &&
2902 "Must have widened canonical IV when tail folding!");
2905 if (UseActiveLaneMaskForControlFlow) {
2914 nullptr,
"active.lane.mask");
2930 template <
typename OpTy>
bool match(OpTy *V)
const {
2941template <
typename Op0_t,
typename Op1_t>
2949 case Intrinsic::masked_udiv:
2950 return Intrinsic::vp_udiv;
2951 case Intrinsic::masked_sdiv:
2952 return Intrinsic::vp_sdiv;
2953 case Intrinsic::masked_urem:
2954 return Intrinsic::vp_urem;
2955 case Intrinsic::masked_srem:
2956 return Intrinsic::vp_srem;
2958 return std::nullopt;
2973 VPValue *Addr, *Mask, *EndPtr;
2976 auto AdjustEndPtr = [&CurRecipe, &EVL](
VPValue *EndPtr) {
2978 EVLEndPtr->insertBefore(&CurRecipe);
2983 EVLEndPtr->setOperand(1, EVLAsVF);
2987 auto GetVPReverse = [&CurRecipe, &EVL, Plan,
2992 Intrinsic::experimental_vp_reverse, {V, Plan->
getTrue(), &EVL},
2993 V->getScalarType(), {}, {},
DL);
2994 Reverse->insertBefore(&CurRecipe);
2998 if (
match(&CurRecipe,
3009 Mask = GetVPReverse(Mask);
3010 Addr = AdjustEndPtr(EndPtr);
3013 LoadR->insertBefore(&CurRecipe);
3015 {LoadR, Plan->
getTrue(), &EVL},
3016 LoadR->getScalarType(), {}, {},
DL);
3027 NewLoad->setOperand(2, Mask);
3028 NewLoad->setOperand(3, &EVL);
3036 StoredVal, EVL, Mask);
3038 if (
match(&CurRecipe,
3042 Mask = GetVPReverse(Mask);
3043 Addr = AdjustEndPtr(EndPtr);
3044 StoredVal = GetVPReverse(ReversedVal);
3046 StoredVal, EVL, Mask);
3050 if (Rdx->isConditional() &&
3055 if (Interleave->getMask() &&
3063 Intrinsic::vp_merge, {Mask ? Mask : Plan->
getTrue(),
LHS,
RHS, &EVL},
3064 LHS->getScalarType(), {}, {},
DL);
3077 if (
match(&CurRecipe,
3082 LHS->getScalarType(), {}, {},
DL);
3088 {IntrR->getOperand(0),
3089 IntrR->getOperand(1),
3090 Mask ? Mask : Plan->
getTrue(), &EVL},
3091 IntrR->getScalarType(), {}, {},
DL);
3100 VPValue *HeaderMask =
nullptr, *EVL =
nullptr;
3105 HeaderMask = R.getVPSingleValue();
3116 NewR->insertBefore(R);
3117 for (
auto [Old, New] :
3118 zip_equal(R->definedValues(), NewR->definedValues()))
3119 Old->replaceAllUsesWith(New);
3132 Mask->getScalarType(), {}, {}, LogicalAnd->getDebugLoc());
3133 Merge->insertBefore(LogicalAnd);
3134 LogicalAnd->replaceAllUsesWith(
Merge);
3141 R->eraseFromParent();
3162 auto IsAllowedUser =
3163 IsaPred<VPVectorEndPointerRecipe, VPScalarIVStepsRecipe,
3164 VPWidenIntOrFpInductionRecipe,
3165 VPWidenMemIntrinsicRecipe>;
3166 if (match(U, m_Trunc(m_Specific(&Plan.getVF()))))
3167 return all_of(cast<VPSingleDefRecipe>(U)->users(),
3169 return IsAllowedUser(U);
3171 "User of VF that we can't transform to EVL.");
3181 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
3182 "increment of the canonical induction.");
3198 MaxEVL = Builder.createScalarZExtOrTrunc(
3202 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
3203 VPValue *PrevEVL = Builder.createScalarPhi(
3217 Intrinsic::experimental_vp_splice,
3218 {V1, V2, Imm, Plan.
getTrue(), PrevEVL, &EVL},
3219 R.getVPSingleValue()->getScalarType(), {}, {}, R.getDebugLoc());
3221 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
3234 if (match(&R, m_ComputeReductionResult(m_Select(m_Specific(HeaderMask),
3235 m_VPValue(), m_VPValue()))))
3236 return R.getOperand(0)->getDefiningRecipe()->getRegion() ==
3237 Plan.getVectorLoopRegion();
3249 VPValue *EVLMask = Builder.createICmp(
3309 VPlan &Plan,
const std::optional<unsigned> &MaxSafeElements) {
3321 auto *CurrentIteration =
3323 CurrentIteration->insertBefore(*Header, Header->begin());
3324 VPBuilder Builder(Header, Header->getFirstNonPhi());
3327 VPPhi *AVLPhi = Builder.createScalarPhi(
3331 if (MaxSafeElements) {
3341 Builder.setInsertPoint(CanonicalIVIncrement);
3345 OpVPEVL = Builder.createScalarZExtOrTrunc(
3346 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
3348 auto *NextIter = Builder.createAdd(
3349 OpVPEVL, CurrentIteration, CanonicalIVIncrement->getDebugLoc(),
3350 "current.iteration.next", CanonicalIVIncrement->getNoWrapFlags());
3351 CurrentIteration->addOperand(NextIter);
3355 "avl.next", {
true,
false});
3363 CanonicalIV->replaceAllUsesWith(CurrentIteration);
3364 CanonicalIVIncrement->setOperand(0, CanonicalIV);
3378 assert(!CurrentIteration &&
3379 "Found multiple CurrentIteration. Only one expected");
3380 CurrentIteration = PhiR;
3384 if (!CurrentIteration)
3395 CurrentIteration->
getDebugLoc(),
"current.iteration.iv");
3404 CanIVInc->eraseFromParent();
3413 if (Header->empty())
3422 if (!
match(EVLPhi->getBackedgeValue(),
3435 [[maybe_unused]]
bool FoundAVLNext =
3438 assert(FoundAVLNext &&
"Didn't find AVL backedge?");
3446 [[maybe_unused]]
bool FoundIncrement =
match(
3453 "Expected BranchOnCond with ICmp comparing CanIV + VFxUF with vector "
3458 LatchBr->setOperand(
3469 "expected to run before loop regions are created");
3472 auto CanUseVersionedStride = [&VPDT, Preheader](
VPUser &U,
unsigned) {
3475 return VPDT.
dominates(Preheader, Parent);
3478 for (
const SCEV *Stride : StridesMap.
values()) {
3481 const APInt *StrideConst;
3504 RewriteMap[StrideV] = PSE.
getSCEV(StrideV);
3511 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3514 if (NewSCEV != ScevExpr) {
3516 ExpSCEV->replaceAllUsesWith(NewExp);
3527 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](
VPRecipeBase *Root) {
3532 while (!Worklist.
empty()) {
3535 if (!Visited.
insert(CurRec).second)
3557 RecWithFlags->isDisjoint()) {
3560 Builder.createAdd(
A,
B, RecWithFlags->getDebugLoc());
3561 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3562 RecWithFlags->replaceAllUsesWith(New);
3563 RecWithFlags->eraseFromParent();
3566 RecWithFlags->dropPoisonGeneratingFlags();
3571 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3572 "found instruction with poison generating flags not covered by "
3573 "VPRecipeWithIRFlags");
3578 if (
VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3586 auto IsNotHeaderMask = [&Plan](
VPValue *Mask) {
3598 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3599 if (AddrDef && WidenRec->isConsecutive() &&
3600 IsNotHeaderMask(WidenRec->getMask()))
3601 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3603 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3604 if (AddrDef && IsNotHeaderMask(InterleaveRec->getMask()))
3605 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3615 const bool &EpilogueAllowed) {
3616 if (InterleaveGroups.empty())
3627 IRMemberToRecipe[&MemR->getIngredient()] = MemR;
3634 for (
const auto *IG : InterleaveGroups) {
3639 return !IRMemberToRecipe.contains(Member);
3643 auto *Start = IRMemberToRecipe.
lookup(IG->getMember(0));
3647 StoredValues.
push_back(StoreR->getStoredValue());
3648 for (
unsigned I = 1;
I < IG->getFactor(); ++
I) {
3654 StoredValues.
push_back(StoreR->getStoredValue());
3658 bool NeedsMaskForGaps =
3659 (IG->requiresScalarEpilogue() && !EpilogueAllowed) ||
3660 (!StoredValues.
empty() && !IG->isFull());
3663 auto *InsertPos = IRMemberToRecipe.
lookup(IRInsertPos);
3672 VPValue *Addr = Start->getAddr();
3681 assert(IG->getIndex(IRInsertPos) != 0 &&
3682 "index of insert position shouldn't be zero");
3686 IG->getIndex(IRInsertPos),
3690 Addr =
B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3696 if (IG->isReverse()) {
3699 -(int64_t)IG->getFactor(), NW, InsertPosR->
getDebugLoc());
3700 ReversePtr->insertBefore(InsertPosR);
3704 IG, Addr, StoredValues, InsertPos->getMask(), NeedsMaskForGaps,
3706 VPIG->insertBefore(InsertPosR);
3709 for (
unsigned i = 0; i < IG->getFactor(); ++i)
3712 if (!Member->getType()->isVoidTy()) {
3770 AddOp = Instruction::Add;
3771 MulOp = Instruction::Mul;
3773 AddOp =
ID.getInductionOpcode();
3774 MulOp = Instruction::FMul;
3782 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty,
DL);
3783 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty,
DL);
3792 Init = Builder.createWidenCast(Instruction::UIToFP,
Init, StepTy);
3797 Init = Builder.createNaryOp(MulOp, {
Init, SplatStep}, Flags);
3798 Init = Builder.createNaryOp(AddOp, {SplatStart,
Init}, Flags,
3816 if (R->getParent()->getEnclosingLoopRegion())
3817 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3822 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3825 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, VF->
getScalarType(),
DL);
3827 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3834 auto *
Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3837 WidePHI->addOperand(
Next);
3864 VPlan *Plan = R->getParent()->getPlan();
3865 VPValue *Start = R->getStartValue();
3866 VPValue *Step = R->getStepValue();
3867 VPValue *VF = R->getVFValue();
3869 assert(R->getInductionDescriptor().getKind() ==
3871 "Not a pointer induction according to InductionDescriptor!");
3872 assert(R->getScalarType()->isPointerTy() &&
"Unexpected type.");
3874 "Recipe should have been replaced");
3880 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start,
DL,
"pointer.phi");
3884 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3887 Offset = Builder.createOverflowingOp(Instruction::Mul, {
Offset, Step});
3889 Builder.createWidePtrAdd(ScalarPtrPhi,
Offset,
DL,
"vector.gep");
3890 R->replaceAllUsesWith(PtrAdd);
3895 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, VF->
getScalarType(),
DL);
3896 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
3899 Builder.createPtrAdd(ScalarPtrPhi, Inc,
DL,
"ptr.ind");
3907 VPValue *Step = R->getStepValue();
3908 VPValue *Index = R->getIndex();
3912 ? Builder.createScalarSExtOrTrunc(
3914 : Builder.createScalarCast(Instruction::SIToFP, Index, StepTy,
3916 switch (R->getInductionKind()) {
3918 assert(Index->getScalarType() == Start->getScalarType() &&
3919 "Index type does not match StartValue type");
3920 return R->replaceAllUsesWith(Builder.createAdd(
3921 Start, Builder.createOverflowingOp(Instruction::Mul, {Index, Step})));
3924 return R->replaceAllUsesWith(Builder.createPtrAdd(
3925 Start, Builder.createOverflowingOp(Instruction::Mul, {Index, Step})));
3930 (FPBinOp->
getOpcode() == Instruction::FAdd ||
3931 FPBinOp->
getOpcode() == Instruction::FSub) &&
3932 "Original BinOp should be defined for FP induction");
3934 VPValue *
FMul = Builder.createNaryOp(Instruction::FMul, {Step, Index}, FMF);
3935 return R->replaceAllUsesWith(
3936 Builder.createNaryOp(FPBinOp->
getOpcode(), {Start, FMul}, FMF));
3949 if (!R->isReplicator())
3953 R->dissolveToCFGLoop();
3974 assert(Br->getNumOperands() == 2 &&
3975 "BranchOnTwoConds must have exactly 2 conditions");
3979 assert(Successors.size() == 3 &&
3980 "BranchOnTwoConds must have exactly 3 successors");
3985 VPValue *Cond0 = Br->getOperand(0);
3986 VPValue *Cond1 = Br->getOperand(1);
3991 !BrOnTwoCondsBB->
getParent() &&
"regions must already be dissolved");
4004 Br->eraseFromParent();
4015 WidenIVR->eraseFromParent();
4025 WidenIVR->replaceAllUsesWith(PtrAdd);
4026 WidenIVR->eraseFromParent();
4030 WidenIVR->eraseFromParent();
4036 DerivedIVR->eraseFromParent();
4041 VPValue *CanIV = WideCanIV->getCanonicalIV();
4043 VPValue *Step = WideCanIV->getStepValue();
4046 "Expected unroller to have materialized step for UF != 1");
4051 Step = Builder.createAdd(
4054 Builder.createAdd(CanIV, Step, WideCanIV->getDebugLoc(),
"vec.iv",
4055 WideCanIV->getNoWrapFlags());
4057 WideCanIV->eraseFromParent();
4064 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
4065 Select = Builder.createSelect(Blend->getMask(
I),
4066 Blend->getIncomingValue(
I),
Select,
4067 R.getDebugLoc(),
"predphi", *Blend);
4068 Blend->replaceAllUsesWith(
Select);
4069 Blend->eraseFromParent();
4074 if (!VEPR->getOffset()) {
4076 "Expected unroller to have materialized offset for UF != 1");
4077 VEPR->materializeOffset();
4084 Expr->eraseFromParent();
4094 for (
VPValue *
Op : LastActiveL->operands()) {
4095 VPValue *NotMask = Builder.createNot(
Op, LastActiveL->getDebugLoc());
4100 VPValue *FirstInactiveLane = Builder.createFirstActiveLane(
4101 NotMasks, LastActiveL->getDebugLoc(),
"first.inactive.lane");
4107 Builder.createSub(FirstInactiveLane, One,
4108 LastActiveL->getDebugLoc(),
"last.active.lane");
4111 LastActiveL->eraseFromParent();
4118 assert(VPI->isMasked() &&
4119 "Unmasked MaskedCond should be simplified earlier");
4120 VPI->replaceAllUsesWith(Builder.createNaryOp(
4122 VPI->eraseFromParent();
4132 Instruction::Add, VPI->operands(), VPI->getNoWrapFlags(),
4133 VPI->getDebugLoc());
4134 VPI->replaceAllUsesWith(
Add);
4135 VPI->eraseFromParent();
4143 DebugLoc DL = BranchOnCountInst->getDebugLoc();
4146 BranchOnCountInst->eraseFromParent();
4161 ? Instruction::UIToFP
4162 : Instruction::Trunc;
4163 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
4169 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
4175 MulOpc = Instruction::FMul;
4176 Flags = VPI->getFastMathFlags();
4178 MulOpc = Instruction::Mul;
4183 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
4185 VPI->replaceAllUsesWith(VectorStep);
4186 VPI->eraseFromParent();
4196 struct EarlyExitInfo {
4207 if (Pred == MiddleVPBB)
4212 VPValue *CondOfEarlyExitingVPBB;
4213 [[maybe_unused]]
bool Matched =
4214 match(EarlyExitingVPBB->getTerminator(),
4216 assert(Matched &&
"Terminator must be BranchOnCond");
4220 VPBuilder EarlyExitingBuilder(EarlyExitingVPBB->getTerminator());
4221 auto *CondToEarlyExit = EarlyExitingBuilder.
createNaryOp(
4223 TrueSucc == ExitBlock
4224 ? CondOfEarlyExitingVPBB
4225 : EarlyExitingBuilder.
createNot(CondOfEarlyExitingVPBB));
4231 "exit condition must dominate the latch");
4240 assert(!Exits.
empty() &&
"must have at least one early exit");
4247 for (
const auto &[Num, VPB] :
enumerate(RPOT))
4249 llvm::sort(Exits, [&RPOIdx](
const EarlyExitInfo &
A,
const EarlyExitInfo &
B) {
4250 return RPOIdx[
A.EarlyExitingVPBB] < RPOIdx[
B.EarlyExitingVPBB];
4256 for (
unsigned I = 0;
I + 1 < Exits.
size(); ++
I)
4257 for (
unsigned J =
I + 1; J < Exits.
size(); ++J)
4259 Exits[
I].EarlyExitingVPBB) &&
4260 "RPO sort must place dominating exits before dominated ones");
4266 VPValue *Combined = Exits[0].CondToExit;
4267 for (
const EarlyExitInfo &Info :
drop_begin(Exits))
4268 Combined = Builder.createLogicalOr(Combined, Info.CondToExit);
4274 "Early exit store masking not implemented");
4278 for (
unsigned Idx = 0; Idx != Exits.
size(); ++Idx) {
4282 VectorEarlyExitVPBBs[Idx] = VectorEarlyExitVPBB;
4290 Exits.
size() == 1 ? VectorEarlyExitVPBBs[0]
4324 for (
auto [Exit, VectorEarlyExitVPBB] :
4325 zip_equal(Exits, VectorEarlyExitVPBBs)) {
4326 auto &[EarlyExitingVPBB, EarlyExitVPBB,
_] = Exit;
4338 ExitIRI->getIncomingValueForBlock(EarlyExitingVPBB);
4339 VPValue *NewIncoming = IncomingVal;
4341 VPBuilder EarlyExitBuilder(VectorEarlyExitVPBB);
4346 ExitIRI->removeIncomingValueFor(EarlyExitingVPBB);
4347 ExitIRI->addOperand(NewIncoming);
4350 EarlyExitingVPBB->getTerminator()->eraseFromParent();
4384 bool IsLastDispatch = (
I + 2 == Exits.
size());
4386 IsLastDispatch ? VectorEarlyExitVPBBs.
back()
4392 VectorEarlyExitVPBBs[
I]->setPredecessors({CurrentBB});
4395 CurrentBB = FalseBB;
4405 "Unexpected terminator");
4406 VPValue *IsLatchExitTaken = LatchExitingBranch->getOperand(0);
4408 DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
4409 LatchExitingBranch->eraseFromParent();
4410 Builder.setInsertPoint(LatchVPBB);
4412 {IsAnyExitTaken, IsLatchExitTaken}, LatchDL);
4414 LatchVPBB->
setSuccessors({DispatchVPBB, MiddleVPBB, HeaderVPBB});
4425 VPValue *VecOp = Red->getVecOp();
4427 assert(!Red->isPartialReduction() &&
4428 "This path does not support partial reductions");
4431 auto IsExtendedRedValidAndClampRange =
4444 "getExtendedReductionCost only supports integer types");
4445 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
4446 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
4447 Red->getFastMathFlags(),
CostKind);
4448 return ExtRedCost.
isValid() && ExtRedCost < ExtCost + RedCost;
4456 IsExtendedRedValidAndClampRange(
4477 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
4478 Opcode != Instruction::FAdd)
4481 assert(!Red->isPartialReduction() &&
4482 "This path does not support partial reductions");
4486 auto IsMulAccValidAndClampRange =
4498 (Ext0->getOpcode() != Ext1->getOpcode() ||
4499 Ext0->getOpcode() == Instruction::CastOps::FPExt))
4503 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
4505 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
4512 ExtCost += Ext0->computeCost(VF, Ctx);
4514 ExtCost += Ext1->computeCost(VF, Ctx);
4516 ExtCost += OuterExt->computeCost(VF, Ctx);
4518 return MulAccCost.
isValid() &&
4519 MulAccCost < ExtCost + MulCost + RedCost;
4524 VPValue *VecOp = Red->getVecOp();
4562 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
4564 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
4565 Mul->setOperand(1, ExtB);
4575 ExtendAndReplaceConstantOp(RecipeA, RecipeB,
B,
Mul);
4580 IsMulAccValidAndClampRange(
Mul, RecipeA, RecipeB,
nullptr)) {
4587 if (!
Sub && IsMulAccValidAndClampRange(
Mul,
nullptr,
nullptr,
nullptr))
4604 ExtendAndReplaceConstantOp(Ext0, Ext1,
B,
Mul);
4613 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
4614 Ext0->getOpcode() == Ext1->getOpcode() &&
4615 IsMulAccValidAndClampRange(
Mul, Ext0, Ext1, Ext) &&
Mul->hasOneUse()) {
4617 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getScalarType(),
nullptr,
4618 *Ext0, *Ext0, Ext0->getDebugLoc());
4619 NewExt0->insertBefore(Ext0);
4624 Ext->getScalarType(),
nullptr, *Ext1,
4625 *Ext1, Ext1->getDebugLoc());
4628 auto *NewMul =
Mul->cloneWithOperands({NewExt0, NewExt1});
4629 NewMul->insertBefore(
Mul);
4630 Ext->replaceAllUsesWith(NewMul);
4631 Ext->eraseFromParent();
4632 Mul->eraseFromParent();
4646 assert(!Red->isPartialReduction() &&
4647 "This path does not support partial reductions");
4650 auto IP = std::next(Red->getIterator());
4651 auto *VPBB = Red->getParent();
4661 Red->replaceAllUsesWith(AbstractR);
4691 for (
VPValue *VPV : VPValues) {
4700 if (
User->usesScalars(VPV))
4703 HoistPoint = HoistBlock->
begin();
4707 "All users must be in the vector preheader or dominated by it");
4712 VPV->replaceUsesWithIf(Broadcast,
4713 [VPV, Broadcast](
VPUser &U,
unsigned Idx) {
4714 return Broadcast != &U && !U.usesScalars(VPV);
4725 return CommonMetadata;
4728template <
unsigned Opcode>
4733 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
4734 "Only Load and Store opcodes supported");
4735 [[maybe_unused]]
constexpr bool IsLoad = (Opcode == Instruction::Load);
4742 for (
auto Recipes :
Groups) {
4743 if (Recipes.size() < 2)
4748 "Expected all recipes in group to have the same load-store type");
4755 VPValue *MaskI = RecipeI->getMask();
4761 bool HasComplementaryMask =
false;
4766 VPValue *MaskJ = RecipeJ->getMask();
4775 if (HasComplementaryMask) {
4776 assert(Group.
size() >= 2 &&
"must have at least 2 entries");
4786template <
typename InstType>
4804 for (
auto &Group :
Groups) {
4824 return R->isSingleScalar() == IsSingleScalar;
4826 "all members in group must agree on IsSingleScalar");
4831 LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
4832 IsSingleScalar,
nullptr, *EarliestLoad, CommonMetadata);
4834 UnpredicatedLoad->insertBefore(EarliestLoad);
4838 Load->replaceAllUsesWith(UnpredicatedLoad);
4839 Load->eraseFromParent();
4848 if (!StoreLoc || !StoreLoc->AATags.Scope)
4854 StoresToSink.
end());
4858 SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], PSE, L);
4870 for (
auto &Group :
Groups) {
4883 VPValue *SelectedValue = Group[0]->getOperand(0);
4886 bool IsSingleScalar = Group[0]->isSingleScalar();
4887 for (
unsigned I = 1;
I < Group.size(); ++
I) {
4888 assert(IsSingleScalar == Group[
I]->isSingleScalar() &&
4889 "all members in group must agree on IsSingleScalar");
4890 VPValue *Mask = Group[
I]->getMask();
4892 SelectedValue = Builder.createSelect(Mask,
Value, SelectedValue,
4901 StoreWithMinAlign->getUnderlyingInstr(),
4902 {SelectedValue, LastStore->getOperand(1)}, IsSingleScalar,
4903 nullptr, *LastStore, CommonMetadata);
4904 UnpredicatedStore->insertBefore(*InsertBB, LastStore->
getIterator());
4908 Store->eraseFromParent();
4915 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
4916 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
4979 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](
VPUser *U) {
4981 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
4988 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
4998 DefR->replaceUsesWithIf(
4999 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
5001 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
5015 for (
VPValue *Def : R.definedValues()) {
5028 auto IsCandidateUnpackUser = [Def](
VPUser *U) {
5030 return U->usesScalars(Def) &&
5033 if (
none_of(Def->users(), IsCandidateUnpackUser))
5040 Unpack->insertAfter(&R);
5041 Def->replaceUsesWithIf(Unpack,
5042 [&IsCandidateUnpackUser](
VPUser &U,
unsigned) {
5043 return IsCandidateUnpackUser(&U);
5052 bool RequiresScalarEpilogue,
VPValue *Step,
5053 std::optional<uint64_t> MaxRuntimeStep) {
5065 "Step VPBB must dominate VectorPHVPBB");
5067 InsertPt = std::next(StepR->getIterator());
5069 VPBuilder Builder(VectorPHVPBB, InsertPt);
5075 if (!RequiresScalarEpilogue &&
match(TC,
m_APInt(TCVal)) && MaxRuntimeStep &&
5087 if (TailByMasking) {
5088 TC = Builder.createAdd(
5099 Builder.createNaryOp(Instruction::URem, {TC, Step},
5108 if (RequiresScalarEpilogue) {
5110 "requiring scalar epilogue is not supported with fail folding");
5113 R = Builder.createSelect(IsZero, Step, R);
5127 "VF and VFxUF must be materialized together");
5139 Builder.createElementCount(TCTy, VFEC * Plan.
getConcreteUF());
5146 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
5150 BC, [&VF](
VPUser &U,
unsigned) {
return !U.usesScalars(&VF); });
5154 VPValue *MulByUF = Builder.createOverflowingOp(
5167 auto *AliasMask = Builder.createNaryOp(
5171 if (HeaderMaskDef->isPhi())
5172 Builder =
VPBuilder(&*HeaderMaskDef->getParent()->getFirstNonPhi());
5177 auto *ClampedHeaderMask = Builder.createAnd(HeaderMask, AliasMask);
5179 return &U != ClampedHeaderMask;
5190 assert(IncomingAliasMask &&
"Expected an alias mask!");
5200 if (
Check.NeedsFreeze) {
5210 Intrinsic::loop_dependence_war_mask,
5214 AliasMask = Builder.createAnd(AliasMask, WARMask);
5216 AliasMask = WARMask;
5221 VPValue *NumActive = Builder.createNaryOp(
5224 VPValue *ClampedVF = Builder.createScalarZExtOrTrunc(
5250 VPValue *DistanceToMax = Builder.createSub(MaxUIntTripCount, TripCount);
5258 VPValue *TripCountCheck = Builder.createICmp(
5261 VPValue *
Cond = Builder.createOr(IsScalar, TripCountCheck,
DL);
5272 "Clamped VF not supported with interleaving");
5280 VPBuilder Builder(Entry, Entry->begin());
5288 if (!ExpSCEV || ExpSCEV->getNumUsers() == 0)
5290 Builder.setInsertPoint(ExpSCEV);
5297 ExpSCEV->eraseFromParent();
5306 BasicBlock *EntryBB = Entry->getIRBasicBlock();
5313 const SCEV *Expr = ExpSCEV->getSCEV();
5316 ExpandedSCEVs[Expr] = Res;
5321 ExpSCEV->eraseFromParent();
5324 "all VPExpandSCEVRecipes must have been expanded");
5327 auto EI = Entry->begin();
5337 return ExpandedSCEVs;
5349 VPValue *OpV,
unsigned Idx,
bool IsScalable) {
5353 return Member0Op == OpV;
5357 return !IsScalable && !W->getMask() && W->isConsecutive() &&
5360 return IR->getInterleaveGroup()->isFull() &&
IR->getVPValue(Idx) == OpV;
5377 for (
unsigned Idx = 0; Idx != WideMember0->getNumOperands(); ++Idx) {
5380 OpsI.
push_back(
Op->getDefiningRecipe()->getOperand(Idx));
5385 if (
any_of(
enumerate(OpsI), [WideMember0, Idx, IsScalable](
const auto &
P) {
5386 const auto &[
OpIdx, OpV] =
P;
5398static std::optional<ElementCount>
5402 if (!InterleaveR || InterleaveR->
getMask())
5403 return std::nullopt;
5405 Type *GroupElementTy =
nullptr;
5409 return Op->getScalarType() == GroupElementTy;
5411 return std::nullopt;
5415 return Op->getScalarType() == GroupElementTy;
5417 return std::nullopt;
5421 if (IG->getFactor() != IG->getNumMembers())
5422 return std::nullopt;
5428 assert(
Size.isScalable() == VF.isScalable() &&
5429 "if Size is scalable, VF must be scalable and vice versa");
5430 return Size.getKnownMinValue();
5434 unsigned MinVal = VF.getKnownMinValue();
5436 if (IG->getFactor() == MinVal && GroupSize == GetVectorBitWidthForVF(VF))
5439 return std::nullopt;
5447 return RepR && RepR->isSingleScalar();
5454 auto *R = V->getDefiningRecipe();
5463 for (
unsigned Idx = 0,
E = WideMember0->getNumOperands(); Idx !=
E; ++Idx)
5464 WideMember0->setOperand(
5473 auto *LI =
cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
5475 LoadGroup->getMask(),
true,
5476 *LoadGroup, LoadGroup->getDebugLoc());
5477 L->insertBefore(LoadGroup);
5483 assert(RepR->isSingleScalar() && RepR->getOpcode() == Instruction::Load &&
5484 "must be a single scalar load");
5485 NarrowedOps.
insert(RepR);
5490 VPValue *PtrOp = WideLoad->getAddr();
5492 PtrOp = VecPtr->getOperand(0);
5497 nullptr, {}, *WideLoad);
5498 N->insertBefore(WideLoad);
5503std::unique_ptr<VPlan>
5523 "unexpected branch-on-count");
5526 std::optional<ElementCount> VFToOptimize;
5540 if (R.mayWriteToMemory() && !InterleaveR)
5546 return any_of(V->users(), [&](VPUser *U) {
5547 auto *UR = cast<VPRecipeBase>(U);
5548 return UR->getParent()->getParent() != VectorLoop;
5565 std::optional<ElementCount> NarrowedVF =
5567 if (!NarrowedVF || (VFToOptimize && NarrowedVF != VFToOptimize))
5569 VFToOptimize = NarrowedVF;
5572 if (InterleaveR->getStoredValues().empty())
5577 auto *Member0 = InterleaveR->getStoredValues()[0];
5587 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
5590 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
5591 return IR && IR->getInterleaveGroup()->isFull() &&
5592 IR->getVPValue(Op.index()) == Op.value();
5601 VFToOptimize->isScalable()))
5606 if (StoreGroups.empty())
5610 bool RequiresScalarEpilogue =
5621 std::unique_ptr<VPlan> NewPlan;
5623 NewPlan = std::unique_ptr<VPlan>(Plan.
duplicate());
5624 Plan.
setVF(*VFToOptimize);
5625 NewPlan->removeVF(*VFToOptimize);
5631 for (
auto *StoreGroup : StoreGroups) {
5638 StoreGroup->getDebugLoc());
5639 S->insertBefore(StoreGroup);
5640 StoreGroup->eraseFromParent();
5646 Type *CanIVTy = VectorLoop->getCanonicalIVType();
5652 if (VFToOptimize->isScalable()) {
5655 Step = PHBuilder.createOverflowingOp(Instruction::Mul, {VScale,
UF},
5663 materializeVectorTripCount(Plan, VectorPH,
false,
5664 RequiresScalarEpilogue, Step);
5669 removeDeadRecipes(Plan);
5672 "All VPVectorPointerRecipes should have been removed");
5688 "must have a BranchOnCond");
5691 if (VF.
isScalable() && VScaleForTuning.has_value())
5692 VectorStep *= *VScaleForTuning;
5693 assert(VectorStep > 0 &&
"trip count should not be zero");
5697 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
5716 "Cannot handle loops with uncountable early exits");
5723 assert(RecurSplice &&
"expected FirstOrderRecurrenceSplice");
5730 if (
any_of(RecurSplice->users(),
5731 [](
VPUser *U) { return !cast<VPRecipeBase>(U)->getRegion(); }) &&
5812 {},
"vector.recur.extract.for.phi");
5815 ExitPhi->replaceUsesOfWith(ExtractR, PenultimateElement);
5829 VPValue *WidenIVCandidate = BinOp->getOperand(0);
5830 VPValue *InvariantCandidate = BinOp->getOperand(1);
5832 std::swap(WidenIVCandidate, InvariantCandidate);
5846 auto *ClonedOp = BinOp->
clone();
5847 if (ClonedOp->getOperand(0) == WidenIV) {
5848 ClonedOp->setOperand(0, ScalarIV);
5850 assert(ClonedOp->getOperand(1) == WidenIV &&
"one operand must be WideIV");
5851 ClonedOp->setOperand(1, ScalarIV);
5866 auto CheckSentinel = [&SE](
const SCEV *IVSCEV,
5867 bool UseMax) -> std::optional<APSInt> {
5869 for (
bool Signed : {
true,
false}) {
5878 return std::nullopt;
5886 PhiR->getRecurrenceKind()))
5895 VPValue *BackedgeVal = PhiR->getBackedgeValue();
5909 !
match(FindLastSelect,
5918 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression, PSE,
5924 "IVOfExpressionToSink not being an AddRec must imply "
5925 "FindLastExpression not being an AddRec.");
5936 std::optional<APSInt> SentinelVal = CheckSentinel(IVSCEV, UseMax);
5937 bool UseSigned = SentinelVal && SentinelVal->isSigned();
5944 if (IVOfExpressionToSink) {
5945 const SCEV *FindLastExpressionSCEV =
5947 if (
match(FindLastExpressionSCEV,
5950 if (
auto NewSentinel =
5951 CheckSentinel(FindLastExpressionSCEV, NewUseMax)) {
5954 SentinelVal = *NewSentinel;
5955 UseSigned = NewSentinel->isSigned();
5957 IVSCEV = FindLastExpressionSCEV;
5958 IVOfExpressionToSink =
nullptr;
5968 if (AR->hasNoSignedWrap())
5970 else if (AR->hasNoUnsignedWrap())
5980 VPValue *NewFindLastSelect = BackedgeVal;
5982 if (!SentinelVal || IVOfExpressionToSink) {
5985 DebugLoc DL = FindLastSelect->getDefiningRecipe()->getDebugLoc();
5986 VPBuilder LoopBuilder(FindLastSelect->getDefiningRecipe());
5987 if (FindLastSelect->getDefiningRecipe()->getOperand(1) == PhiR)
5988 SelectCond = LoopBuilder.
createNot(SelectCond);
5995 if (SelectCond !=
Cond || IVOfExpressionToSink) {
5998 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression,
6007 VPIRFlags Flags(MinMaxKind,
false,
false,
6013 NewFindLastSelect, Flags, ExitDL);
6016 VPValue *VectorRegionExitingVal = ReducedIV;
6017 if (IVOfExpressionToSink)
6018 VectorRegionExitingVal =
6020 ReducedIV, IVOfExpressionToSink);
6023 VPValue *StartVPV = PhiR->getStartValue();
6030 NewRdxResult = MiddleBuilder.
createSelect(Cmp, VectorRegionExitingVal,
6040 AnyOfPhi->insertAfter(PhiR);
6047 OrVal, VectorRegionExitingVal, StartVPV, ExitDL);
6060 PhiR->hasUsesOutsideReductionChain());
6061 NewPhiR->insertBefore(PhiR);
6062 PhiR->replaceAllUsesWith(NewPhiR);
6063 PhiR->eraseFromParent();
6070struct ReductionExtend {
6071 Type *SrcType =
nullptr;
6072 ExtendKind Kind = ExtendKind::PR_None;
6078struct ExtendedReductionOperand {
6082 ReductionExtend ExtendA, ExtendB;
6090struct VPPartialReductionChain {
6093 VPWidenRecipe *ReductionBinOp =
nullptr;
6095 ExtendedReductionOperand ExtendedOp;
6102 unsigned AccumulatorOpIdx;
6103 unsigned ScaleFactor;
6115 if (!
Op->hasOneUse() ||
6121 auto *Trunc = Builder.createWidenCast(Instruction::CastOps::Trunc,
6122 Op->getOperand(1), NarrowTy);
6124 Op->setOperand(1, Builder.createWidenCast(ExtOpc, Trunc, WideTy));
6133 auto *
Sub =
Op->getOperand(0)->getDefiningRecipe();
6135 assert(Ext->getOpcode() ==
6137 "Expected both the LHS and RHS extends to be the same");
6138 bool IsSigned = Ext->getOpcode() == Instruction::SExt;
6141 auto *FreezeX = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
X}));
6142 auto *FreezeY = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
Y}));
6143 auto *
Max = Builder.insert(
6145 {FreezeX, FreezeY}, SrcTy));
6146 auto *Min = Builder.insert(
6148 {FreezeX, FreezeY}, SrcTy));
6151 return Builder.createWidenCast(Instruction::CastOps::ZExt, AbsDiff,
6152 Op->getScalarType());
6164 if (!
Mul->hasOneUse() ||
6165 (Ext->getOpcode() != MulLHS->getOpcode() && MulLHS != MulRHS) ||
6166 MulLHS->getOpcode() != MulRHS->getOpcode())
6169 auto *NewLHS = Builder.createWidenCast(
6170 MulLHS->getOpcode(), MulLHS->getOperand(0), Ext->getScalarType());
6171 auto *NewRHS = MulLHS == MulRHS
6173 : Builder.createWidenCast(MulRHS->getOpcode(),
6174 MulRHS->getOperand(0),
6175 Ext->getScalarType());
6176 auto *NewMul =
Mul->cloneWithOperands({NewLHS, NewRHS});
6177 Builder.insert(NewMul);
6178 Op->replaceAllUsesWith(NewMul);
6179 Op->eraseFromParent();
6180 Mul->eraseFromParent();
6189 VPValue *VecOp = Red->getVecOp();
6243static void transformToPartialReduction(
const VPPartialReductionChain &Chain,
6251 WidenRecipe->
getOperand(1 - Chain.AccumulatorOpIdx));
6267 if ((WidenRecipe->
getOpcode() == Instruction::Sub &&
6269 (WidenRecipe->
getOpcode() == Instruction::FSub &&
6274 if (WidenRecipe->
getOpcode() == Instruction::FSub) {
6284 Builder.insert(NegRecipe);
6285 ExtendedOp = NegRecipe;
6289 ExtendedOp = optimizeExtendsForPartialReduction(ExtendedOp);
6299 assert((!ExitValue || IsLastInChain) &&
6300 "if we found ExitValue, it must match RdxPhi's backedge value");
6311 PartialRed->insertBefore(WidenRecipe);
6319 E->insertBefore(WidenRecipe);
6320 PartialRed->replaceAllUsesWith(
E);
6333 auto *NewScaleFactor = Plan.
getConstantInt(32, Chain.ScaleFactor);
6334 StartInst->setOperand(2, NewScaleFactor);
6342 VPValue *OldStartValue = StartInst->getOperand(0);
6343 StartInst->setOperand(0, StartInst->getOperand(1));
6347 assert(RdxResult &&
"Could not find reduction result");
6350 unsigned SubOpc = Chain.RK ==
RecurKind::FSub ? Instruction::BinaryOps::FSub
6351 : Instruction::BinaryOps::Sub;
6357 [&NewResult](
VPUser &U,
unsigned Idx) {
return &
U != NewResult; });
6363 const VPPartialReductionChain &Link,
6366 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6367 std::optional<unsigned> BinOpc = std::nullopt;
6369 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6370 BinOpc = ExtendedOp.ExtendsUser->
getOpcode();
6372 std::optional<llvm::FastMathFlags>
Flags;
6376 auto GetLinkOpcode = [&Link]() ->
unsigned {
6379 return Instruction::Add;
6381 return Instruction::FAdd;
6383 return Link.ReductionBinOp->
getOpcode();
6388 GetLinkOpcode(), ExtendedOp.ExtendA.SrcType, ExtendedOp.ExtendB.SrcType,
6389 RdxType, VF, ExtendedOp.ExtendA.Kind, ExtendedOp.ExtendB.Kind, BinOpc,
6410static std::optional<ExtendedReductionOperand>
6413 "Op should be operand of UpdateR");
6421 if (
Op->hasOneUse() &&
6430 Type *RHSInputType =
Y->getScalarType();
6431 if (LHSInputType != RHSInputType ||
6432 LHSExt->getOpcode() != RHSExt->getOpcode())
6433 return std::nullopt;
6436 return ExtendedReductionOperand{
6438 {LHSInputType, getPartialReductionExtendKind(LHSExt)},
6442 std::optional<TTI::PartialReductionExtendKind> OuterExtKind;
6445 VPValue *CastSource = CastRecipe->getOperand(0);
6446 OuterExtKind = getPartialReductionExtendKind(CastRecipe);
6456 if (UpdateR->
getOpcode() == Instruction::Sub)
6457 return std::nullopt;
6459 return ExtendedReductionOperand{
6466 if (!
Op->hasOneUse())
6467 return std::nullopt;
6472 return std::nullopt;
6482 return std::nullopt;
6486 ExtendKind LHSExtendKind = getPartialReductionExtendKind(LHSCast);
6489 const APInt *RHSConst =
nullptr;
6495 return std::nullopt;
6499 if (Cast && OuterExtKind &&
6500 getPartialReductionExtendKind(Cast) != OuterExtKind)
6501 return std::nullopt;
6503 Type *RHSInputType = LHSInputType;
6504 ExtendKind RHSExtendKind = LHSExtendKind;
6507 RHSExtendKind = getPartialReductionExtendKind(RHSCast);
6510 return ExtendedReductionOperand{
6511 MulOp, {LHSInputType, LHSExtendKind}, {RHSInputType, RHSExtendKind}};
6518static std::optional<SmallVector<VPPartialReductionChain>>
6526 return std::nullopt;
6536 VPValue *CurrentValue = ExitValue;
6537 while (CurrentValue != RedPhiR) {
6540 return std::nullopt;
6547 std::optional<ExtendedReductionOperand> ExtendedOp =
6548 matchExtendedReductionOperand(UpdateR,
Op);
6550 ExtendedOp = matchExtendedReductionOperand(UpdateR, PrevValue);
6552 return std::nullopt;
6556 Type *ExtSrcType = ExtendedOp->ExtendA.SrcType;
6559 return std::nullopt;
6564 VPPartialReductionChain Link(
6565 {UpdateR, *ExtendedOp, RK,
6569 CurrentValue = PrevValue;
6574 std::reverse(Chain.
begin(), Chain.
end());
6593 if (
auto Chains = getScaledReductions(RedPhiR, CostCtx,
Range))
6594 ChainsByPhi.
try_emplace(RedPhiR, std::move(*Chains));
6597 if (ChainsByPhi.
empty())
6604 for (
const auto &[
_, Chains] : ChainsByPhi)
6605 for (
const VPPartialReductionChain &Chain : Chains) {
6606 PartialReductionOps.
insert(Chain.ExtendedOp.ExtendsUser);
6607 ScaledReductionMap[Chain.ReductionBinOp] = Chain.ScaleFactor;
6613 auto ExtendUsersValid = [&](
VPValue *Ext) {
6615 return PartialReductionOps.contains(cast<VPRecipeBase>(U));
6619 auto IsProfitablePartialReductionChainForVF =
6626 for (
const VPPartialReductionChain &Link : Chain) {
6627 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6628 InstructionCost LinkCost = getPartialReductionLinkCost(CostCtx, Link, VF);
6632 PartialCost += LinkCost;
6633 RegularCost += Link.ReductionBinOp->
computeCost(VF, CostCtx);
6635 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6636 RegularCost += ExtendedOp.ExtendsUser->
computeCost(VF, CostCtx);
6639 RegularCost += Extend->computeCost(VF, CostCtx);
6641 return PartialCost.
isValid() && PartialCost < RegularCost;
6649 for (
auto &[RedPhiR, Chains] : ChainsByPhi) {
6650 for (
const VPPartialReductionChain &Chain : Chains) {
6651 if (!
all_of(Chain.ExtendedOp.ExtendsUser->operands(), ExtendUsersValid)) {
6655 auto UseIsValid = [&, RedPhiR = RedPhiR](
VPUser *U) {
6657 return PhiR == RedPhiR;
6659 return Chain.ScaleFactor == ScaledReductionMap.
lookup_or(R, 0) ||
6665 if (!
all_of(Chain.ReductionBinOp->users(), UseIsValid)) {
6674 auto *RepR = dyn_cast<VPReplicateRecipe>(U);
6675 return RepR && RepR->getOpcode() == Instruction::Store;
6686 return IsProfitablePartialReductionChainForVF(Chains, VF);
6692 for (
auto &[Phi, Chains] : ChainsByPhi)
6693 for (
const VPPartialReductionChain &Chain : Chains)
6694 transformToPartialReduction(Chain, Plan, Phi);
6708 if (VPI && VPI->getUnderlyingValue() &&
6720 New->insertBefore(VPI);
6721 if (VPI->getOpcode() == Instruction::Load)
6722 VPI->replaceAllUsesWith(New->getVPSingleValue());
6723 VPI->eraseFromParent();
6728 FinalRedStoresBuilder))
6737 ReplaceWith(Histogram);
6745 ReplaceWith(Recipe);
6768 if (VPI->mayHaveSideEffects())
6772 if (VPI->isMasked() && !VPI->isSafeToSpeculativelyExecute())
6777 if (VPI->getOpcode() == Instruction::Add &&
6786 I, VPI->operandsWithoutMask(),
true,
6787 nullptr, *VPI, *VPI, VPI->getDebugLoc());
6788 Recipe->insertBefore(VPI);
6789 VPI->replaceAllUsesWith(Recipe);
6790 VPI->eraseFromParent();
6800 switch (Param.ParamKind) {
6801 case VFParamKind::Vector:
6802 case VFParamKind::GlobalPredicate:
6804 case VFParamKind::OMP_Uniform:
6805 return SE->isSCEVable(Args[Param.ParamPos]->getScalarType()) &&
6806 SE->isLoopInvariant(
6807 vputils::getSCEVExprForVPValue(Args[Param.ParamPos], PSE, L),
6809 case VFParamKind::OMP_Linear:
6810 return match(vputils::getSCEVExprForVPValue(Args[Param.ParamPos], PSE, L),
6811 m_scev_AffineAddRec(
6812 m_SCEV(), m_scev_SpecificSInt(Param.LinearStepOrPos),
6813 m_SpecificLoop(L)));
6830 const auto *It =
find_if(Mappings, [&](
const VFInfo &Info) {
6831 return Info.Shape.VF == VF && (!MaskRequired || Info.isMasked()) &&
6834 if (It == Mappings.end())
6841struct CallWideningDecision {
6842 enum class KindTy { Scalarize,
Intrinsic, VectorVariant };
6843 CallWideningDecision(KindTy Kind, Function *Variant =
nullptr)
6866 return CallWideningDecision::KindTy::Scalarize;
6876 return CallWideningDecision::KindTy::Scalarize;
6880 false, VF, CostCtx);
6895 return CallWideningDecision::KindTy::Intrinsic;
6899 if (VecFunc && ScalarCost >= VecCallCost)
6900 return {CallWideningDecision::KindTy::VectorVariant, VecFunc};
6902 return CallWideningDecision::KindTy::Scalarize;
6912 if (!VPI || !VPI->getUnderlyingValue() ||
6913 VPI->getOpcode() != Instruction::Call)
6918 VPI->op_begin() + CI->arg_size());
6920 CallWideningDecision Decision =
6929 switch (Decision.Kind) {
6930 case CallWideningDecision::KindTy::Intrinsic: {
6934 *VPI, VPI->getDebugLoc());
6937 case CallWideningDecision::KindTy::VectorVariant: {
6941 VPValue *Mask = VPI->isMasked() ? VPI->getMask() : Plan.
getTrue();
6942 Ops.push_back(Mask);
6944 Ops.push_back(VPI->getOperand(VPI->getNumOperandsWithoutMask() - 1));
6946 *VPI, VPI->getDebugLoc());
6949 case CallWideningDecision::KindTy::Scalarize:
6955 VPI->replaceAllUsesWith(Replacement);
6956 VPI->eraseFromParent();
6979 if (!LoadR || LoadR->isConsecutive())
6998 Align Alignment = LoadR->getAlign();
7001 if (!Ctx.TTI.isLegalStridedLoadStore(DataTy, Alignment))
7006 Intrinsic::experimental_vp_strided_load, DataTy,
7007 LoadR->isMasked(), Alignment, Ctx);
7008 return StridedLoadStoreCost < CurrentCost;
7019 Ctx.invalidateWideningDecision(&LoadR->getIngredient(), VF);
7024 I32VF = Builder.createScalarZExtOrTrunc(
7035 "Stride type from SCEV must match the index type");
7036 VPValue *CanIVTyStride = Builder.createScalarSExtOrTrunc(
7040 auto *
Offset = Builder.createOverflowingOp(
7042 {AddRecPtr->hasNoUnsignedWrap(), AddRecPtr->hasNoSignedWrap()});
7043 auto *BasePtr = Builder.createNoWrapPtrAdd(
7049 VPValue *NewPtr = Builder.createVectorPointer(
7051 Ptr->getGEPNoWrapFlags(), Ptr->getDebugLoc());
7053 VPValue *Mask = LoadR->getMask();
7056 auto *StridedLoad = Builder.createWidenMemIntrinsic(
7057 Intrinsic::experimental_vp_strided_load,
7058 {NewPtr, StrideInBytes, Mask, I32VF}, LoadTy, Alignment, *LoadR,
7059 LoadR->getDebugLoc());
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static cl::opt< IntrinsicCostStrategy > IntrinsicCost("intrinsic-cost-strategy", cl::desc("Costing strategy for intrinsic instructions"), cl::init(IntrinsicCostStrategy::InstructionCost), cl::values(clEnumValN(IntrinsicCostStrategy::InstructionCost, "instruction-cost", "Use TargetTransformInfo::getInstructionCost"), clEnumValN(IntrinsicCostStrategy::IntrinsicCost, "intrinsic-cost", "Use TargetTransformInfo::getIntrinsicInstrCost"), clEnumValN(IntrinsicCostStrategy::TypeBasedIntrinsicCost, "type-based-intrinsic-cost", "Calculate the intrinsic cost based only on argument types")))
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
iv Induction Variable Users
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on sets of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const X86InstrFMA3Group Groups[]
static const uint32_t IV[8]
Helper for extra no-alias checks via known-safe recipe and SCEV.
SinkStoreInfo(const SmallPtrSetImpl< VPRecipeBase * > &ExcludeRecipes, VPReplicateRecipe &GroupLeader, PredicatedScalarEvolution &PSE, const Loop &L)
bool shouldSkip(VPRecipeBase &R) const
Return true if R should be skipped during alias checking, either because it's in the exclude set or b...
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
uint64_t getZExtValue() const
Get zero extended value.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt abs() const
Get the absolute value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
An arbitrary precision integer that knows its signedness.
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
@ NoAlias
The two locations do not alias at all.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
Get the last element.
const T & front() const
Get the first element.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
This class represents a range of values.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string and methods for querying it.
LLVM_ABI IntegerType * getIndexType(LLVMContext &C, unsigned AddressSpace) const
Returns the type of a GEP index in AddressSpace.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
ValueT lookup_or(const_arg_type_t< KeyT > Val, U &&Default) const
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Convenience struct for specifying and reasoning about fast-math flags.
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags noUnsignedWrap()
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
static LLVM_ABI InductionDescriptor getCanonicalIntInduction(Type *Ty, ScalarEvolution &SE)
Returns the canonical integer induction for type Ty with start = 0 and step = 1.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_NoInduction
Not an induction variable.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
ValueT lookup(const KeyT &Key) const
std::pair< iterator, bool > try_emplace(const KeyT &Key, Ts &&...Args)
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Post-order traversal of a graph.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RegionT * getParent() const
Get the parent of the Region.
This class represents a constant integer value.
ConstantInt * getValue() const
This class uses information about analyzed scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(SCEVUse SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUDivExpr(SCEVUse LHS, SCEVUse RHS)
Get a canonical unsigned division expression, or something simpler if possible.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
static LLVM_ABI AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
const VPBlocksTy & getPredecessors() const
const std::string & getName() const
void clearSuccessors()
Remove all the successors of this block.
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksAs(T &&Range)
Return an iterator range over Range with each block cast to BlockTy.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
static SmallVector< VPBasicBlock * > blocksInSingleSuccessorChainBetween(VPBasicBlock *FirstBB, VPBasicBlock *LastBB)
Returns the blocks between FirstBB and LastBB, where FirstBB to LastBB forms a single-successor chain.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createFirstActiveLane(ArrayRef< VPValue * > Masks, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createAnyOfReduction(VPValue *ChainOp, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown())
Create an AnyOf reduction pattern: or-reduce ChainOp, freeze the result, then select between TrueVal ...
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step)
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPWidenPHIRecipe * createWidenPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPWidenCastRecipe * createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={}, Type *ResultTy=nullptr)
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", Type *ResultTy=nullptr)
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
A recipe for generating the phi node tracking the current scalar iteration index.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
A special type of VPBasicBlock that wraps an existing IR basic block.
Class to record and manage LLVM IR flags.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
unsigned getNumOperandsWithoutMask() const
Returns the number of operands, excluding the mask if the VPInstruction is masked.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Helper class to create VPRecipes from IR instructions.
VPHistogramRecipe * widenIfHistogram(VPInstruction *VPI)
If VPI represents a histogram operation (as determined by LoopVectorizationLegality) make that safe f...
VPRecipeBase * tryToWidenMemory(VPInstruction *VPI, VFRange &Range)
Check if the load or store instruction VPI should be widened for Range.Start and potentially masked.
bool replaceWithFinalIfReductionStore(VPInstruction *VPI, VPBuilder &FinalRedStoresBuilder)
If VPI is a store of a reduction into an invariant address, delete it.
VPReplicateRecipe * handleReplication(VPInstruction *VPI, VFRange &Range)
Build a VPReplicationRecipe for VPI.
Type * getScalarType() const
Returns the scalar type of this VPRecipeValue.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe for handling reduction phis.
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
VPInstruction * getOrCreateCanonicalIVIncrement()
Get the canonical IV increment instruction if it exists.
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
void clearCanonicalIVNUW(VPInstruction *Increment)
Unsets NUW for the canonical IV increment Increment, for loop regions.
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
static InstructionCost computeCallCost(Function *CalledFn, Type *ResultTy, ArrayRef< const VPValue * > ArgOps, bool IsSingleScalar, ElementCount VF, VPCostContext &Ctx)
Return the cost of scalarizing a call to CalledFn with argument operands ArgOps for a given VF.
bool isPredicated() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Lightweight SCEV-to-VPlan expander.
VPValue * tryToExpand(const SCEV *S)
Try to expand S into recipes and live-ins using the builder.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
unsigned getNumOperands() const
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Type * getScalarType() const
Returns the scalar type of this VPValue, dispatching based on the concrete subclass.
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
bool hasMoreThanOneUniqueUser() const
Returns true if the value has more than one unique user.
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
VPUser * getSingleUser()
Return the single user of this value, or nullptr if there is not exactly one user.
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A recipe for widening Call instructions using library calls.
static InstructionCost computeCallCost(Function *Variant, VPCostContext &Ctx)
Return the cost of widening a call using the vector function Variant.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
VPIRValue * getStartValue() const
Returns the start value of the induction.
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
A recipe for widening vector intrinsics.
static InstructionCost computeCallCost(Intrinsic::ID ID, ArrayRef< const VPValue * > Operands, const VPRecipeWithIRFlags &R, ElementCount VF, VPCostContext &Ctx)
Compute the cost of a vector intrinsic with ID and Operands.
static InstructionCost computeMemIntrinsicCost(Intrinsic::ID IID, Type *Ty, bool IsMasked, Align Alignment, VPCostContext &Ctx)
Helper function for computing the cost of vector memory intrinsic.
A common mixin class for widening memory operations.
virtual VPRecipeBase * getAsRecipe()=0
Return a VPRecipeBase* to the current object.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
VPWidenRecipe * clone() override
Clone the current recipe.
unsigned getOpcode() const
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
bool hasVF(ElementCount VF) const
const DataLayout & getDataLayout() const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
bool hasScalableVF() const
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPSymbolicValue & getVectorTripCount()
The vector trip count.
VPValue * getBackedgeTakenCount() const
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
VPBasicBlock * getVectorPreheader() const
Returns the preheader of the vector loop region, if one exists, or null otherwise.
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
constexpr ScalarTy getFixedValue() const
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_isa< To... > m_Isa()
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
auto m_Cmp()
Matches any compare instruction and ignores it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
match_combine_or< CastInst_match< OpTy, TruncInst >, OpTy > m_TruncOrSelf(const OpTy &Op)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches an Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::FAdd, true > m_c_FAdd(const LHS &L, const RHS &R)
Matches FAdd with LHS and RHS in either order.
LogicalOp_match< LHS, RHS, Instruction::And, true > m_c_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R with LHS and RHS in either order.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
specificloop_ty m_SpecificLoop(const Loop *L)
bool match(const SCEV *S, const Pattern &P)
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ComputeReductionResult, Op0_t > m_ComputeReductionResult(const Op0_t &Op0)
auto m_WidenAnyExtend(const Op0_t &Op0)
match_bind< VPIRValue > m_VPIRValue(VPIRValue *&V)
Match a VPIRValue.
VPInstruction_match< VPInstruction::StepVector > m_StepVector()
auto m_VPPhi(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
auto m_WidenIntrinsic(const T &...Ops)
canonical_widen_iv_match m_CanonicalWidenIV()
VPInstruction_match< VPInstruction::ExitingIVValue, Op0_t > m_ExitingIVValue(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
match_bind< VPSingleDefRecipe > m_VPSingleDefRecipe(VPSingleDefRecipe *&V)
Match a VPSingleDefRecipe, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
auto m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_True()
auto m_VPValue()
Match an arbitrary VPValue and ignore it.
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
match_bind< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
auto m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
auto m_AnyNeg(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
bool cannotHoistOrSinkRecipe(const VPRecipeBase &R, bool Sinking=false)
Return true if we do not know how to (mechanically) hoist or sink R.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
VPInstruction * findCanonicalIVIncrement(VPlan &Plan)
Find the canonical IV increment of Plan's vector loop region.
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPValue * findIncomingAliasMask(const VPlan &Plan)
Finds the incoming alias-mask within the vector preheader.
VPRecipeBase * findRecipe(VPValue *Start, PredT Pred)
Search Start's users for a recipe satisfying Pred, looking through recipes with definitions.
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) Note: If ...
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
bool isUniformAcrossVFsAndUFs(const VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms li...
void stable_sort(R &&Range)
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
constexpr auto bind_back(FnT &&Fn, BindArgsT &&...BindArgs)
C++23 bind_back.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
SmallVector< VPRegisterUsage, 8 > calculateRegisterUsageForPlan(VPlan &Plan, ArrayRef< ElementCount > VFs, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &ValuesToIgnore)
Estimate the register usage for Plan and vectorization factors in VFs by calculating the highest numb...
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
UncountableExitStyle
Different methods of handling early exits.
@ ReadOnly
No side effects to worry about, so we can process any uncountable exits in the loop and branch either...
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and returns a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ FSub
Subtraction of floats.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector....
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto make_second_range(ContainerTy &&c)
Given a container of pairs, return a range over the second elements.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result value is uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
MDNode * Scope
The tag for alias scope specification (used with noalias).
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
This reduction is unordered with the partial result scaled down by some factor.
Holds the VFShape for a specific scalar to vector function mapping.
Encapsulates information needed to describe a parameter.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
static bool isFreeScalarIntrinsic(Intrinsic::ID ID)
Returns true if ID is a pseudo intrinsic that is dropped via scalarization rather than widened.
bool isMaskRequired(Instruction *I) const
Forwards to LoopVectorizationCostModel::isMaskRequired.
PredicatedScalarEvolution & PSE
bool willBeScalarized(Instruction *I, ElementCount VF) const
Returns true if I is known to be scalarized at VF.
TargetTransformInfo::TargetCostKind CostKind
const TargetLibraryInfo & TLI
const TargetTransformInfo & TTI
A VPValue representing a live-in from the input IR or a constant.
Type * getType() const
Returns the type of the underlying IR value.
A struct that represents some properties of the register usage of a loop.
SmallMapVector< unsigned, unsigned, 4 > MaxLocalUsers
Holds the maximum number of concurrent live intervals in the loop.
InstructionCost spillCost(const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, unsigned OverrideMaxNumRegs=0) const
Calculate the estimated cost of any spills due to using more registers than the number available for ...
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
bool isMaterialized() const
Returns true if this symbolic value has been materialized.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...