57 if (!VPBB->getParent())
60 auto EndIter = Term ? Term->getIterator() : VPBB->end();
65 VPValue *VPV = Ingredient.getVPSingleValue();
81 *Load, Ingredient.getOperand(0),
nullptr ,
82 false , *VPI, Ingredient.getDebugLoc());
85 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
86 nullptr ,
false , *VPI,
87 Ingredient.getDebugLoc());
90 Ingredient.getDebugLoc());
102 if (VectorID == Intrinsic::experimental_noalias_scope_decl)
107 if (VectorID == Intrinsic::assume ||
108 VectorID == Intrinsic::lifetime_end ||
109 VectorID == Intrinsic::lifetime_start ||
110 VectorID == Intrinsic::sideeffect ||
111 VectorID == Intrinsic::pseudoprobe) {
116 const bool IsSingleScalar = VectorID != Intrinsic::assume &&
117 VectorID != Intrinsic::pseudoprobe;
121 Ingredient.getDebugLoc());
124 *CI, VectorID,
drop_end(Ingredient.operands()), CI->getType(),
125 VPIRFlags(*CI), *VPI, CI->getDebugLoc());
129 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
133 *VPI, Ingredient.getDebugLoc());
137 "inductions must be created earlier");
146 "Only recpies with zero or one defined values expected");
147 Ingredient.eraseFromParent();
164 if (
A->getOpcode() != Instruction::Store ||
165 B->getOpcode() != Instruction::Store)
175 const APInt *Distance;
181 Type *TyA = TypeInfo.inferScalarType(
A->getOperand(0));
183 Type *TyB = TypeInfo.inferScalarType(
B->getOperand(0));
189 uint64_t MaxStoreSize = std::max(SizeA, SizeB);
191 auto VFs =
B->getParent()->getPlan()->vectorFactors();
195 return Distance->
abs().
uge(
203 : ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), PSE(PSE),
204 L(L), TypeInfo(TypeInfo) {}
211 return ExcludeRecipes.contains(&R) ||
212 (Store && isNoAliasViaDistance(Store, &GroupLeader));
225 std::optional<SinkStoreInfo> SinkInfo = {}) {
226 bool CheckReads = SinkInfo.has_value();
233 if (SinkInfo && SinkInfo->shouldSkip(R))
237 if (!
R.mayWriteToMemory() && !(CheckReads &&
R.mayReadFromMemory()))
255template <
unsigned Opcode>
260 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
261 "Only Load and Store opcodes supported");
262 constexpr bool IsLoad = (Opcode == Instruction::Load);
269 if (!RepR || RepR->getOpcode() != Opcode || !FilterFn(RepR))
273 VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
276 RecipesByAddress[AddrSCEV].push_back(RepR);
281 for (
auto &Group :
Groups) {
296 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
308 if (Candidate->getParent() == SinkTo ||
313 if (!ScalarVFOnly && RepR->isSingleScalar())
316 WorkList.
insert({SinkTo, Candidate});
328 for (
auto &Recipe : *VPBB)
330 InsertIfValidSinkCandidate(VPBB,
Op);
334 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
337 std::tie(SinkTo, SinkCandidate) = WorkList[
I];
342 auto UsersOutsideSinkTo =
344 return cast<VPRecipeBase>(U)->getParent() != SinkTo;
346 if (
any_of(UsersOutsideSinkTo, [SinkCandidate](
VPUser *U) {
347 return !U->usesFirstLaneOnly(SinkCandidate);
350 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
352 if (NeedsDuplicating) {
356 if (
auto *SinkCandidateRepR =
362 nullptr , *SinkCandidateRepR,
366 Clone = SinkCandidate->
clone();
376 InsertIfValidSinkCandidate(SinkTo,
Op);
386 if (!EntryBB || EntryBB->size() != 1 ||
396 if (EntryBB->getNumSuccessors() != 2)
401 if (!Succ0 || !Succ1)
404 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
406 if (Succ0->getSingleSuccessor() == Succ1)
408 if (Succ1->getSingleSuccessor() == Succ0)
425 if (!Region1->isReplicator())
427 auto *MiddleBasicBlock =
429 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
434 if (!Region2 || !Region2->isReplicator())
439 if (!Mask1 || Mask1 != Mask2)
442 assert(Mask1 && Mask2 &&
"both region must have conditions");
448 if (TransformedRegions.
contains(Region1))
455 if (!Then1 || !Then2)
475 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
481 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
482 Phi1ToMove.eraseFromParent();
485 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
499 TransformedRegions.
insert(Region1);
502 return !TransformedRegions.
empty();
510 std::string RegionName = (
Twine(
"pred.") + Instr->getOpcodeName()).str();
511 assert(Instr->getParent() &&
"Predicated instruction not in any basic block");
512 auto *BlockInMask = PredRecipe->
getMask();
533 Region->setParent(ParentRegion);
539 RecipeWithoutMask->getDebugLoc());
540 Exiting->appendRecipe(PHIRecipe);
553 if (RepR->isPredicated())
572 if (ParentRegion && ParentRegion->
getExiting() == CurrentBlock)
584 if (!VPBB->getParent())
588 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
597 R.moveBefore(*PredVPBB, PredVPBB->
end());
599 auto *ParentRegion = VPBB->getParent();
600 if (ParentRegion && ParentRegion->getExiting() == VPBB)
601 ParentRegion->setExiting(PredVPBB);
605 return !WorkList.
empty();
612 bool ShouldSimplify =
true;
613 while (ShouldSimplify) {
629 if (!
IV ||
IV->getTruncInst())
644 for (
auto *U : FindMyCast->
users()) {
646 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
647 FoundUserCast = UserCast;
654 FindMyCast = FoundUserCast;
656 if (FindMyCast !=
IV)
671 Builder.createDerivedIV(Kind, FPBinOp, StartV, CanonicalIV, Step);
681 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy,
DL);
687 if (ResultTy != StepTy) {
694 Builder.setInsertPoint(VecPreheader);
695 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy,
DL);
697 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
723 WideCanIV->getDebugLoc(), Builder));
724 WideCanIV->eraseFromParent();
736 if (!WidenIV || !WidenIV->isCanonical())
740 WidenIV->dropPoisonGeneratingFlags();
741 WideCanIV->replaceAllUsesWith(WidenIV);
742 WideCanIV->eraseFromParent();
751 if (PHICost > BroadcastCost)
760 unsigned RegClass =
TTI.getRegisterClassForType(
true, VecTy);
772 WideCanIV->getNoWrapFlags(), WideCanIV->getDebugLoc());
773 NewWideIV->insertBefore(&*Header->getFirstNonPhi());
774 WideCanIV->replaceAllUsesWith(NewWideIV);
775 WideCanIV->eraseFromParent();
783 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
785 if (IsConditionalAssume)
788 if (R.mayHaveSideEffects())
792 return all_of(R.definedValues(),
793 [](
VPValue *V) { return V->getNumUsers() == 0; });
813 VPUser *PhiUser = PhiR->getSingleUser();
819 PhiR->replaceAllUsesWith(Start);
820 PhiR->eraseFromParent();
828 for (
unsigned I = 0;
I !=
Users.size(); ++
I) {
831 Users.insert_range(V->users());
833 return Users.takeVector();
847 nullptr, StartV, StepV, PtrIV->
getDebugLoc(), Builder);
884 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
885 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
898 Def->operands(),
true,
900 Clone->insertAfter(Def);
901 Def->replaceAllUsesWith(Clone);
912 PtrIV->replaceAllUsesWith(PtrAdd);
919 if (HasOnlyVectorVFs &&
none_of(WideIV->users(), [WideIV](
VPUser *U) {
920 return U->usesScalars(WideIV);
926 Plan,
ID.getKind(),
ID.getInductionOpcode(),
928 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
929 WideIV->getDebugLoc(), Builder);
932 if (!HasOnlyVectorVFs) {
934 "plans containing a scalar VF cannot also include scalable VFs");
935 WideIV->replaceAllUsesWith(Steps);
938 WideIV->replaceUsesWithIf(Steps,
939 [WideIV, HasScalableVF](
VPUser &U,
unsigned) {
941 return U.usesFirstLaneOnly(WideIV);
942 return U.usesScalars(WideIV);
958 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
963 if (!Def || Def->getNumOperands() != 2)
971 auto IsWideIVInc = [&]() {
972 auto &
ID = WideIV->getInductionDescriptor();
975 VPValue *IVStep = WideIV->getStepValue();
976 switch (
ID.getInductionOpcode()) {
977 case Instruction::Add:
979 case Instruction::FAdd:
981 case Instruction::FSub:
984 case Instruction::Sub: {
1004 return IsWideIVInc() ? WideIV :
nullptr;
1023 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1034 VPValue *FirstActiveLane =
B.createFirstActiveLane(Mask,
DL);
1036 FirstActiveLane =
B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
1037 FirstActiveLaneType,
DL);
1038 VPValue *EndValue =
B.createAdd(CanonicalIV, FirstActiveLane,
DL);
1043 if (Incoming != WideIV) {
1045 EndValue =
B.createAdd(EndValue, One,
DL);
1048 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
1050 VPIRValue *Start = WideIV->getStartValue();
1051 VPValue *Step = WideIV->getStepValue();
1052 EndValue =
B.createDerivedIV(
1054 Start, EndValue, Step);
1069 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1076 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
1079 Start, VectorTC, Step);
1108 assert(EndValue &&
"Must have computed the end value up front");
1113 if (Incoming != WideIV)
1125 auto *Zero = Plan.
getZero(StepTy);
1126 return B.createPtrAdd(EndValue,
B.createSub(Zero, Step),
1131 return B.createNaryOp(
1132 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
1134 : Instruction::FAdd,
1135 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
1147 VPBuilder VectorPHBuilder(VectorPH, VectorPH->begin());
1156 WideIV, VectorPHBuilder, TypeInfo, ResumeTC))
1157 EndValues[WideIV] = EndValue;
1167 R.getVPSingleValue()->replaceAllUsesWith(EndValue);
1168 R.eraseFromParent();
1177 for (
auto [Idx, PredVPBB] :
enumerate(ExitVPBB->getPredecessors())) {
1179 if (PredVPBB == MiddleVPBB)
1181 Plan, TypeInfo, ExitIRI->getOperand(Idx), EndValues, PSE);
1184 Plan, TypeInfo, ExitIRI->getOperand(Idx), PSE);
1186 ExitIRI->setOperand(Idx, Escape);
1203 const auto &[V, Inserted] = SCEV2VPV.
try_emplace(ExpR->getSCEV(), ExpR);
1206 ExpR->replaceAllUsesWith(V->second);
1207 ExpR->eraseFromParent();
1216 while (!WorkList.
empty()) {
1218 if (!Seen.
insert(Cur).second)
1226 R->eraseFromParent();
1233static std::optional<std::pair<bool, unsigned>>
1236 std::optional<std::pair<bool, unsigned>>>(R)
1239 [](
auto *
I) {
return std::make_pair(
false,
I->getOpcode()); })
1241 return std::make_pair(
true,
I->getVectorIntrinsicID());
1243 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe, VPScalarIVStepsRecipe>(
1249 I->getVPRecipeID());
1251 .
Default([](
auto *) {
return std::nullopt; });
1269 Value *V =
Op->getUnderlyingValue();
1275 auto FoldToIRValue = [&]() ->
Value * {
1277 if (OpcodeOrIID->first) {
1278 if (R.getNumOperands() != 2)
1280 unsigned ID = OpcodeOrIID->second;
1281 return Folder.FoldBinaryIntrinsic(
ID,
Ops[0],
Ops[1],
1284 unsigned Opcode = OpcodeOrIID->second;
1293 return Folder.FoldSelect(
Ops[0],
Ops[1],
1296 return Folder.FoldBinOp(Instruction::BinaryOps::Xor,
Ops[0],
1298 case Instruction::Select:
1299 return Folder.FoldSelect(
Ops[0],
Ops[1],
Ops[2]);
1300 case Instruction::ICmp:
1301 case Instruction::FCmp:
1304 case Instruction::GetElementPtr: {
1307 return Folder.FoldGEP(
GEP->getSourceElementType(),
Ops[0],
1317 case Instruction::ExtractElement:
1324 if (
Value *V = FoldToIRValue())
1325 return R.getParent()->getPlan()->getOrAddLiveIn(V);
1331 VPlan *Plan = Def->getParent()->getPlan();
1337 return Def->replaceAllUsesWith(V);
1343 PredPHI->replaceAllUsesWith(
Op);
1356 bool CanCreateNewRecipe =
1363 if (TruncTy == ATy) {
1364 Def->replaceAllUsesWith(
A);
1373 : Instruction::ZExt;
1376 if (
auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1378 Ext->setUnderlyingValue(UnderlyingExt);
1380 Def->replaceAllUsesWith(Ext);
1382 auto *Trunc = Builder.createWidenCast(Instruction::Trunc,
A, TruncTy);
1383 Def->replaceAllUsesWith(Trunc);
1391 for (
VPUser *U :
A->users()) {
1393 for (
VPValue *VPV : R->definedValues())
1407 Def->replaceAllUsesWith(
X);
1408 Def->eraseFromParent();
1414 return Def->replaceAllUsesWith(
1419 return Def->replaceAllUsesWith(
X);
1423 return Def->replaceAllUsesWith(
1428 return Def->replaceAllUsesWith(
1433 return Def->replaceAllUsesWith(
X);
1437 return Def->replaceAllUsesWith(Plan->
getFalse());
1441 return Def->replaceAllUsesWith(
X);
1444 if (CanCreateNewRecipe &&
1449 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1450 !Def->getOperand(1)->hasMoreThanOneUniqueUser()))
1451 return Def->replaceAllUsesWith(
1452 Builder.createLogicalAnd(
X, Builder.createOr(
Y, Z)));
1457 return Def->replaceAllUsesWith(Def->getOperand(1));
1462 return Def->replaceAllUsesWith(Builder.createLogicalAnd(
X,
Y));
1466 return Def->replaceAllUsesWith(Plan->
getFalse());
1469 return Def->replaceAllUsesWith(
X);
1473 if (CanCreateNewRecipe &&
1475 return Def->replaceAllUsesWith(Builder.createNot(
C));
1479 Def->setOperand(0,
C);
1480 Def->setOperand(1,
Y);
1481 Def->setOperand(2,
X);
1486 if (CanCreateNewRecipe &&
1491 return Def->replaceAllUsesWith(
1492 Builder.createOr(
Y, Builder.createLogicalAnd(
X, Z)));
1495 return Def->replaceAllUsesWith(
A);
1498 return Def->replaceAllUsesWith(
A);
1501 return Def->replaceAllUsesWith(
1508 return Def->replaceAllUsesWith(
1510 Def->getDebugLoc(),
"", NW));
1513 if (CanCreateNewRecipe &&
1521 ->hasNoSignedWrap()};
1522 return Def->replaceAllUsesWith(
1523 Builder.createSub(
X,
Y, Def->getDebugLoc(),
"", NW));
1529 return Def->replaceAllUsesWith(Builder.createNaryOp(
1531 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1536 return Def->replaceAllUsesWith(Builder.createNaryOp(
1538 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1543 return Def->replaceAllUsesWith(
A);
1558 R->setOperand(1,
Y);
1559 R->setOperand(2,
X);
1563 R->replaceAllUsesWith(Cmp);
1568 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1569 Cmp->setDebugLoc(Def->getDebugLoc());
1581 if (
Op->getNumUsers() > 1 ||
1585 }
else if (!UnpairedCmp) {
1586 UnpairedCmp =
Op->getDefiningRecipe();
1590 UnpairedCmp =
nullptr;
1597 if (NewOps.
size() < Def->getNumOperands()) {
1599 return Def->replaceAllUsesWith(NewAnyOf);
1606 if (CanCreateNewRecipe &&
1612 return Def->replaceAllUsesWith(NewCmp);
1620 return Def->replaceAllUsesWith(Def->getOperand(1));
1626 X = Builder.createWidenCast(Instruction::Trunc,
X, WideStepTy);
1627 Def->replaceAllUsesWith(
X);
1637 Def->setOperand(1, Def->getOperand(0));
1638 Def->setOperand(0,
Y);
1645 return Def->replaceAllUsesWith(Def->getOperand(0));
1651 Def->replaceAllUsesWith(
1652 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1657 return Def->replaceAllUsesWith(
X);
1660 return Def->replaceAllUsesWith(
A);
1663 return Def->replaceAllUsesWith(
A);
1669 Def->replaceAllUsesWith(
1670 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1677 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1682 Def->replaceAllUsesWith(
1692 "broadcast operand must be single-scalar");
1693 Def->setOperand(0,
C);
1698 return Def->replaceUsesWithIf(
1699 X, [Def](
const VPUser &U,
unsigned) {
return U.usesScalars(Def); });
1702 if (Def->getNumOperands() == 1) {
1703 Def->replaceAllUsesWith(Def->getOperand(0));
1708 Phi->replaceAllUsesWith(Phi->getOperand(0));
1714 if (Def->getNumOperands() == 1 &&
1716 return Def->replaceAllUsesWith(IRV);
1729 return Def->replaceAllUsesWith(
A);
1732 Def->replaceAllUsesWith(Builder.createNaryOp(
1733 Instruction::ExtractElement, {A, LaneToExtract}, Def->getDebugLoc()));
1747 auto *IVInc = Def->getOperand(0);
1748 if (IVInc->getNumUsers() == 2) {
1753 if (Phi->getNumUsers() == 1 || (Phi->getNumUsers() == 2 && Inc)) {
1754 Def->replaceAllUsesWith(IVInc);
1756 Inc->replaceAllUsesWith(Phi);
1757 Phi->setOperand(0,
Y);
1773 Steps->replaceAllUsesWith(Steps->getOperand(0));
1781 Def->replaceUsesWithIf(StartV, [](
const VPUser &U,
unsigned Idx) {
1783 return PhiR && PhiR->isInLoop();
1789 return Def->replaceAllUsesWith(
A);
1816 while (!Worklist.
empty()) {
1825 R->replaceAllUsesWith(
1826 Builder.createLogicalAnd(HeaderMask, Builder.createLogicalAnd(
X,
Y)));
1830static std::optional<Instruction::BinaryOps>
1833 case Intrinsic::masked_udiv:
1834 return Instruction::UDiv;
1835 case Intrinsic::masked_sdiv:
1836 return Instruction::SDiv;
1837 case Intrinsic::masked_urem:
1838 return Instruction::URem;
1839 case Intrinsic::masked_srem:
1840 return Instruction::SRem;
1857 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1861 if (RepR && RepR->getOpcode() == Instruction::Store &&
1864 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1865 true ,
nullptr , *RepR ,
1866 *RepR , RepR->getDebugLoc());
1867 Clone->insertBefore(RepOrWidenR);
1869 VPValue *ExtractOp = Clone->getOperand(0);
1875 Clone->setOperand(0, ExtractOp);
1876 RepR->eraseFromParent();
1888 VPValue *SafeDivisor = Builder.createSelect(
1889 IntrR->getOperand(2), IntrR->getOperand(1),
1891 VPValue *Clone = Builder.createNaryOp(
1892 *
Opc, {IntrR->getOperand(0), SafeDivisor},
1895 IntrR->eraseFromParent();
1904 auto IntroducesBCastOf = [](
const VPValue *
Op) {
1913 return !U->usesScalars(
Op);
1917 if (
any_of(RepOrWidenR->users(), IntroducesBCastOf(RepOrWidenR)) &&
1920 make_filter_range(Op->users(), not_equal_to(RepOrWidenR)),
1921 IntroducesBCastOf(Op)))
1925 auto *IRV = dyn_cast<VPIRValue>(Op);
1926 bool LiveInNeedsBroadcast = IRV && !isa<Constant>(IRV->getValue());
1927 auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
1928 return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
1933 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1934 true ,
nullptr, *RepOrWidenR);
1935 Clone->insertBefore(RepOrWidenR);
1936 RepOrWidenR->replaceAllUsesWith(Clone);
1938 RepOrWidenR->eraseFromParent();
1974 if (Blend->isNormalized() || !
match(Blend->getMask(0),
m_False()))
1975 UniqueValues.
insert(Blend->getIncomingValue(0));
1976 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
1978 UniqueValues.
insert(Blend->getIncomingValue(
I));
1980 if (UniqueValues.
size() == 1) {
1981 Blend->replaceAllUsesWith(*UniqueValues.
begin());
1982 Blend->eraseFromParent();
1986 if (Blend->isNormalized())
1992 unsigned StartIndex = 0;
1993 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
1998 if (Mask->getNumUsers() == 1 && !
match(Mask,
m_False())) {
2005 OperandsWithMask.
push_back(Blend->getIncomingValue(StartIndex));
2007 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
2008 if (
I == StartIndex)
2010 OperandsWithMask.
push_back(Blend->getIncomingValue(
I));
2011 OperandsWithMask.
push_back(Blend->getMask(
I));
2016 OperandsWithMask, *Blend, Blend->getDebugLoc());
2017 NewBlend->insertBefore(&R);
2019 VPValue *DeadMask = Blend->getMask(StartIndex);
2021 Blend->eraseFromParent();
2026 if (NewBlend->getNumOperands() == 3 &&
2028 VPValue *Inc0 = NewBlend->getOperand(0);
2029 VPValue *Inc1 = NewBlend->getOperand(1);
2030 VPValue *OldMask = NewBlend->getOperand(2);
2031 NewBlend->setOperand(0, Inc1);
2032 NewBlend->setOperand(1, Inc0);
2033 NewBlend->setOperand(2, NewMask);
2060 APInt MaxVal = AlignedTC - 1;
2063 unsigned NewBitWidth =
2069 bool MadeChange =
false;
2078 if (!WideIV || !WideIV->isCanonical() ||
2079 WideIV->hasMoreThanOneUniqueUser() ||
2080 NewIVTy == WideIV->getScalarType())
2085 VPUser *SingleUser = WideIV->getSingleUser();
2093 assert(!WideIV->getTruncInst() &&
2094 "canonical IV is not expected to have a truncation");
2096 WideIV->getPHINode(), Plan.
getZero(NewIVTy),
2098 WideIV->getInductionDescriptor(), *WideIV, WideIV->getDebugLoc());
2099 NewWideIV->insertBefore(WideIV);
2106 Cmp->replaceAllUsesWith(
2107 VPBuilder(Cmp).createICmp(Cmp->getPredicate(), NewWideIV, NewBTC));
2121 return any_of(
Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
2123 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, PSE);
2137 const SCEV *VectorTripCount =
2142 "Trip count SCEV must be computable");
2163 auto *Term = &ExitingVPBB->
back();
2176 for (
unsigned Part = 0; Part < UF; ++Part) {
2182 Extracts[Part] = Ext;
2194 match(Phi->getBackedgeValue(),
2196 assert(Index &&
"Expected index from ActiveLaneMask instruction");
2213 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
2220 "Expected incoming values of Phi to be ActiveLaneMasks");
2225 EntryALM->setOperand(2, ALMMultiplier);
2226 LoopALM->setOperand(2, ALMMultiplier);
2230 ExtractFromALM(EntryALM, EntryExtracts);
2235 ExtractFromALM(LoopALM, LoopExtracts);
2237 Not->setOperand(0, LoopExtracts[0]);
2240 for (
unsigned Part = 0; Part < UF; ++Part) {
2241 Phis[Part]->setStartValue(EntryExtracts[Part]);
2242 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
2255 auto *Term = &ExitingVPBB->
back();
2267 const SCEV *VectorTripCount =
2273 "Trip count SCEV must be computable");
2292 Term->setOperand(1, Plan.
getTrue());
2297 {}, Term->getDebugLoc());
2299 Term->eraseFromParent();
2334 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
2344 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
2345 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
2363 RecurKind RK = PhiR->getRecurrenceKind();
2370 RecWithFlags->dropPoisonGeneratingFlags();
2376struct VPCSEDenseMapInfo :
public DenseMapInfo<VPSingleDefRecipe *> {
2378 return Def == getEmptyKey() || Def == getTombstoneKey();
2389 return GEP->getSourceElementType();
2392 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2393 [](
auto *
I) {
return I->getSourceElementType(); })
2394 .
Default([](
auto *) {
return nullptr; });
2398 static bool canHandle(
const VPSingleDefRecipe *Def) {
2407 if (!
C || (!
C->first && (
C->second == Instruction::InsertValue ||
2408 C->second == Instruction::ExtractValue)))
2414 return !
Def->mayReadFromMemory();
2418 static unsigned getHashValue(
const VPSingleDefRecipe *Def) {
2419 const VPlan *Plan =
Def->getParent()->getPlan();
2420 VPTypeAnalysis TypeInfo(*Plan);
2423 getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
2426 if (RFlags->hasPredicate())
2429 return hash_combine(Result, SIVSteps->getInductionOpcode());
2434 static bool isEqual(
const VPSingleDefRecipe *L,
const VPSingleDefRecipe *R) {
2437 if (
L->getVPRecipeID() !=
R->getVPRecipeID() ||
2439 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2441 !
equal(
L->operands(),
R->operands()))
2444 "must have valid opcode info for both recipes");
2446 if (LFlags->hasPredicate() &&
2447 LFlags->getPredicate() !=
2451 if (LSIV->getInductionOpcode() !=
2457 const VPRegionBlock *RegionL =
L->getRegion();
2458 const VPRegionBlock *RegionR =
R->getRegion();
2461 L->getParent() !=
R->getParent())
2463 const VPlan *Plan =
L->getParent()->getPlan();
2464 VPTypeAnalysis TypeInfo(*Plan);
2465 return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
2481 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2485 if (!VPDT.
dominates(V->getParent(), VPBB))
2490 Def->replaceAllUsesWith(V);
2521 "Expected vector prehader's successor to be the vector loop region");
2529 return !Op->isDefinedOutsideLoopRegions();
2532 R.moveBefore(*Preheader, Preheader->
end());
2550 assert(!RepR->isPredicated() &&
2551 "Expected prior transformation of predicated replicates to "
2552 "replicate regions");
2557 if (!RepR->isSingleScalar())
2569 if (
any_of(Def->users(), [&SinkBB, &LoopRegion](
VPUser *U) {
2570 auto *UserR = cast<VPRecipeBase>(U);
2571 VPBasicBlock *Parent = UserR->getParent();
2573 if (SinkBB && SinkBB != Parent)
2578 return UserR->isPhi() || Parent->getEnclosingLoopRegion() ||
2579 Parent->getSinglePredecessor() != LoopRegion;
2589 "Defining block must dominate sink block");
2615 VPValue *ResultVPV = R.getVPSingleValue();
2617 unsigned NewResSizeInBits = MinBWs.
lookup(UI);
2618 if (!NewResSizeInBits)
2631 (void)OldResSizeInBits;
2639 VPW->dropPoisonGeneratingFlags();
2641 assert((OldResSizeInBits != NewResSizeInBits ||
2643 "Only ICmps should not need extending the result.");
2649 if (OldResSizeInBits != NewResSizeInBits) {
2651 Instruction::ZExt, ResultVPV, OldResTy);
2653 Ext->setOperand(0, ResultVPV);
2663 unsigned OpSizeInBits =
2665 if (OpSizeInBits == NewResSizeInBits)
2667 assert(OpSizeInBits > NewResSizeInBits &&
"nothing to truncate");
2668 auto [ProcessedIter, Inserted] = ProcessedTruncs.
try_emplace(
Op);
2674 Builder.setInsertPoint(&R);
2675 ProcessedIter->second =
2676 Builder.createWidenCast(Instruction::Trunc,
Op, NewResTy);
2678 Op = ProcessedIter->second;
2682 NWR->insertBefore(&R);
2686 VPValue *Replacement = NWR->getVPSingleValue();
2687 if (OldResSizeInBits != NewResSizeInBits)
2693 R.eraseFromParent();
2699 std::optional<VPDominatorTree> VPDT;
2716 assert(VPBB->getNumSuccessors() == 2 &&
2717 "Two successors expected for BranchOnCond");
2718 unsigned RemovedIdx;
2729 "There must be a single edge between VPBB and its successor");
2737 VPBB->back().eraseFromParent();
2749 if (Reachable.contains(
B))
2760 for (
VPValue *Def : R.definedValues())
2761 Def->replaceAllUsesWith(&Tmp);
2762 R.eraseFromParent();
2819 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
2830 auto *EntryIncrement = Builder.createOverflowingOp(
2832 DL,
"index.part.next");
2838 {EntryIncrement, TC, ALMMultiplier},
DL,
2839 "active.lane.mask.entry");
2846 LaneMaskPhi->insertBefore(*HeaderVPBB, HeaderVPBB->begin());
2851 Builder.setInsertPoint(OriginalTerminator);
2852 auto *InLoopIncrement = Builder.createOverflowingOp(
2854 {CanonicalIVIncrement, &Plan.
getVF()}, {
false,
false},
DL);
2856 {InLoopIncrement, TC, ALMMultiplier},
DL,
2857 "active.lane.mask.next");
2862 auto *NotMask = Builder.createNot(ALM,
DL);
2869 bool UseActiveLaneMaskForControlFlow) {
2873 assert(WideCanonicalIV &&
2874 "Must have widened canonical IV when tail folding!");
2877 if (UseActiveLaneMaskForControlFlow) {
2886 nullptr,
"active.lane.mask");
2902 template <
typename OpTy>
bool match(OpTy *V)
const {
2913template <
typename Op0_t,
typename Op1_t>
2921 case Intrinsic::masked_udiv:
2922 return Intrinsic::vp_udiv;
2923 case Intrinsic::masked_sdiv:
2924 return Intrinsic::vp_sdiv;
2925 case Intrinsic::masked_urem:
2926 return Intrinsic::vp_urem;
2927 case Intrinsic::masked_srem:
2928 return Intrinsic::vp_srem;
2930 return std::nullopt;
2947 VPValue *Addr, *Mask, *EndPtr;
2950 auto AdjustEndPtr = [&CurRecipe, &EVL, &TypeInfo](
VPValue *EndPtr) {
2952 EVLEndPtr->insertBefore(&CurRecipe);
2957 EVLEndPtr->setOperand(1, EVLAsVF);
2961 auto GetVPReverse = [&CurRecipe, &EVL, &TypeInfo, Plan,
2966 Intrinsic::experimental_vp_reverse, {V, Plan->
getTrue(), &EVL},
2968 Reverse->insertBefore(&CurRecipe);
2972 if (
match(&CurRecipe,
2983 Mask = GetVPReverse(Mask);
2984 Addr = AdjustEndPtr(EndPtr);
2987 LoadR->insertBefore(&CurRecipe);
2989 Intrinsic::experimental_vp_reverse, {LoadR, Plan->
getTrue(), &EVL},
3001 NewLoad->setOperand(2, Mask);
3002 NewLoad->setOperand(3, &EVL);
3010 StoredVal, EVL, Mask);
3012 if (
match(&CurRecipe,
3016 Mask = GetVPReverse(Mask);
3017 Addr = AdjustEndPtr(EndPtr);
3018 StoredVal = GetVPReverse(ReversedVal);
3020 StoredVal, EVL, Mask);
3024 if (Rdx->isConditional() &&
3029 if (Interleave->getMask() &&
3034 if (
match(&CurRecipe,
3043 Intrinsic::vp_merge, {Mask,
LHS,
RHS, &EVL},
3057 if (
match(&CurRecipe,
3068 {IntrR->getOperand(0),
3069 IntrR->getOperand(1),
3070 Mask ? Mask : Plan->
getTrue(), &EVL},
3071 IntrR->getScalarType(), {}, {},
DL);
3080 VPValue *HeaderMask =
nullptr, *EVL =
nullptr;
3085 HeaderMask = R.getVPSingleValue();
3097 NewR->insertBefore(R);
3098 for (
auto [Old, New] :
3099 zip_equal(R->definedValues(), NewR->definedValues()))
3100 Old->replaceAllUsesWith(New);
3114 Merge->insertBefore(LogicalAnd);
3115 LogicalAnd->replaceAllUsesWith(
Merge);
3123 R->eraseFromParent();
3146 auto IsAllowedUser =
3147 IsaPred<VPVectorEndPointerRecipe, VPScalarIVStepsRecipe,
3148 VPWidenIntOrFpInductionRecipe,
3149 VPWidenMemIntrinsicRecipe>;
3150 if (match(U, m_Trunc(m_Specific(&Plan.getVF()))))
3151 return all_of(cast<VPSingleDefRecipe>(U)->users(),
3153 return IsAllowedUser(U);
3155 "User of VF that we can't transform to EVL.");
3165 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
3166 "increment of the canonical induction.");
3182 MaxEVL = Builder.createScalarZExtOrTrunc(
3186 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
3187 VPValue *PrevEVL = Builder.createScalarPhi(
3201 Intrinsic::experimental_vp_splice,
3202 {V1, V2, Imm, Plan.
getTrue(), PrevEVL, &EVL},
3206 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
3219 if (match(&R, m_ComputeReductionResult(m_Select(m_Specific(HeaderMask),
3220 m_VPValue(), m_VPValue()))))
3221 return R.getOperand(0)->getDefiningRecipe()->getRegion() ==
3222 Plan.getVectorLoopRegion();
3234 VPValue *EVLMask = Builder.createICmp(
3294 VPlan &Plan,
const std::optional<unsigned> &MaxSafeElements) {
3306 auto *CurrentIteration =
3308 CurrentIteration->insertBefore(*Header, Header->begin());
3309 VPBuilder Builder(Header, Header->getFirstNonPhi());
3312 VPPhi *AVLPhi = Builder.createScalarPhi(
3316 if (MaxSafeElements) {
3326 Builder.setInsertPoint(CanonicalIVIncrement);
3330 OpVPEVL = Builder.createScalarZExtOrTrunc(
3331 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
3333 auto *NextIter = Builder.createAdd(
3334 OpVPEVL, CurrentIteration, CanonicalIVIncrement->getDebugLoc(),
3335 "current.iteration.next", CanonicalIVIncrement->getNoWrapFlags());
3336 CurrentIteration->addOperand(NextIter);
3340 "avl.next", {
true,
false});
3348 CanonicalIV->replaceAllUsesWith(CurrentIteration);
3349 CanonicalIVIncrement->setOperand(0, CanonicalIV);
3363 assert(!CurrentIteration &&
3364 "Found multiple CurrentIteration. Only one expected");
3365 CurrentIteration = PhiR;
3369 if (!CurrentIteration)
3380 CurrentIteration->
getDebugLoc(),
"current.iteration.iv");
3389 CanIVInc->eraseFromParent();
3398 if (Header->empty())
3407 if (!
match(EVLPhi->getBackedgeValue(),
3420 [[maybe_unused]]
bool FoundAVLNext =
3423 assert(FoundAVLNext &&
"Didn't find AVL backedge?");
3431 [[maybe_unused]]
bool FoundIncrement =
match(
3438 "Expected BranchOnCond with ICmp comparing CanIV + VFxUF with vector "
3443 LatchBr->setOperand(
3454 "expected to run before loop regions are created");
3457 auto CanUseVersionedStride = [&VPDT, Preheader](
VPUser &U,
unsigned) {
3460 return VPDT.
dominates(Preheader, Parent);
3463 for (
const SCEV *Stride : StridesMap.
values()) {
3466 const APInt *StrideConst;
3489 RewriteMap[StrideV] = PSE.
getSCEV(StrideV);
3496 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3499 if (NewSCEV != ScevExpr) {
3501 ExpSCEV->replaceAllUsesWith(NewExp);
3512 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](
VPRecipeBase *Root) {
3517 while (!Worklist.
empty()) {
3520 if (!Visited.
insert(CurRec).second)
3542 RecWithFlags->isDisjoint()) {
3545 Builder.createAdd(
A,
B, RecWithFlags->getDebugLoc());
3546 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3547 RecWithFlags->replaceAllUsesWith(New);
3548 RecWithFlags->eraseFromParent();
3551 RecWithFlags->dropPoisonGeneratingFlags();
3556 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3557 "found instruction with poison generating flags not covered by "
3558 "VPRecipeWithIRFlags");
3563 if (
VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3571 auto IsNotHeaderMask = [&Plan](
VPValue *Mask) {
3583 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3584 if (AddrDef && WidenRec->isConsecutive() &&
3585 IsNotHeaderMask(WidenRec->getMask()))
3586 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3588 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3589 if (AddrDef && IsNotHeaderMask(InterleaveRec->getMask()))
3590 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3600 const bool &EpilogueAllowed) {
3601 if (InterleaveGroups.empty())
3612 IRMemberToRecipe[&MemR->getIngredient()] = MemR;
3619 for (
const auto *IG : InterleaveGroups) {
3624 return !IRMemberToRecipe.contains(Member);
3628 auto *Start = IRMemberToRecipe.
lookup(IG->getMember(0));
3632 StoredValues.
push_back(StoreR->getStoredValue());
3633 for (
unsigned I = 1;
I < IG->getFactor(); ++
I) {
3639 StoredValues.
push_back(StoreR->getStoredValue());
3643 bool NeedsMaskForGaps =
3644 (IG->requiresScalarEpilogue() && !EpilogueAllowed) ||
3645 (!StoredValues.
empty() && !IG->isFull());
3648 auto *InsertPos = IRMemberToRecipe.
lookup(IRInsertPos);
3657 VPValue *Addr = Start->getAddr();
3666 assert(IG->getIndex(IRInsertPos) != 0 &&
3667 "index of insert position shouldn't be zero");
3671 IG->getIndex(IRInsertPos),
3675 Addr =
B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3681 if (IG->isReverse()) {
3684 -(int64_t)IG->getFactor(), NW, InsertPosR->
getDebugLoc());
3685 ReversePtr->insertBefore(InsertPosR);
3689 IG, Addr, StoredValues, InsertPos->getMask(), NeedsMaskForGaps,
3691 VPIG->insertBefore(InsertPosR);
3694 for (
unsigned i = 0; i < IG->getFactor(); ++i)
3697 if (!Member->getType()->isVoidTy()) {
3756 AddOp = Instruction::Add;
3757 MulOp = Instruction::Mul;
3759 AddOp =
ID.getInductionOpcode();
3760 MulOp = Instruction::FMul;
3768 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty,
DL);
3769 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty,
DL);
3778 Init = Builder.createWidenCast(Instruction::UIToFP,
Init, StepTy);
3783 Init = Builder.createNaryOp(MulOp, {
Init, SplatStep}, Flags);
3784 Init = Builder.createNaryOp(AddOp, {SplatStart,
Init}, Flags,
3800 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3804 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3807 VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
3810 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3817 auto *
Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3820 WidePHI->addOperand(
Next);
3848 VPlan *Plan = R->getParent()->getPlan();
3849 VPValue *Start = R->getStartValue();
3850 VPValue *Step = R->getStepValue();
3851 VPValue *VF = R->getVFValue();
3853 assert(R->getInductionDescriptor().getKind() ==
3855 "Not a pointer induction according to InductionDescriptor!");
3858 "Recipe should have been replaced");
3864 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start,
DL,
"pointer.phi");
3868 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3871 Offset = Builder.createOverflowingOp(Instruction::Mul, {
Offset, Step});
3873 Builder.createWidePtrAdd(ScalarPtrPhi,
Offset,
DL,
"vector.gep");
3874 R->replaceAllUsesWith(PtrAdd);
3879 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.
inferScalarType(VF),
3881 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
3884 Builder.createPtrAdd(ScalarPtrPhi, Inc,
DL,
"ptr.ind");
3892 VPValue *Step = R->getStepValue();
3893 VPValue *Index = R->getIndex();
3897 ? Builder.createScalarSExtOrTrunc(
3899 : Builder.createScalarCast(Instruction::SIToFP, Index, StepTy,
3901 switch (R->getInductionKind()) {
3904 "Index type does not match StartValue type");
3905 return R->replaceAllUsesWith(Builder.createAdd(
3906 Start, Builder.createOverflowingOp(Instruction::Mul, {Index, Step})));
3909 return R->replaceAllUsesWith(Builder.createPtrAdd(
3910 Start, Builder.createOverflowingOp(Instruction::Mul, {Index, Step})));
3915 (FPBinOp->
getOpcode() == Instruction::FAdd ||
3916 FPBinOp->
getOpcode() == Instruction::FSub) &&
3917 "Original BinOp should be defined for FP induction");
3919 VPValue *
FMul = Builder.createNaryOp(Instruction::FMul, {Step, Index}, FMF);
3920 return R->replaceAllUsesWith(
3921 Builder.createNaryOp(FPBinOp->
getOpcode(), {Start, FMul}, FMF));
3934 if (!R->isReplicator())
3938 R->dissolveToCFGLoop();
3959 assert(Br->getNumOperands() == 2 &&
3960 "BranchOnTwoConds must have exactly 2 conditions");
3964 assert(Successors.size() == 3 &&
3965 "BranchOnTwoConds must have exactly 3 successors");
3970 VPValue *Cond0 = Br->getOperand(0);
3971 VPValue *Cond1 = Br->getOperand(1);
3976 !BrOnTwoCondsBB->
getParent() &&
"regions must already be dissolved");
3989 Br->eraseFromParent();
4012 WidenIVR->replaceAllUsesWith(PtrAdd);
4028 VPValue *CanIV = WideCanIV->getCanonicalIV();
4030 VPValue *Step = WideCanIV->getStepValue();
4033 "Expected unroller to have materialized step for UF != 1");
4038 Step = Builder.createAdd(
4041 Builder.createAdd(CanIV, Step, WideCanIV->getDebugLoc(),
"vec.iv",
4042 WideCanIV->getNoWrapFlags());
4051 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
4052 Select = Builder.createSelect(Blend->getMask(
I),
4053 Blend->getIncomingValue(
I),
Select,
4054 R.getDebugLoc(),
"predphi", *Blend);
4055 Blend->replaceAllUsesWith(
Select);
4060 if (!VEPR->getOffset()) {
4062 "Expected unroller to have materialized offset for UF != 1");
4063 VEPR->materializeOffset();
4078 for (
VPValue *
Op : LastActiveL->operands()) {
4079 VPValue *NotMask = Builder.createNot(
Op, LastActiveL->getDebugLoc());
4084 VPValue *FirstInactiveLane = Builder.createFirstActiveLane(
4085 NotMasks, LastActiveL->getDebugLoc(),
"first.inactive.lane");
4091 Builder.createSub(FirstInactiveLane, One,
4092 LastActiveL->getDebugLoc(),
"last.active.lane");
4102 assert(VPI->isMasked() &&
4103 "Unmasked MaskedCond should be simplified earlier");
4104 VPI->replaceAllUsesWith(Builder.createNaryOp(
4116 Instruction::Add, VPI->operands(), VPI->getNoWrapFlags(),
4117 VPI->getDebugLoc());
4118 VPI->replaceAllUsesWith(
Add);
4127 DebugLoc DL = BranchOnCountInst->getDebugLoc();
4130 ToRemove.push_back(BranchOnCountInst);
4145 ? Instruction::UIToFP
4146 : Instruction::Trunc;
4147 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
4153 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
4159 MulOpc = Instruction::FMul;
4160 Flags = VPI->getFastMathFlags();
4162 MulOpc = Instruction::Mul;
4167 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
4169 VPI->replaceAllUsesWith(VectorStep);
4175 R->eraseFromParent();
4183 struct EarlyExitInfo {
4194 if (Pred == MiddleVPBB)
4199 VPValue *CondOfEarlyExitingVPBB;
4200 [[maybe_unused]]
bool Matched =
4201 match(EarlyExitingVPBB->getTerminator(),
4203 assert(Matched &&
"Terminator must be BranchOnCond");
4207 VPBuilder EarlyExitingBuilder(EarlyExitingVPBB->getTerminator());
4208 auto *CondToEarlyExit = EarlyExitingBuilder.
createNaryOp(
4210 TrueSucc == ExitBlock
4211 ? CondOfEarlyExitingVPBB
4212 : EarlyExitingBuilder.
createNot(CondOfEarlyExitingVPBB));
4218 "exit condition must dominate the latch");
4227 assert(!Exits.
empty() &&
"must have at least one early exit");
4234 for (
const auto &[Num, VPB] :
enumerate(RPOT))
4236 llvm::sort(Exits, [&RPOIdx](
const EarlyExitInfo &
A,
const EarlyExitInfo &
B) {
4237 return RPOIdx[
A.EarlyExitingVPBB] < RPOIdx[
B.EarlyExitingVPBB];
4243 for (
unsigned I = 0;
I + 1 < Exits.
size(); ++
I)
4244 for (
unsigned J =
I + 1; J < Exits.
size(); ++J)
4246 Exits[
I].EarlyExitingVPBB) &&
4247 "RPO sort must place dominating exits before dominated ones");
4253 VPValue *Combined = Exits[0].CondToExit;
4254 for (
const EarlyExitInfo &Info :
drop_begin(Exits))
4255 Combined = Builder.createLogicalOr(Combined, Info.CondToExit);
4261 "Early exit store masking not implemented");
4265 for (
unsigned Idx = 0; Idx != Exits.
size(); ++Idx) {
4269 VectorEarlyExitVPBBs[Idx] = VectorEarlyExitVPBB;
4277 Exits.
size() == 1 ? VectorEarlyExitVPBBs[0]
4311 for (
auto [Exit, VectorEarlyExitVPBB] :
4312 zip_equal(Exits, VectorEarlyExitVPBBs)) {
4313 auto &[EarlyExitingVPBB, EarlyExitVPBB,
_] = Exit;
4325 ExitIRI->getIncomingValueForBlock(EarlyExitingVPBB);
4326 VPValue *NewIncoming = IncomingVal;
4328 VPBuilder EarlyExitBuilder(VectorEarlyExitVPBB);
4333 ExitIRI->removeIncomingValueFor(EarlyExitingVPBB);
4334 ExitIRI->addOperand(NewIncoming);
4337 EarlyExitingVPBB->getTerminator()->eraseFromParent();
4371 bool IsLastDispatch = (
I + 2 == Exits.
size());
4373 IsLastDispatch ? VectorEarlyExitVPBBs.
back()
4379 VectorEarlyExitVPBBs[
I]->setPredecessors({CurrentBB});
4382 CurrentBB = FalseBB;
4389 "Unexpected terminator");
4390 auto *IsLatchExitTaken =
4392 LatchExitingBranch->getOperand(1));
4394 DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
4395 LatchExitingBranch->eraseFromParent();
4396 Builder.setInsertPoint(LatchVPBB);
4398 {IsAnyExitTaken, IsLatchExitTaken}, LatchDL);
4400 LatchVPBB->
setSuccessors({DispatchVPBB, MiddleVPBB, HeaderVPBB});
4410 Type *RedTy = Ctx.Types.inferScalarType(Red);
4411 VPValue *VecOp = Red->getVecOp();
4413 assert(!Red->isPartialReduction() &&
4414 "This path does not support partial reductions");
4417 auto IsExtendedRedValidAndClampRange =
4430 "getExtendedReductionCost only supports integer types");
4431 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
4432 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
4433 Red->getFastMathFlags(),
CostKind);
4434 return ExtRedCost.
isValid() && ExtRedCost < ExtCost + RedCost;
4442 IsExtendedRedValidAndClampRange(
4445 Ctx.Types.inferScalarType(
A)))
4464 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
4465 Opcode != Instruction::FAdd)
4468 assert(!Red->isPartialReduction() &&
4469 "This path does not support partial reductions");
4470 Type *RedTy = Ctx.Types.inferScalarType(Red);
4473 auto IsMulAccValidAndClampRange =
4480 Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
4486 (Ext0->getOpcode() != Ext1->getOpcode() ||
4487 Ext0->getOpcode() == Instruction::CastOps::FPExt))
4491 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
4493 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
4500 ExtCost += Ext0->computeCost(VF, Ctx);
4502 ExtCost += Ext1->computeCost(VF, Ctx);
4504 ExtCost += OuterExt->computeCost(VF, Ctx);
4506 return MulAccCost.
isValid() &&
4507 MulAccCost < ExtCost + MulCost + RedCost;
4512 VPValue *VecOp = Red->getVecOp();
4550 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
4551 Type *WideTy = Ctx.Types.inferScalarType(ExtA);
4552 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
4553 Mul->setOperand(1, ExtB);
4563 ExtendAndReplaceConstantOp(RecipeA, RecipeB,
B,
Mul);
4568 IsMulAccValidAndClampRange(
Mul, RecipeA, RecipeB,
nullptr)) {
4575 if (!
Sub && IsMulAccValidAndClampRange(
Mul,
nullptr,
nullptr,
nullptr))
4592 ExtendAndReplaceConstantOp(Ext0, Ext1,
B,
Mul);
4601 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
4602 Ext0->getOpcode() == Ext1->getOpcode() &&
4603 IsMulAccValidAndClampRange(
Mul, Ext0, Ext1, Ext) &&
Mul->hasOneUse()) {
4605 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getScalarType(),
nullptr,
4606 *Ext0, *Ext0, Ext0->getDebugLoc());
4607 NewExt0->insertBefore(Ext0);
4612 Ext->getScalarType(),
nullptr, *Ext1,
4613 *Ext1, Ext1->getDebugLoc());
4616 auto *NewMul =
Mul->cloneWithOperands({NewExt0, NewExt1});
4617 NewMul->insertBefore(
Mul);
4618 Ext->replaceAllUsesWith(NewMul);
4619 Ext->eraseFromParent();
4620 Mul->eraseFromParent();
4634 assert(!Red->isPartialReduction() &&
4635 "This path does not support partial reductions");
4638 auto IP = std::next(Red->getIterator());
4639 auto *VPBB = Red->getParent();
4649 Red->replaceAllUsesWith(AbstractR);
4679 for (
VPValue *VPV : VPValues) {
4688 if (
User->usesScalars(VPV))
4691 HoistPoint = HoistBlock->
begin();
4695 "All users must be in the vector preheader or dominated by it");
4700 VPV->replaceUsesWithIf(Broadcast,
4701 [VPV, Broadcast](
VPUser &U,
unsigned Idx) {
4702 return Broadcast != &U && !U.usesScalars(VPV);
4713 return CommonMetadata;
4716template <
unsigned Opcode>
4721 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
4722 "Only Load and Store opcodes supported");
4723 constexpr bool IsLoad = (Opcode == Instruction::Load);
4729 return TypeInfo.
inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
4734 for (
auto Recipes :
Groups) {
4735 if (Recipes.size() < 2)
4743 VPValue *MaskI = RecipeI->getMask();
4744 Type *TypeI = GetLoadStoreValueType(RecipeI);
4750 bool HasComplementaryMask =
false;
4755 VPValue *MaskJ = RecipeJ->getMask();
4756 Type *TypeJ = GetLoadStoreValueType(RecipeJ);
4757 if (TypeI == TypeJ) {
4767 if (HasComplementaryMask) {
4768 assert(Group.
size() >= 2 &&
"must have at least 2 entries");
4778template <
typename InstType>
4796 for (
auto &Group :
Groups) {
4816 return R->isSingleScalar() == IsSingleScalar;
4818 "all members in group must agree on IsSingleScalar");
4823 LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
4824 IsSingleScalar,
nullptr, *EarliestLoad, CommonMetadata);
4826 UnpredicatedLoad->insertBefore(EarliestLoad);
4830 Load->replaceAllUsesWith(UnpredicatedLoad);
4831 Load->eraseFromParent();
4841 if (!StoreLoc || !StoreLoc->AATags.Scope)
4847 StoresToSink.
end());
4851 SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], PSE, L, TypeInfo);
4865 for (
auto &Group :
Groups) {
4878 VPValue *SelectedValue = Group[0]->getOperand(0);
4881 bool IsSingleScalar = Group[0]->isSingleScalar();
4882 for (
unsigned I = 1;
I < Group.size(); ++
I) {
4883 assert(IsSingleScalar == Group[
I]->isSingleScalar() &&
4884 "all members in group must agree on IsSingleScalar");
4885 VPValue *Mask = Group[
I]->getMask();
4887 SelectedValue = Builder.createSelect(Mask,
Value, SelectedValue,
4896 StoreWithMinAlign->getUnderlyingInstr(),
4897 {SelectedValue, LastStore->getOperand(1)}, IsSingleScalar,
4898 nullptr, *LastStore, CommonMetadata);
4899 UnpredicatedStore->insertBefore(*InsertBB, LastStore->
getIterator());
4903 Store->eraseFromParent();
4910 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
4911 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
4975 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](
VPUser *U) {
4977 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
4984 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
4994 DefR->replaceUsesWithIf(
4995 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
4997 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
5011 for (
VPValue *Def : R.definedValues()) {
5024 auto IsCandidateUnpackUser = [Def](
VPUser *U) {
5026 return U->usesScalars(Def) &&
5029 if (
none_of(Def->users(), IsCandidateUnpackUser))
5036 Unpack->insertAfter(&R);
5037 Def->replaceUsesWithIf(Unpack,
5038 [&IsCandidateUnpackUser](
VPUser &U,
unsigned) {
5039 return IsCandidateUnpackUser(&U);
5048 bool RequiresScalarEpilogue,
VPValue *Step,
5049 std::optional<uint64_t> MaxRuntimeStep) {
5060 assert(StepR->getParent() == VectorPHVPBB &&
5061 "Step must be defined in VectorPHVPBB");
5063 InsertPt = std::next(StepR->getIterator());
5065 VPBuilder Builder(VectorPHVPBB, InsertPt);
5071 if (!RequiresScalarEpilogue &&
match(TC,
m_APInt(TCVal)) && MaxRuntimeStep &&
5083 if (TailByMasking) {
5084 TC = Builder.createAdd(
5095 Builder.createNaryOp(Instruction::URem, {TC, Step},
5104 if (RequiresScalarEpilogue) {
5106 "requiring scalar epilogue is not supported with fail folding");
5109 R = Builder.createSelect(IsZero, Step, R);
5123 "VF and VFxUF must be materialized together");
5135 Builder.createElementCount(TCTy, VFEC * Plan.
getConcreteUF());
5142 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
5146 BC, [&VF](
VPUser &U,
unsigned) {
return !U.usesScalars(&VF); });
5150 VPValue *MulByUF = Builder.createOverflowingOp(
5162 BasicBlock *EntryBB = Entry->getIRBasicBlock();
5170 const SCEV *Expr = ExpSCEV->getSCEV();
5173 ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
5178 ExpSCEV->eraseFromParent();
5181 "VPExpandSCEVRecipes must be at the beginning of the entry block, "
5182 "before any VPIRInstructions");
5185 auto EI = Entry->begin();
5195 return ExpandedSCEVs;
5207 VPValue *OpV,
unsigned Idx,
bool IsScalable) {
5211 return Member0Op == OpV;
5215 return !IsScalable && !W->getMask() && W->isConsecutive() &&
5218 return IR->getInterleaveGroup()->isFull() &&
IR->getVPValue(Idx) == OpV;
5235 for (
unsigned Idx = 0; Idx != WideMember0->getNumOperands(); ++Idx) {
5238 OpsI.
push_back(
Op->getDefiningRecipe()->getOperand(Idx));
5243 if (
any_of(
enumerate(OpsI), [WideMember0, Idx, IsScalable](
const auto &
P) {
5244 const auto &[
OpIdx, OpV] =
P;
5259 if (!InterleaveR || InterleaveR->
getMask())
5260 return std::nullopt;
5262 Type *GroupElementTy =
nullptr;
5266 [&TypeInfo, GroupElementTy](
VPValue *
Op) {
5267 return TypeInfo.inferScalarType(Op) == GroupElementTy;
5269 return std::nullopt;
5274 [&TypeInfo, GroupElementTy](
VPValue *
Op) {
5275 return TypeInfo.inferScalarType(Op) == GroupElementTy;
5277 return std::nullopt;
5281 if (IG->getFactor() != IG->getNumMembers())
5282 return std::nullopt;
5288 assert(
Size.isScalable() == VF.isScalable() &&
5289 "if Size is scalable, VF must be scalable and vice versa");
5290 return Size.getKnownMinValue();
5294 unsigned MinVal = VF.getKnownMinValue();
5296 if (IG->getFactor() == MinVal && GroupSize == GetVectorBitWidthForVF(VF))
5299 return std::nullopt;
5307 return RepR && RepR->isSingleScalar();
5314 auto *R = V->getDefiningRecipe();
5323 for (
unsigned Idx = 0,
E = WideMember0->getNumOperands(); Idx !=
E; ++Idx)
5324 WideMember0->setOperand(
5333 auto *LI =
cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
5335 LoadGroup->getMask(),
true,
5336 {}, LoadGroup->getDebugLoc());
5337 L->insertBefore(LoadGroup);
5343 assert(RepR->isSingleScalar() && RepR->getOpcode() == Instruction::Load &&
5344 "must be a single scalar load");
5345 NarrowedOps.
insert(RepR);
5350 VPValue *PtrOp = WideLoad->getAddr();
5352 PtrOp = VecPtr->getOperand(0);
5357 nullptr, {}, *WideLoad);
5358 N->insertBefore(WideLoad);
5363std::unique_ptr<VPlan>
5383 "unexpected branch-on-count");
5387 std::optional<ElementCount> VFToOptimize;
5401 if (R.mayWriteToMemory() && !InterleaveR)
5407 return any_of(V->users(), [&](VPUser *U) {
5408 auto *UR = cast<VPRecipeBase>(U);
5409 return UR->getParent()->getParent() != VectorLoop;
5426 std::optional<ElementCount> NarrowedVF =
5428 if (!NarrowedVF || (VFToOptimize && NarrowedVF != VFToOptimize))
5430 VFToOptimize = NarrowedVF;
5433 if (InterleaveR->getStoredValues().empty())
5438 auto *Member0 = InterleaveR->getStoredValues()[0];
5448 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
5451 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
5452 return IR && IR->getInterleaveGroup()->isFull() &&
5453 IR->getVPValue(Op.index()) == Op.value();
5462 VFToOptimize->isScalable()))
5467 if (StoreGroups.empty())
5471 bool RequiresScalarEpilogue =
5482 std::unique_ptr<VPlan> NewPlan;
5484 NewPlan = std::unique_ptr<VPlan>(Plan.
duplicate());
5485 Plan.
setVF(*VFToOptimize);
5486 NewPlan->removeVF(*VFToOptimize);
5492 for (
auto *StoreGroup : StoreGroups) {
5499 StoreGroup->getDebugLoc());
5500 S->insertBefore(StoreGroup);
5501 StoreGroup->eraseFromParent();
5507 Type *CanIVTy = VectorLoop->getCanonicalIVType();
5513 if (VFToOptimize->isScalable()) {
5516 Step = PHBuilder.createOverflowingOp(Instruction::Mul, {VScale,
UF},
5524 materializeVectorTripCount(Plan, VectorPH,
false,
5525 RequiresScalarEpilogue, Step);
5530 removeDeadRecipes(Plan);
5533 "All VPVectorPointerRecipes should have been removed");
5549 "must have a BranchOnCond");
5552 if (VF.
isScalable() && VScaleForTuning.has_value())
5553 VectorStep *= *VScaleForTuning;
5554 assert(VectorStep > 0 &&
"trip count should not be zero");
5558 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
5578 "Cannot handle loops with uncountable early exits");
5585 assert(RecurSplice &&
"expected FirstOrderRecurrenceSplice");
5592 if (
any_of(RecurSplice->users(),
5593 [](
VPUser *U) { return !cast<VPRecipeBase>(U)->getRegion(); }) &&
5674 {},
"vector.recur.extract.for.phi");
5677 ExitPhi->replaceUsesOfWith(ExtractR, PenultimateElement);
5691 VPValue *WidenIVCandidate = BinOp->getOperand(0);
5692 VPValue *InvariantCandidate = BinOp->getOperand(1);
5694 std::swap(WidenIVCandidate, InvariantCandidate);
5708 auto *ClonedOp = BinOp->
clone();
5709 if (ClonedOp->getOperand(0) == WidenIV) {
5710 ClonedOp->setOperand(0, ScalarIV);
5712 assert(ClonedOp->getOperand(1) == WidenIV &&
"one operand must be WideIV");
5713 ClonedOp->setOperand(1, ScalarIV);
5728 auto CheckSentinel = [&SE](
const SCEV *IVSCEV,
5729 bool UseMax) -> std::optional<APSInt> {
5731 for (
bool Signed : {
true,
false}) {
5740 return std::nullopt;
5748 PhiR->getRecurrenceKind()))
5757 VPValue *BackedgeVal = PhiR->getBackedgeValue();
5771 !
match(FindLastSelect,
5780 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression, PSE,
5786 "IVOfExpressionToSink not being an AddRec must imply "
5787 "FindLastExpression not being an AddRec.");
5798 std::optional<APSInt> SentinelVal = CheckSentinel(IVSCEV, UseMax);
5799 bool UseSigned = SentinelVal && SentinelVal->isSigned();
5806 if (IVOfExpressionToSink) {
5807 const SCEV *FindLastExpressionSCEV =
5809 if (
match(FindLastExpressionSCEV,
5812 if (
auto NewSentinel =
5813 CheckSentinel(FindLastExpressionSCEV, NewUseMax)) {
5816 SentinelVal = *NewSentinel;
5817 UseSigned = NewSentinel->isSigned();
5819 IVSCEV = FindLastExpressionSCEV;
5820 IVOfExpressionToSink =
nullptr;
5830 if (AR->hasNoSignedWrap())
5832 else if (AR->hasNoUnsignedWrap())
5842 VPValue *NewFindLastSelect = BackedgeVal;
5844 if (!SentinelVal || IVOfExpressionToSink) {
5847 DebugLoc DL = FindLastSelect->getDefiningRecipe()->getDebugLoc();
5848 VPBuilder LoopBuilder(FindLastSelect->getDefiningRecipe());
5849 if (FindLastSelect->getDefiningRecipe()->getOperand(1) == PhiR)
5850 SelectCond = LoopBuilder.
createNot(SelectCond);
5857 if (SelectCond !=
Cond || IVOfExpressionToSink) {
5860 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression,
5869 VPIRFlags Flags(MinMaxKind,
false,
false,
5875 NewFindLastSelect, Flags, ExitDL);
5878 VPValue *VectorRegionExitingVal = ReducedIV;
5879 if (IVOfExpressionToSink)
5880 VectorRegionExitingVal =
5882 ReducedIV, IVOfExpressionToSink);
5885 VPValue *StartVPV = PhiR->getStartValue();
5892 NewRdxResult = MiddleBuilder.
createSelect(Cmp, VectorRegionExitingVal,
5902 AnyOfPhi->insertAfter(PhiR);
5909 OrVal, VectorRegionExitingVal, StartVPV, ExitDL);
5922 PhiR->hasUsesOutsideReductionChain());
5923 NewPhiR->insertBefore(PhiR);
5924 PhiR->replaceAllUsesWith(NewPhiR);
5925 PhiR->eraseFromParent();
5932struct ReductionExtend {
5933 Type *SrcType =
nullptr;
5934 ExtendKind Kind = ExtendKind::PR_None;
5940struct ExtendedReductionOperand {
5944 ReductionExtend ExtendA, ExtendB;
5952struct VPPartialReductionChain {
5955 VPWidenRecipe *ReductionBinOp =
nullptr;
5957 ExtendedReductionOperand ExtendedOp;
5964 unsigned AccumulatorOpIdx;
5965 unsigned ScaleFactor;
5978 if (!
Op->hasOneUse() ||
5984 auto *Trunc = Builder.createWidenCast(Instruction::CastOps::Trunc,
5985 Op->getOperand(1), NarrowTy);
5987 Op->setOperand(1, Builder.createWidenCast(ExtOpc, Trunc, WideTy));
5996 auto *
Sub =
Op->getOperand(0)->getDefiningRecipe();
5998 assert(Ext->getOpcode() ==
6000 "Expected both the LHS and RHS extends to be the same");
6001 bool IsSigned = Ext->getOpcode() == Instruction::SExt;
6004 auto *FreezeX = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
X}));
6005 auto *FreezeY = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
Y}));
6006 auto *
Max = Builder.insert(
6008 {FreezeX, FreezeY}, SrcTy));
6009 auto *Min = Builder.insert(
6011 {FreezeX, FreezeY}, SrcTy));
6014 return Builder.createWidenCast(Instruction::CastOps::ZExt, AbsDiff,
6027 if (!
Mul->hasOneUse() ||
6028 (Ext->getOpcode() != MulLHS->getOpcode() && MulLHS != MulRHS) ||
6029 MulLHS->getOpcode() != MulRHS->getOpcode())
6032 auto *NewLHS = Builder.createWidenCast(
6033 MulLHS->getOpcode(), MulLHS->getOperand(0), Ext->getScalarType());
6034 auto *NewRHS = MulLHS == MulRHS
6036 : Builder.createWidenCast(MulRHS->getOpcode(),
6037 MulRHS->getOperand(0),
6038 Ext->getScalarType());
6039 auto *NewMul =
Mul->cloneWithOperands({NewLHS, NewRHS});
6040 Builder.insert(NewMul);
6041 Op->replaceAllUsesWith(NewMul);
6042 Op->eraseFromParent();
6043 Mul->eraseFromParent();
6052 VPValue *VecOp = Red->getVecOp();
6086static void transformToPartialReduction(
const VPPartialReductionChain &Chain,
6094 WidenRecipe->
getOperand(1 - Chain.AccumulatorOpIdx));
6110 if (WidenRecipe->
getOpcode() == Instruction::Sub &&
6118 Builder.insert(NegRecipe);
6119 ExtendedOp = NegRecipe;
6123 "FSub chain reduction isn't supported");
6126 ExtendedOp = optimizeExtendsForPartialReduction(ExtendedOp, TypeInfo);
6136 assert((!ExitValue || IsLastInChain) &&
6137 "if we found ExitValue, it must match RdxPhi's backedge value");
6148 PartialRed->insertBefore(WidenRecipe);
6156 E->insertBefore(WidenRecipe);
6157 PartialRed->replaceAllUsesWith(
E);
6170 auto *NewScaleFactor = Plan.
getConstantInt(32, Chain.ScaleFactor);
6171 StartInst->setOperand(2, NewScaleFactor);
6179 VPValue *OldStartValue = StartInst->getOperand(0);
6180 StartInst->setOperand(0, StartInst->getOperand(1));
6184 assert(RdxResult &&
"Could not find reduction result");
6187 unsigned SubOpc = Chain.RK ==
RecurKind::FSub ? Instruction::BinaryOps::FSub
6188 : Instruction::BinaryOps::Sub;
6194 [&NewResult](
VPUser &U,
unsigned Idx) {
return &
U != NewResult; });
6200 const VPPartialReductionChain &Link,
6203 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6204 std::optional<unsigned> BinOpc = std::nullopt;
6206 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6207 BinOpc = ExtendedOp.ExtendsUser->
getOpcode();
6209 std::optional<llvm::FastMathFlags>
Flags;
6213 auto GetLinkOpcode = [&Link]() ->
unsigned {
6216 return Instruction::Add;
6218 return Instruction::FAdd;
6220 return Link.ReductionBinOp->
getOpcode();
6225 GetLinkOpcode(), ExtendedOp.ExtendA.SrcType, ExtendedOp.ExtendB.SrcType,
6226 RdxType, VF, ExtendedOp.ExtendA.Kind, ExtendedOp.ExtendB.Kind, BinOpc,
6247static std::optional<ExtendedReductionOperand>
6251 "Op should be operand of UpdateR");
6259 if (
Op->hasOneUse() &&
6269 if (LHSInputType != RHSInputType ||
6270 LHSExt->getOpcode() != RHSExt->getOpcode())
6271 return std::nullopt;
6274 return ExtendedReductionOperand{
6276 {LHSInputType, getPartialReductionExtendKind(LHSExt)},
6280 std::optional<TTI::PartialReductionExtendKind> OuterExtKind;
6283 VPValue *CastSource = CastRecipe->getOperand(0);
6284 OuterExtKind = getPartialReductionExtendKind(CastRecipe);
6294 if (UpdateR->
getOpcode() == Instruction::Sub)
6295 return std::nullopt;
6296 }
else if (UpdateR->
getOpcode() == Instruction::Add ||
6297 UpdateR->
getOpcode() == Instruction::FAdd) {
6301 return ExtendedReductionOperand{
6308 if (!
Op->hasOneUse())
6309 return std::nullopt;
6314 return std::nullopt;
6324 return std::nullopt;
6328 ExtendKind LHSExtendKind = getPartialReductionExtendKind(LHSCast);
6331 const APInt *RHSConst =
nullptr;
6337 return std::nullopt;
6341 if (Cast && OuterExtKind &&
6342 getPartialReductionExtendKind(Cast) != OuterExtKind)
6343 return std::nullopt;
6345 Type *RHSInputType = LHSInputType;
6346 ExtendKind RHSExtendKind = LHSExtendKind;
6349 RHSExtendKind = getPartialReductionExtendKind(RHSCast);
6352 return ExtendedReductionOperand{
6353 MulOp, {LHSInputType, LHSExtendKind}, {RHSInputType, RHSExtendKind}};
6360static std::optional<SmallVector<VPPartialReductionChain>>
6368 return std::nullopt;
6379 VPValue *CurrentValue = ExitValue;
6380 while (CurrentValue != RedPhiR) {
6383 return std::nullopt;
6390 std::optional<ExtendedReductionOperand> ExtendedOp =
6391 matchExtendedReductionOperand(UpdateR,
Op, TypeInfo);
6393 ExtendedOp = matchExtendedReductionOperand(UpdateR, PrevValue, TypeInfo);
6395 return std::nullopt;
6399 Type *ExtSrcType = ExtendedOp->ExtendA.SrcType;
6402 return std::nullopt;
6407 VPPartialReductionChain Link(
6408 {UpdateR, *ExtendedOp, RK,
6412 CurrentValue = PrevValue;
6417 std::reverse(Chain.
begin(), Chain.
end());
6436 if (
auto Chains = getScaledReductions(RedPhiR, CostCtx,
Range))
6437 ChainsByPhi.
try_emplace(RedPhiR, std::move(*Chains));
6440 if (ChainsByPhi.
empty())
6447 for (
const auto &[
_, Chains] : ChainsByPhi)
6448 for (
const VPPartialReductionChain &Chain : Chains) {
6449 PartialReductionOps.
insert(Chain.ExtendedOp.ExtendsUser);
6450 ScaledReductionMap[Chain.ReductionBinOp] = Chain.ScaleFactor;
6456 auto ExtendUsersValid = [&](
VPValue *Ext) {
6458 return PartialReductionOps.contains(cast<VPRecipeBase>(U));
6462 auto IsProfitablePartialReductionChainForVF =
6469 for (
const VPPartialReductionChain &Link : Chain) {
6470 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6471 InstructionCost LinkCost = getPartialReductionLinkCost(CostCtx, Link, VF);
6475 PartialCost += LinkCost;
6476 RegularCost += Link.ReductionBinOp->
computeCost(VF, CostCtx);
6478 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6479 RegularCost += ExtendedOp.ExtendsUser->
computeCost(VF, CostCtx);
6482 RegularCost += Extend->computeCost(VF, CostCtx);
6484 return PartialCost.
isValid() && PartialCost < RegularCost;
6492 for (
auto &[RedPhiR, Chains] : ChainsByPhi) {
6493 for (
const VPPartialReductionChain &Chain : Chains) {
6494 if (!
all_of(Chain.ExtendedOp.ExtendsUser->operands(), ExtendUsersValid)) {
6498 auto UseIsValid = [&, RedPhiR = RedPhiR](
VPUser *U) {
6500 return PhiR == RedPhiR;
6502 return Chain.ScaleFactor == ScaledReductionMap.
lookup_or(R, 0) ||
6508 if (!
all_of(Chain.ReductionBinOp->users(), UseIsValid)) {
6517 auto *RepR = dyn_cast<VPReplicateRecipe>(U);
6518 return RepR && RepR->getOpcode() == Instruction::Store;
6529 return IsProfitablePartialReductionChainForVF(Chains, VF);
6535 for (
auto &[Phi, Chains] : ChainsByPhi)
6536 for (
const VPPartialReductionChain &Chain : Chains)
6537 transformToPartialReduction(Chain, CostCtx.
Types, Plan, Phi);
6551 if (VPI && VPI->getUnderlyingValue() &&
6563 New->insertBefore(VPI);
6564 if (VPI->getOpcode() == Instruction::Load)
6565 VPI->replaceAllUsesWith(New->getVPSingleValue());
6566 VPI->eraseFromParent();
6571 FinalRedStoresBuilder))
6580 ReplaceWith(Histogram);
6588 ReplaceWith(Recipe);
6611 if (VPI->mayHaveSideEffects())
6615 if (VPI->isMasked() && !VPI->isSafeToSpeculativelyExecute())
6620 if (VPI->getOpcode() == Instruction::Add &&
6629 I, VPI->operandsWithoutMask(),
true,
6630 nullptr, *VPI, *VPI, VPI->getDebugLoc());
6631 Recipe->insertBefore(VPI);
6632 VPI->replaceAllUsesWith(Recipe);
6633 VPI->eraseFromParent();
6644 switch (Param.ParamKind) {
6645 case VFParamKind::Vector:
6646 case VFParamKind::GlobalPredicate:
6648 case VFParamKind::OMP_Uniform:
6649 return SE->isSCEVable(Types.inferScalarType(Args[Param.ParamPos])) &&
6650 SE->isLoopInvariant(
6651 vputils::getSCEVExprForVPValue(Args[Param.ParamPos], PSE, L),
6653 case VFParamKind::OMP_Linear:
6654 return match(vputils::getSCEVExprForVPValue(Args[Param.ParamPos], PSE, L),
6655 m_scev_AffineAddRec(
6656 m_SCEV(), m_scev_SpecificSInt(Param.LinearStepOrPos),
6657 m_SpecificLoop(L)));
6674 const auto *It =
find_if(Mappings, [&](
const VFInfo &Info) {
6675 return Info.Shape.VF == VF && (!MaskRequired || Info.isMasked()) &&
6678 if (It == Mappings.end())
6685struct CallWideningDecision {
6687 CallWideningDecision(KindTy Kind, Function *Variant =
nullptr)
6710 return CallWideningDecision::KindTy::Scalarize;
6720 return CallWideningDecision::KindTy::Scalarize;
6724 false, VF, CostCtx);
6727 CostCtx.
L, CostCtx.
Types);
6739 return CallWideningDecision::KindTy::Intrinsic;
6743 if (VecFunc && ScalarCost >= VecCallCost)
6744 return {CallWideningDecision::KindTy::VectorVariant, VecFunc};
6746 return CallWideningDecision::KindTy::Scalarize;
6757 if (!VPI || !VPI->getUnderlyingValue() ||
6758 VPI->getOpcode() != Instruction::Call)
6763 VPI->op_begin() + CI->arg_size());
6765 CallWideningDecision Decision =
6774 switch (Decision.Kind) {
6775 case CallWideningDecision::KindTy::Intrinsic: {
6779 *VPI, VPI->getDebugLoc());
6782 case CallWideningDecision::KindTy::VectorVariant: {
6786 VPValue *Mask = VPI->isMasked() ? VPI->getMask() : Plan.
getTrue();
6787 Ops.push_back(Mask);
6789 Ops.push_back(VPI->getOperand(VPI->getNumOperandsWithoutMask() - 1));
6791 *VPI, VPI->getDebugLoc());
6794 case CallWideningDecision::KindTy::Scalarize:
6806 return !Legacy || *Legacy == Decision.Kind;
6808 "VPlan call widening decision must match legacy decision");
6811 VPI->replaceAllUsesWith(Replacement);
6816 VPI->eraseFromParent();
6838 if (!LoadR || LoadR->isConsecutive())
6856 Align Alignment = LoadR->getAlign();
6859 if (!Ctx.TTI.isLegalStridedLoadStore(DataTy, Alignment))
6864 Intrinsic::experimental_vp_strided_load, DataTy,
6865 LoadR->isMasked(), Alignment, Ctx);
6866 return StridedLoadStoreCost < CurrentCost;
6877 Ctx.invalidateWideningDecision(&LoadR->getIngredient(), VF);
6882 I32VF = Builder.createScalarZExtOrTrunc(
6894 auto *
Offset = Builder.createOverflowingOp(
6896 {AddRecPtr->hasNoUnsignedWrap(), AddRecPtr->hasNoSignedWrap()});
6897 auto *BasePtr = Builder.createNoWrapPtrAdd(
6903 VPValue *NewPtr = Builder.createVectorPointer(
6905 Ptr->getGEPNoWrapFlags(), Ptr->getDebugLoc());
6907 VPValue *Mask = LoadR->getMask();
6910 auto *StridedLoad = Builder.createWidenMemIntrinsic(
6911 Intrinsic::experimental_vp_strided_load,
6912 {NewPtr, StrideInBytes, Mask, I32VF}, LoadTy, Alignment, *LoadR,
6913 LoadR->getDebugLoc());
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static cl::opt< IntrinsicCostStrategy > IntrinsicCost("intrinsic-cost-strategy", cl::desc("Costing strategy for intrinsic instructions"), cl::init(IntrinsicCostStrategy::InstructionCost), cl::values(clEnumValN(IntrinsicCostStrategy::InstructionCost, "instruction-cost", "Use TargetTransformInfo::getInstructionCost"), clEnumValN(IntrinsicCostStrategy::IntrinsicCost, "intrinsic-cost", "Use TargetTransformInfo::getIntrinsicInstrCost"), clEnumValN(IntrinsicCostStrategy::TypeBasedIntrinsicCost, "type-based-intrinsic-cost", "Calculate the intrinsic cost based only on argument types")))
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on sets of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const X86InstrFMA3Group Groups[]
static const uint32_t IV[8]
Helper for extra no-alias checks via known-safe recipe and SCEV.
SinkStoreInfo(const SmallPtrSetImpl< VPRecipeBase * > &ExcludeRecipes, VPReplicateRecipe &GroupLeader, PredicatedScalarEvolution &PSE, const Loop &L, VPTypeAnalysis &TypeInfo)
bool shouldSkip(VPRecipeBase &R) const
Return true if R should be skipped during alias checking, either because it's in the exclude set or b...
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
uint64_t getZExtValue() const
Get zero extended value.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt abs() const
Get the absolute value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
int64_t getSExtValue() const
Get sign extended value.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
An arbitrary precision integer that knows its signedness.
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
@ NoAlias
The two locations do not alias at all.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
Get the last element.
const T & front() const
Get the first element.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
This class represents a range of values.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
ValueT lookup_or(const_arg_type_t< KeyT > Val, U &&Default) const
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Convenience struct for specifying and reasoning about fast-math flags.
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags noUnsignedWrap()
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
static LLVM_ABI InductionDescriptor getCanonicalIntInduction(Type *Ty, ScalarEvolution &SE)
Returns the canonical integer induction for type Ty with start = 0 and step = 1.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_NoInduction
Not an induction variable.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
ValueT lookup(const KeyT &Key) const
std::pair< iterator, bool > try_emplace(const KeyT &Key, Ts &&...Args)
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describe the aliasing of the location (each member is null if that kind of ...
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Post-order traversal of a graph.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyzed scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(SCEVUse SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUDivExpr(SCEVUse LHS, SCEVUse RHS)
Get a canonical unsigned division expression, or something simpler if possible.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
static LLVM_ABI AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
const VPBlocksTy & getPredecessors() const
const std::string & getName() const
void clearSuccessors()
Remove all the successors of this block.
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksAs(T &&Range)
Return an iterator range over Range with each block cast to BlockTy.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
static SmallVector< VPBasicBlock * > blocksInSingleSuccessorChainBetween(VPBasicBlock *FirstBB, VPBasicBlock *LastBB)
Returns the blocks between FirstBB and LastBB, where FirstBB to LastBB forms a single-successor chain.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createFirstActiveLane(ArrayRef< VPValue * > Masks, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createAnyOfReduction(VPValue *ChainOp, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown())
Create an AnyOf reduction pattern: or-reduce ChainOp, freeze the result, then select between TrueVal ...
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step)
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPWidenPHIRecipe * createWidenPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPWidenCastRecipe * createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
A recipe for generating the phi node tracking the current scalar iteration index.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
A special type of VPBasicBlock that wraps an existing IR basic block.
Class to record and manage LLVM IR flags.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
unsigned getNumOperandsWithoutMask() const
Returns the number of operands, excluding the mask if the VPInstruction is masked.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Helper class to create VPRecipes from IR instructions.
VPHistogramRecipe * widenIfHistogram(VPInstruction *VPI)
If VPI represents a histogram operation (as determined by LoopVectorizationLegality) make that safe f...
VPRecipeBase * tryToWidenMemory(VPInstruction *VPI, VFRange &Range)
Check if the load or store instruction VPI should be widened for Range.Start and potentially masked.
bool replaceWithFinalIfReductionStore(VPInstruction *VPI, VPBuilder &FinalRedStoresBuilder)
If VPI is a store of a reduction into an invariant address, delete it.
VPReplicateRecipe * handleReplication(VPInstruction *VPI, VFRange &Range)
Build a VPReplicationRecipe for VPI.
Type * getScalarType() const
Returns the scalar type of this VPRecipeValue.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe for handling reduction phis.
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
VPInstruction * getOrCreateCanonicalIVIncrement()
Get the canonical IV increment instruction if it exists.
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
void clearCanonicalIVNUW(VPInstruction *Increment)
Unsets NUW for the canonical IV increment Increment, for loop regions.
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
static InstructionCost computeCallCost(Function *CalledFn, Type *ResultTy, ArrayRef< const VPValue * > ArgOps, bool IsSingleScalar, ElementCount VF, VPCostContext &Ctx)
Return the cost of scalarizing a call to CalledFn with argument operands ArgOps for a given VF.
bool isPredicated() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
unsigned getNumOperands() const
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Type * getScalarType() const
Returns the scalar type of this VPValue, dispatching based on the concrete subclass.
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A recipe for widening Call instructions using library calls.
static InstructionCost computeCallCost(Function *Variant, VPCostContext &Ctx)
Return the cost of widening a call using the vector function Variant.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
A recipe for widening vector intrinsics.
static InstructionCost computeCallCost(Intrinsic::ID ID, ArrayRef< const VPValue * > Operands, const VPRecipeWithIRFlags &R, ElementCount VF, VPCostContext &Ctx)
Compute the cost of a vector intrinsic with ID and Operands.
static InstructionCost computeMemIntrinsicCost(Intrinsic::ID IID, Type *Ty, bool IsMasked, Align Alignment, VPCostContext &Ctx)
Helper function for computing the cost of vector memory intrinsic.
A common mixin class for widening memory operations.
virtual VPRecipeBase * getAsRecipe()=0
Return a VPRecipeBase* to the current object.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
VPWidenRecipe * clone() override
Clone the current recipe.
unsigned getOpcode() const
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
bool hasVF(ElementCount VF) const
const DataLayout & getDataLayout() const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
bool hasScalableVF() const
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPSymbolicValue & getVectorTripCount()
The vector trip count.
VPValue * getBackedgeTakenCount() const
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
VPBasicBlock * getVectorPreheader() const
Returns the preheader of the vector loop region, if one exists, or null otherwise.
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop, i.e.
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
constexpr ScalarTy getFixedValue() const
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_isa< To... > m_Isa()
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
match_combine_or< CastInst_match< OpTy, TruncInst >, OpTy > m_TruncOrSelf(const OpTy &Op)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::FAdd, true > m_c_FAdd(const LHS &L, const RHS &R)
Matches FAdd with LHS and RHS in either order.
LogicalOp_match< LHS, RHS, Instruction::And, true > m_c_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R with LHS and RHS in either order.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
specificloop_ty m_SpecificLoop(const Loop *L)
bool match(const SCEV *S, const Pattern &P)
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ComputeReductionResult, Op0_t > m_ComputeReductionResult(const Op0_t &Op0)
auto m_WidenAnyExtend(const Op0_t &Op0)
match_bind< VPIRValue > m_VPIRValue(VPIRValue *&V)
Match a VPIRValue.
VPInstruction_match< VPInstruction::StepVector > m_StepVector()
auto m_VPPhi(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
auto m_WidenIntrinsic(const T &...Ops)
VPInstruction_match< VPInstruction::ExitingIVValue, Op0_t > m_ExitingIVValue(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
match_bind< VPSingleDefRecipe > m_VPSingleDefRecipe(VPSingleDefRecipe *&V)
Match a VPSingleDefRecipe, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
auto m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_True()
auto m_VPValue()
Match an arbitrary VPValue and ignore it.
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
match_bind< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
auto m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
bool cannotHoistOrSinkRecipe(const VPRecipeBase &R, bool Sinking=false)
Return true if we do not know how to (mechanically) hoist or sink R.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
VPInstruction * findCanonicalIVIncrement(VPlan &Plan)
Find the canonical IV increment of Plan's vector loop region.
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPRecipeBase * findRecipe(VPValue *Start, PredT Pred)
Search Start's users for a recipe satisfying Pred, looking through recipes with definitions.
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) TODO: Int...
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
bool isUniformAcrossVFsAndUFs(const VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms li...
void stable_sort(R &&Range)
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
SmallVector< VPRegisterUsage, 8 > calculateRegisterUsageForPlan(VPlan &Plan, ArrayRef< ElementCount > VFs, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &ValuesToIgnore)
Estimate the register usage for Plan and vectorization factors in VFs by calculating the highest numb...
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
UncountableExitStyle
Different methods of handling early exits.
@ ReadOnly
No side effects to worry about, so we can process any uncountable exits in the loop and branch either...
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ FSub
Subtraction of floats.
@ FAddChainWithSubs
A chain of fadds and fsubs.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector....
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto make_second_range(ContainerTy &&c)
Given a container of pairs, return a range over the second elements.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result value is uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
MDNode * Scope
The tag for alias scope specification (used with noalias).
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
This reduction is unordered with the partial result scaled down by some factor.
Holds the VFShape for a specific scalar to vector function mapping.
Encapsulates information needed to describe a parameter.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
static bool isFreeScalarIntrinsic(Intrinsic::ID ID)
Returns true if ID is a pseudo intrinsic that is dropped via scalarization rather than widened.
CallWideningKind
Choice for how to widen a call at a given VF.
bool isMaskRequired(Instruction *I) const
Forwards to LoopVectorizationCostModel::isMaskRequired.
PredicatedScalarEvolution & PSE
bool willBeScalarized(Instruction *I, ElementCount VF) const
Returns true if I is known to be scalarized at VF.
TargetTransformInfo::TargetCostKind CostKind
const TargetLibraryInfo & TLI
std::optional< CallWideningKind > getLegacyCallKind(CallInst *CI, ElementCount VF) const
Returns the legacy call widening decision for CI at VF, or std::nullopt if none was recorded.
const TargetTransformInfo & TTI
A VPValue representing a live-in from the input IR or a constant.
Type * getType() const
Returns the type of the underlying IR value.
A struct that represents some properties of the register usage of a loop.
SmallMapVector< unsigned, unsigned, 4 > MaxLocalUsers
Holds the maximum number of concurrent live intervals in the loop.
InstructionCost spillCost(const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, unsigned OverrideMaxNumRegs=0) const
Calculate the estimated cost of any spills due to using more registers than the number available for ...
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Type * getType() const
Returns the scalar type of this symbolic value.
bool isMaterialized() const
Returns true if this symbolic value has been materialized.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...