58#define DEBUG_TYPE "vplan"
60#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
64 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() :
nullptr);
84 : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
86 Def->addDefinedValue(
this);
90 assert(Users.empty() &&
"trying to delete a VPValue with remaining users");
92 Def->removeDefinedValue(
this);
95#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
106 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() :
nullptr);
112 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(
this);
114 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() :
nullptr);
121 return cast_or_null<VPRecipeBase>(
Def);
125 return cast_or_null<VPRecipeBase>(
Def);
133 while ((Next = Next->getParent()))
139 for (
unsigned i = 0; i < WorkList.
size(); i++) {
140 T *Current = WorkList[i];
141 if (Current->getNumPredecessors() == 0)
143 auto &Predecessors = Current->getPredecessors();
144 WorkList.
insert(Predecessors.begin(), Predecessors.end());
159 return cast<VPBasicBlock>(
Block);
166 return cast<VPBasicBlock>(
Block);
172 "Can only set plan on its entry or preheader block.");
181 return cast<VPBasicBlock>(
Block);
188 return cast<VPBasicBlock>(
Block);
192 if (!Successors.empty() || !Parent)
195 "Block w/o successors not the exiting block of its parent.");
200 if (!Predecessors.empty() || !Parent)
203 "Block w/o predecessors not the entry of its parent.");
214 while (It !=
end() && It->isPhi())
223 : VF(VF), UF(UF),
CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
225 TypeAnalysis(Plan->getCanonicalIV()->getScalarType(), Ctx) {}
229 return Def->getLiveInIRValue();
243 if (!VecPart->getType()->isVectorTy()) {
244 assert(
Instance.Lane.isFirstLane() &&
"cannot get lane > 0 for scalar");
260 "Trying to access a single scalar per part but has multiple scalars "
269 auto GetBroadcastInstrs = [
this, Def](
Value *V) {
270 bool SafeToHoist = Def->isDefinedOutsideVectorRegions();
278 if (LoopVectorPreHeader)
290 assert(Def->isLiveIn() &&
"expected a live-in");
293 Value *IRV = Def->getLiveInIRValue();
294 Value *
B = GetBroadcastInstrs(IRV);
299 Value *ScalarValue =
get(Def, {Part, 0});
303 set(Def, ScalarValue, Part);
314 assert((isa<VPWidenIntOrFpInductionRecipe>(Def->getDefiningRecipe()) ||
315 isa<VPScalarIVStepsRecipe>(Def->getDefiningRecipe()) ||
316 isa<VPExpandSCEVRecipe>(Def->getDefiningRecipe())) &&
317 "unexpected recipe found to be invariant");
322 auto *LastInst = cast<Instruction>(
get(Def, {Part, LastLane}));
328 isa<PHINode>(LastInst)
339 Value *VectorValue =
nullptr;
341 VectorValue = GetBroadcastInstrs(ScalarValue);
342 set(Def, VectorValue, Part);
347 set(Def, Undef, Part);
350 VectorValue =
get(Def, Part);
357 VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
365 if (
LVer && (isa<LoadInst>(Orig) || isa<StoreInst>(Orig)))
374 if (
Instruction *ToI = dyn_cast<Instruction>(To)) {
396 << DIL->getFilename() <<
" Line: " << DIL->getLine());
420 for (
VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
425 assert(PredBB &&
"Predecessor basic-block not found building successor.");
429 auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
430 if (isa<UnreachableInst>(PredBBTerminator)) {
431 assert(PredVPSuccessors.size() == 1 &&
432 "Predecessor ending w/o branch must have single successor.");
433 DebugLoc DL = PredBBTerminator->getDebugLoc();
437 }
else if (TermBr && !TermBr->isConditional()) {
438 TermBr->setSuccessor(0, NewBB);
442 unsigned idx = PredVPSuccessors.front() ==
this ? 0 : 1;
443 assert(!TermBr->getSuccessor(idx) &&
444 "Trying to reset an existing successor block.");
445 TermBr->setSuccessor(idx, NewBB);
453 assert(getHierarchicalSuccessors().
size() <= 2 &&
454 "VPIRBasicBlock can have at most two successors at the moment!");
456 executeRecipes(State, getIRBasicBlock());
457 if (getSingleSuccessor()) {
458 assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
461 getIRBasicBlock()->getTerminator()->eraseFromParent();
464 for (
VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
467 assert(PredBB &&
"Predecessor basic-block not found building successor.");
471 auto *TermBr = cast<BranchInst>(PredBBTerminator);
475 unsigned idx = PredVPSuccessors.front() ==
this ? 0 : 1;
476 assert(!TermBr->getSuccessor(idx) &&
477 "Trying to reset an existing successor block.");
478 TermBr->setSuccessor(idx, IRBB);
490 auto *R = dyn_cast<VPRegionBlock>(BB);
491 return R && !R->isReplicator();
496 !((SingleHPred = getSingleHierarchicalPredecessor()) &&
499 (SingleHPred->
getParent() == getEnclosingLoopRegion() &&
500 !IsLoopRegion(SingleHPred))) &&
501 !(Replica && getPredecessors().empty())) {
511 NewBB = createEmptyBasicBlock(State->
CFG);
524 executeRecipes(State, NewBB);
529 for (
auto *Def : R.definedValues())
530 Def->replaceAllUsesWith(NewValue);
532 for (
unsigned I = 0, E = R.getNumOperands();
I != E;
I++)
533 R.setOperand(
I, NewValue);
539 <<
" in BB:" << BB->
getName() <<
'\n');
551 assert((SplitAt == end() || SplitAt->getParent() ==
this) &&
552 "can only split at a position in the same block");
577 if (
P &&
P->isReplicator()) {
579 assert(!cast<VPRegionBlock>(
P)->isReplicator() &&
580 "unexpected nested replicate regions");
589 "block with multiple successors doesn't have a recipe as terminator");
602 "conditional branch recipe");
609 "block with 0 or 1 successors terminated by conditional branch recipe");
629#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
631 if (getSuccessors().empty()) {
632 O << Indent <<
"No successors\n";
634 O << Indent <<
"Successor(s): ";
636 for (
auto *Succ : getSuccessors())
637 O << LS << Succ->getName();
644 O << Indent <<
getName() <<
":\n";
646 auto RecipeIndent = Indent +
" ";
652 printSuccessors(O, Indent);
666 bool InRegion = Entry->getParent();
670 Old2NewVPBlocks[BB] = NewBB;
671 if (InRegion && BB->getNumSuccessors() == 0) {
672 assert(!Exiting &&
"Multiple exiting blocks?");
676 assert((!InRegion || Exiting) &&
"regions must have a single exiting block");
683 NewPreds.
push_back(Old2NewVPBlocks[Pred]);
688 NewSuccs.
push_back(Old2NewVPBlocks[Succ]);
696 for (
const auto &[OldBB, NewBB] :
699 for (
const auto &[OldPred, NewPred] :
700 zip(OldBB->getPredecessors(), NewBB->getPredecessors()))
701 assert(NewPred == Old2NewVPBlocks[OldPred] &&
"Different predecessors");
703 for (
const auto &[OldSucc, NewSucc] :
704 zip(OldBB->successors(), NewBB->successors()))
705 assert(NewSucc == Old2NewVPBlocks[OldSucc] &&
"Different successors");
709 return std::make_pair(Old2NewVPBlocks[Entry],
710 Exiting ? Old2NewVPBlocks[Exiting] :
nullptr);
714 const auto &[NewEntry, NewExiting] =
cloneFrom(getEntry());
718 Block->setParent(NewRegion);
726 Block->dropAllReferences(NewValue);
733 if (!isReplicator()) {
750 Block->execute(State);
757 assert(!State->
Instance &&
"Replicating a Region with non-null instance.");
762 for (
unsigned Part = 0,
UF = State->
UF; Part <
UF; ++Part) {
771 Block->execute(State);
788 if (!isReplicator()) {
795 <<
": vector loop backedge\n");
796 Cost += BackedgeCost;
811 VPBasicBlock *Then = cast<VPBasicBlock>(getEntry()->getSuccessors()[0]);
822#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
825 O << Indent << (isReplicator() ?
"<xVFxUF> " :
"<x1> ") <<
getName() <<
": {";
826 auto NewIndent = Indent +
" ";
831 O << Indent <<
"}\n";
833 printSuccessors(O, Indent);
838 for (
auto &KV : LiveOuts)
845 Block->dropAllReferences(&DummyValue);
849 Preheader->dropAllReferences(&DummyValue);
852 for (
VPValue *VPV : VPLiveInsToFree)
854 if (BackedgeTakenCount)
855 delete BackedgeTakenCount;
859 bool RequiresScalarEpilogueCheck,
860 bool TailFolded,
Loop *TheLoop) {
863 auto Plan = std::make_unique<VPlan>(Entry, VecPreheader);
871 auto *TopRegion =
new VPRegionBlock(HeaderVPBB, LatchVPBB,
"vector loop",
879 if (!RequiresScalarEpilogueCheck) {
910 ScalarLatchTerm->getDebugLoc(),
"cmp.n");
912 ScalarLatchTerm->getDebugLoc());
917 Value *CanonicalIVStartValue,
920 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
923 ConstantInt::get(TripCountV->
getType(), 1),
924 "trip.count.minus.1");
925 BackedgeTakenCount->setUnderlyingValue(TCMO);
928 VectorTripCount.setUnderlyingValue(VectorTripCountV);
932 VFxUF.setUnderlyingValue(
938 if (CanonicalIVStartValue) {
939 VPValue *VPV = getOrAddLiveIn(CanonicalIVStartValue);
940 auto *
IV = getCanonicalIV();
943 return isa<VPScalarIVStepsRecipe>(U) ||
944 isa<VPScalarCastRecipe>(U) ||
945 isa<VPDerivedIVRecipe>(U) ||
946 cast<VPInstruction>(U)->getOpcode() ==
949 "the canonical IV should only be used by its increment or "
950 "ScalarIVSteps when resetting the start value");
951 IV->setOperand(0, VPV);
962 R.moveBefore(*IRMiddleVPBB, IRMiddleVPBB->
end());
984 cast<BranchInst>(VectorPreHeader->
getTerminator())->setSuccessor(0,
nullptr);
994 cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());
1001 assert((MiddleSuccs.size() == 1 || MiddleSuccs.size() == 2) &&
1002 "middle block has unexpected successors");
1004 MiddleSuccs.size() == 1 ? MiddleSuccs[0] : MiddleSuccs[1]);
1005 assert(!isa<VPIRBasicBlock>(ScalarPhVPBB) &&
1006 "scalar preheader cannot be wrapped already");
1020 Block->execute(State);
1027 VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
1030 if (isa<VPWidenPHIRecipe>(&R))
1033 if (isa<VPWidenPointerInductionRecipe>(&R) ||
1034 isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1036 if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1037 Phi = cast<PHINode>(State->
get(R.getVPSingleValue(), 0));
1039 auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1041 "recipe generating only scalars should have been replaced");
1042 auto *
GEP = cast<GetElementPtrInst>(State->
get(WidenPhi, 0));
1043 Phi = cast<PHINode>(
GEP->getPointerOperand());
1046 Phi->setIncomingBlock(1, VectorLatchBB);
1050 Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
1055 auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1060 bool SinglePartNeeded =
1061 isa<VPCanonicalIVPHIRecipe>(PhiR) ||
1062 isa<VPFirstOrderRecurrencePHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
1063 (isa<VPReductionPHIRecipe>(PhiR) &&
1064 cast<VPReductionPHIRecipe>(PhiR)->isOrdered());
1066 isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
1067 (isa<VPReductionPHIRecipe>(PhiR) &&
1068 cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1069 unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->
UF;
1071 for (
unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1072 Value *Phi = State->
get(PhiR, Part, NeedsScalar);
1074 State->
get(PhiR->getBackedgeValue(),
1075 SinglePartNeeded ? State->
UF - 1 : Part, NeedsScalar);
1076 cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
1082 DominatorTree::VerificationLevel::Fast) &&
1083 "DT not preserved correctly");
1089 return getVectorLoopRegion()->cost(
VF, Ctx);
1092#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1096 if (VFxUF.getNumUsers() > 0) {
1102 if (VectorTripCount.getNumUsers() > 0) {
1105 O <<
" = vector-trip-count";
1108 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
1110 BackedgeTakenCount->printAsOperand(O,
SlotTracker);
1111 O <<
" = backedge-taken count";
1115 if (TripCount->isLiveIn())
1118 O <<
" = original trip-count";
1126 O <<
"VPlan '" <<
getName() <<
"' {";
1130 if (!getPreheader()->empty()) {
1140 if (!LiveOuts.empty())
1142 for (
const auto &KV : LiveOuts) {
1152 RSO <<
Name <<
" for ";
1154 RSO <<
"VF={" << VFs[0];
1163 RSO <<
"UF={" << UFs[0];
1183 assert(LiveOuts.count(PN) == 0 &&
"an exit value for PN already exists");
1184 LiveOuts.insert({PN,
new VPLiveOut(PN, V)});
1195 NewDeepRPOT(NewEntry);
1198 for (
const auto &[OldBB, NewBB] :
1199 zip(VPBlockUtils::blocksOnly<VPBasicBlock>(OldDeepRPOT),
1200 VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT))) {
1201 assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().
size() &&
1202 "blocks must have the same number of recipes");
1203 for (
const auto &[OldR, NewR] :
zip(*OldBB, *NewBB)) {
1204 assert(OldR.getNumOperands() == NewR.getNumOperands() &&
1205 "recipes must have the same number of operands");
1206 assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() &&
1207 "recipes must define the same number of operands");
1208 for (
const auto &[OldV, NewV] :
1209 zip(OldR.definedValues(), NewR.definedValues()))
1210 Old2NewVPValues[OldV] = NewV;
1216 VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT)) {
1218 for (
unsigned I = 0, E = NewR.getNumOperands();
I != E; ++
I) {
1220 NewR.setOperand(
I, NewOp);
1228 const auto &[NewEntry, __] =
cloneFrom(Entry);
1231 auto *NewPlan =
new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry));
1233 for (
VPValue *OldLiveIn : VPLiveInsToFree) {
1234 Old2NewVPValues[OldLiveIn] =
1235 NewPlan->getOrAddLiveIn(OldLiveIn->getLiveInIRValue());
1237 Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
1238 Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
1239 if (BackedgeTakenCount) {
1240 NewPlan->BackedgeTakenCount =
new VPValue();
1241 Old2NewVPValues[BackedgeTakenCount] = NewPlan->BackedgeTakenCount;
1243 assert(TripCount &&
"trip count must be set");
1244 if (TripCount->isLiveIn())
1245 Old2NewVPValues[TripCount] =
1246 NewPlan->getOrAddLiveIn(TripCount->getLiveInIRValue());
1254 for (
const auto &[
_, LO] : LiveOuts)
1255 NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]);
1261 NewPlan->Name =
Name;
1263 "TripCount must have been added to Old2NewVPValues");
1264 NewPlan->TripCount = Old2NewVPValues[TripCount];
1268#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1271 return (isa<VPRegionBlock>(
Block) ?
"cluster_N" :
"N") +
1276 const std::string &
Name =
Block->getName();
1285 OS <<
"digraph VPlan {\n";
1286 OS <<
"graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
1297 for (
auto Line : Lines)
1302 OS <<
"node [shape=rect, fontname=Courier, fontsize=30]\n";
1303 OS <<
"edge [fontname=Courier, fontsize=30]\n";
1304 OS <<
"compound=true\n";
1324 bool Hidden,
const Twine &Label) {
1329 OS << Indent << getUID(
Tail) <<
" -> " << getUID(Head);
1330 OS <<
" [ label=\"" << Label <<
'\"';
1332 OS <<
" ltail=" << getUID(
From);
1334 OS <<
" lhead=" << getUID(To);
1336 OS <<
"; splines=none";
1341 auto &Successors =
Block->getSuccessors();
1342 if (Successors.size() == 1)
1343 drawEdge(
Block, Successors.front(),
false,
"");
1344 else if (Successors.size() == 2) {
1345 drawEdge(
Block, Successors.front(),
false,
"T");
1346 drawEdge(
Block, Successors.back(),
false,
"F");
1348 unsigned SuccessorNumber = 0;
1375 EmitLine(Line,
" +\n");
1376 EmitLine(
Lines.back(),
"\n");
1379 OS << Indent <<
"]\n";
1385 OS << Indent <<
"subgraph " << getUID(
Region) <<
" {\n";
1387 OS << Indent <<
"fontname=Courier\n"
1388 << Indent <<
"label=\""
1396 OS << Indent <<
"}\n";
1401 if (
auto *Inst = dyn_cast<Instruction>(V)) {
1402 if (!Inst->getType()->isVoidTy()) {
1403 Inst->printAsOperand(O,
false);
1406 O << Inst->getOpcodeName() <<
" ";
1407 unsigned E = Inst->getNumOperands();
1409 Inst->getOperand(0)->printAsOperand(O,
false);
1410 for (
unsigned I = 1;
I < E; ++
I)
1411 Inst->getOperand(
I)->printAsOperand(O <<
", ",
false);
1414 V->printAsOperand(O,
false);
1419template void DomTreeBuilder::Calculate<VPDominatorTree>(
VPDominatorTree &DT);
1422 replaceUsesWithIf(New, [](
VPUser &,
unsigned) {
return true; });
1434 for (
unsigned J = 0; J < getNumUsers();) {
1436 bool RemovedUser =
false;
1452#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1470 visitBlock(
Base, Old2New, IAI);
1474void VPInterleavedAccessInfo::visitBlock(
VPBlockBase *
Block, Old2NewTy &Old2New,
1478 if (isa<VPWidenPHIRecipe>(&VPI))
1480 assert(isa<VPInstruction>(&VPI) &&
"Can only handle VPInstructions");
1481 auto *VPInst = cast<VPInstruction>(&VPI);
1483 auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue());
1490 auto NewIGIter = Old2New.find(IG);
1491 if (NewIGIter == Old2New.end())
1493 IG->getFactor(), IG->isReverse(), IG->getAlign());
1495 if (Inst == IG->getInsertPos())
1496 Old2New[IG]->setInsertPos(VPInst);
1498 InterleaveGroupMap[VPInst] = Old2New[IG];
1499 InterleaveGroupMap[VPInst]->insertMember(
1500 VPInst, IG->getIndex(Inst),
1501 Align(IG->isReverse() ? (-1) *
int(IG->getFactor())
1502 : IG->getFactor()));
1505 visitRegion(
Region, Old2New, IAI);
1516void VPSlotTracker::assignName(
const VPValue *V) {
1517 assert(!VPValue2Name.contains(V) &&
"VPValue already has a name!");
1518 auto *UV = V->getUnderlyingValue();
1520 VPValue2Name[V] = (
Twine(
"vp<%") +
Twine(NextSlot) +
">").str();
1529 UV->printAsOperand(S,
false);
1530 assert(!
Name.empty() &&
"Name cannot be empty.");
1534 const auto &[
A,
_] = VPValue2Name.insert({V, BaseName});
1537 if (
V->isLiveIn() && isa<ConstantInt, ConstantFP>(UV))
1542 const auto &[
C, UseInserted] = BaseName2Version.insert({BaseName, 0});
1545 A->second = (BaseName +
Twine(
".") +
Twine(
C->second)).str();
1549void VPSlotTracker::assignNames(
const VPlan &
Plan) {
1551 assignName(&
Plan.VFxUF);
1552 assignName(&
Plan.VectorTripCount);
1553 if (
Plan.BackedgeTakenCount)
1554 assignName(
Plan.BackedgeTakenCount);
1562 VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
1566void VPSlotTracker::assignNames(
const VPBasicBlock *VPBB) {
1568 for (
VPValue *Def : Recipe.definedValues())
1573 std::string
Name = VPValue2Name.lookup(V);
1587 "VPValue defined by a recipe in a VPlan?");
1590 if (
auto *UV = V->getUnderlyingValue()) {
1593 UV->printAsOperand(S,
false);
1594 return (
Twine(
"ir<") +
Name +
">").str();
1601 return all_of(Def->users(),
1602 [Def](
const VPUser *U) { return U->onlyFirstLaneUsed(Def); });
1606 return all_of(Def->users(),
1607 [Def](
const VPUser *U) { return U->onlyFirstPartUsed(Def); });
1615 if (
auto *E = dyn_cast<SCEVConstant>(Expr))
1617 else if (
auto *E = dyn_cast<SCEVUnknown>(Expr))
1628 if (isa<VPActiveLaneMaskPHIRecipe>(V))
1631 auto IsWideCanonicalIV = [](
VPValue *
A) {
1632 return isa<VPWidenCanonicalIVRecipe>(
A) ||
1633 (isa<VPWidenIntOrFpInductionRecipe>(
A) &&
1634 cast<VPWidenIntOrFpInductionRecipe>(
A)->isCanonical());
1641 IsWideCanonicalIV(
A));
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
dxil pretty DXIL Metadata Pretty Printer
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static void dumpEdges(CFGMST< Edge, BBInfo > &MST, GCOVFunction &GF)
Generic dominator tree construction - this file provides routines to construct immediate dominator in...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
iv Induction Variable Users
This file provides a LoopVectorizationPlanner class.
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
static T * getPlanEntry(T *Start)
static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB)
Replace VPBB with a VPIRBasicBlock wrapping IRBB.
static bool hasConditionalTerminator(const VPBasicBlock *VPBB)
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, DenseMap< VPValue *, VPValue * > &Old2NewVPValues)
static std::pair< VPBlockBase *, VPBlockBase * > cloneFrom(VPBlockBase *Entry)
This file contains the declarations of the Vectorization Plan base classes:
static bool IsCondBranch(unsigned BrOpc)
static const uint32_t IV[8]
LLVM Basic Block Representation.
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW=nullptr, bool ShouldPreserveUseListOrder=false, bool IsForDebug=false) const
Print the basic block to an output stream with an optional AssemblyAnnotationWriter.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
static ConstantInt * getTrue(LLVMContext &Context)
std::optional< const DILocation * > cloneByMultiplyingDuplicationFactor(unsigned DF) const
Returns a new DILocation with duplication factor DF * current duplication factor encoded in the discr...
This class represents an Operation in the Expression.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Core dominator tree base class.
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
constexpr bool isScalar() const
Exactly one element.
bool shouldEmitDebugInfoForProfiling() const
Returns true if we should emit debug info for profiling.
DomTreeT & getDomTree()
Flush DomTree updates and return DomTree.
void applyUpdates(ArrayRef< typename DomTreeT::UpdateType > Updates)
Submit updates to all available trees.
void flush()
Apply all pending updates to available trees and flush all BasicBlocks awaiting deletion.
Common base class shared among various IRBuilders.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
UnreachableInst * CreateUnreachable()
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
BasicBlock * GetInsertBlock() const
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
InsertPoint saveIP() const
Returns the current insert point.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
static InstructionCost getInvalid(CostType Val=0)
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
The group of interleaved loads/stores sharing the same stride and close to each other.
Drive the analysis of interleaved memory accesses in the loop.
InterleaveGroup< Instruction > * getInterleaveGroup(const Instruction *Instr) const
Get the interleave group that Instr belongs to.
This is an important class for using LLVM in a threaded context.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void annotateInstWithNoAlias(Instruction *VersionedInst, const Instruction *OrigInst)
Add the noalias annotations to VersionedInst.
Represents a single loop in the control flow graph.
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
This class provides computation of slot numbers for LLVM Assembly writing.
A SetVector that performs no allocations if smaller than a certain size.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static IntegerType * getInt1Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
This function has undefined behavior.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
VPBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
RecipeListTy::iterator iterator
Instruction iterators...
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
iterator begin()
Recipe iterator methods.
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of this VPBasicBlock.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBsicBlock to O, prefixing all lines with Indent.
bool isExiting() const
Returns true if the block is exiting it's parent region.
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
iterator_range< VPBlockBase ** > successors()
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
VPBlockBase * getEnclosingBlockWithPredecessors()
const VPBlocksTy & getPredecessors() const
static void deleteCFG(VPBlockBase *Entry)
Delete all blocks reachable from a given VPBlockBase, inclusive.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
VPBlockBase * getSingleHierarchicalSuccessor()
VPBlockBase * getSinglePredecessor() const
const VPBlocksTy & getHierarchicalSuccessors()
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
const VPBasicBlock * getEntryBasicBlock() const
const VPBlocksTy & getSuccessors() const
Helper for GraphTraits specialization that traverses through VPRegionBlocks.
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To)
Connect VPBlockBases From and To bi-directionally.
VPlan-based builder utility analogous to IRBuilder.
This class augments a recipe with a set of VPValues defined by the recipe.
void dump() const
Dump the VPDef to stderr (for debugging).
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPDef prints itself.
Recipe to expand a SCEV expression.
A special type of VPBasicBlock that wraps an existing IR basic block.
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
This is a concrete Recipe that models a single VPlan-level instruction.
VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI)
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
static VPLane getFirstLane()
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
A value that is used outside the VPlan.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPBasicBlock * getParent()
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
const VPBlockBase * getEntry() const
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of the block.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
This class can be used to assign names to VPValues.
std::string getOrCreateName(const VPValue *V) const
Returns the name assigned to V, if there is one, otherwise try to construct one from the underlying v...
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
void dump() const
Dump the value to stderr (for debugging).
VPValue(const unsigned char SC, Value *UV=nullptr, VPDef *Def=nullptr)
void print(raw_ostream &OS, VPSlotTracker &Tracker) const
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
VPDef * Def
Pointer to the VPDef that defines this VPValue.
VPlanPrinter prints a given VPlan to a given output stream.
LLVM_DUMP_METHOD void dump()
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
void prepareToExecute(Value *TripCount, Value *VectorTripCount, Value *CanonicalIVStartValue, VPTransformState &State)
Prepare the plan for execution, setting up the required live-in values.
VPBasicBlock * getEntry()
VPValue & getVectorTripCount()
The vector trip count.
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
void addLiveOut(PHINode *PN, VPValue *V)
VPBasicBlock * getPreheader()
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
void addSCEVExpansion(const SCEV *S, VPValue *V)
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
static VPlanPtr createInitialVPlan(const SCEV *TripCount, ScalarEvolution &PSE, bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop)
Create initial VPlan, having an "entry" VPBasicBlock (wrapping original scalar pre-header ) which con...
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
void print(raw_ostream &O) const
Print this VPlan to O.
VPValue * getSCEVExpansion(const SCEV *S) const
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
StringRef getName() const
Return a constant reference to the value's name.
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
@ C
The default llvm calling convention, compatible with C.
std::string EscapeString(const std::string &Label)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
BinaryVPInstruction_match< Op0_t, Op1_t, VPInstruction::ActiveLaneMask > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1)
VPCanonicalIVPHI_match m_CanonicalIV()
VPScalarIVSteps_match< Op0_t, Op1_t > m_ScalarIVSteps(const Op0_t &Op0, const Op1_t &Op1)
BinaryVPInstruction_match< Op0_t, Op1_t, VPInstruction::BranchOnCount > m_BranchOnCount(const Op0_t &Op0, const Op1_t &Op1)
UnaryVPInstruction_match< Op0_t, VPInstruction::BranchOnCond > m_BranchOnCond(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
bool isUniformAfterVectorization(const VPValue *VPV)
Returns true if VPV is uniform after vectorization.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, ScalarEvolution &SE)
Get or create a VPValue that corresponds to the expansion of Expr.
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
bool isHeaderMask(const VPValue *V, VPlan &Plan)
Return true if V is a header mask in Plan.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
auto successors(const MachineBasicBlock *BB)
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
cl::opt< bool > EnableFSDiscriminator
cl::opt< bool > EnableVPlanNativePath("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))
std::unique_ptr< VPlan > VPlanPtr
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
unsigned getReciprocalPredBlockProb()
A helper function that returns the reciprocal of the block probability of predicated blocks.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Struct to hold various analysis needed for cost computations.
const TargetTransformInfo & TTI
VPIteration represents a single point in the iteration space of the output (vectorized and/or unrolle...
void print(raw_ostream &O) const