70#define DEBUG_TYPE "simple-loop-unswitch"
75STATISTIC(NumBranches,
"Number of branches unswitched");
76STATISTIC(NumSwitches,
"Number of switches unswitched");
77STATISTIC(NumGuards,
"Number of guards turned into branches for unswitching");
78STATISTIC(NumTrivial,
"Number of unswitches that are trivial");
80 NumCostMultiplierSkipped,
81 "Number of unswitch candidates that had their cost multiplier skipped");
83 "Number of invariant conditions injected and unswitched");
87 cl::desc(
"Forcibly enables non-trivial loop unswitching rather than "
88 "following the configuration passed into the pass."));
92 cl::desc(
"The cost threshold for unswitching a loop."));
96 cl::desc(
"Enable unswitch cost multiplier that prohibits exponential "
97 "explosion in nontrivial unswitch."));
100 cl::desc(
"Toplevel siblings divisor for cost multiplier."));
103 cl::desc(
"Number of unswitch candidates that are ignored when calculating "
104 "cost multiplier."));
107 cl::desc(
"If enabled, simple loop unswitching will also consider "
108 "llvm.experimental.guard intrinsics as unswitch candidates."));
110 "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
112 cl::desc(
"If enabled, drop make.implicit metadata in unswitched implicit "
113 "null checks to save time analyzing if we can keep it."));
116 cl::desc(
"Max number of memory uses to explore during "
117 "partial unswitching analysis"),
121 cl::desc(
"If enabled, the freeze instruction will be added to condition "
122 "of loop unswitch to prevent miscompilation."));
125 "simple-loop-unswitch-inject-invariant-conditions",
cl::Hidden,
126 cl::desc(
"Whether we should inject new invariants and unswitch them to "
127 "eliminate some existing (non-invariant) conditions."),
131 "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
133 "unswitch on them to eliminate branches that are "
134 "not-taken 1/<this option> times or less."),
144 : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
147struct InjectedInvariant {
155 : Pred(Pred),
LHS(
LHS),
RHS(
RHS), InLoopSucc(InLoopSucc) {}
158struct NonTrivialUnswitchCandidate {
161 std::optional<InstructionCost>
Cost;
162 std::optional<InjectedInvariant> PendingInjection;
163 NonTrivialUnswitchCandidate(
165 std::optional<InstructionCost> Cost = std::nullopt,
166 std::optional<InjectedInvariant> PendingInjection = std::nullopt)
167 : TI(TI), Invariants(Invariants),
Cost(
Cost),
168 PendingInjection(PendingInjection) {};
170 bool hasPendingInjection()
const {
return PendingInjection.has_value(); }
194 assert(!L.isLoopInvariant(&Root) &&
195 "Only need to walk the graph if root itself is not invariant.");
208 for (
Value *OpV :
I.operand_values()) {
210 if (isa<Constant>(OpV))
214 if (L.isLoopInvariant(OpV)) {
225 if (Visited.
insert(OpI).second)
229 }
while (!Worklist.
empty());
236 assert(!isa<Constant>(Invariant) &&
"Why are we unswitching on a constant?");
241 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
244 if (UserI && L.contains(UserI))
255 auto *PN = dyn_cast<PHINode>(&
I);
262 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
278 for (
Value *Inv : Invariants) {
287 Direction ? &NormalSucc : &UnswitchedSucc);
296 for (
auto *Val :
reverse(ToDuplicate)) {
310 auto *DefiningAccess = MemUse->getDefiningAccess();
312 while (L.contains(DefiningAccess->getBlock())) {
315 if (
auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
317 MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
319 DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
330 Direction ? &NormalSucc : &UnswitchedSucc);
347 for (
auto i : seq<int>(0, PN.getNumOperands())) {
348 assert(PN.getIncomingBlock(i) == &OldExitingBB &&
349 "Found incoming block different from unique predecessor!");
350 PN.setIncomingBlock(i, &OldPH);
367 assert(&ExitBB != &UnswitchedBB &&
368 "Must have different loop exit and unswitched blocks!");
372 PN.getName() +
".split", InsertPt);
383 for (
int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
384 if (PN.getIncomingBlock(i) != &OldExitingBB)
387 Value *Incoming = PN.getIncomingValue(i);
390 PN.removeIncomingValue(i);
392 NewPN->addIncoming(Incoming, &OldPH);
398 NewPN->addIncoming(&PN, &ExitBB);
411 Loop *OldParentL = L.getParentLoop();
416 L.getExitBlocks(Exits);
417 Loop *NewParentL =
nullptr;
418 for (
auto *ExitBB : Exits)
420 if (!NewParentL || NewParentL->
contains(ExitL))
423 if (NewParentL == OldParentL)
429 "Can only hoist this loop up the nest!");
434 "Parent loop of this loop should contain this loop's preheader!");
449 for (
Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
453 return BB == &Preheader || L.contains(BB);
456 OldContainingL->getBlocksSet().erase(&Preheader);
458 OldContainingL->getBlocksSet().erase(BB);
481 const Loop *Current = TopMost;
511 LLVM_DEBUG(
dbgs() <<
" Trying to unswitch branch: " << BI <<
"\n");
518 bool FullUnswitch =
false;
521 if (L.isLoopInvariant(
Cond)) {
525 if (
auto *CondInst = dyn_cast<Instruction>(
Cond))
527 if (Invariants.
empty()) {
534 bool ExitDirection =
true;
535 int LoopExitSuccIdx = 0;
537 if (L.contains(LoopExitBB)) {
538 ExitDirection =
false;
541 if (L.contains(LoopExitBB)) {
546 auto *ContinueBB = BI.
getSuccessor(1 - LoopExitSuccIdx);
549 LLVM_DEBUG(
dbgs() <<
" Loop exit PHI's aren't loop-invariant!\n");
562 "non-full unswitch!\n");
568 dbgs() <<
" unswitching trivial invariant conditions for: " << BI
570 for (
Value *Invariant : Invariants) {
571 dbgs() <<
" " << *Invariant <<
" == true";
572 if (Invariant != Invariants.back())
604 if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
606 "A branch's parent isn't a predecessor!");
607 UnswitchedBB = LoopExitBB;
610 SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI, MSSAU);
642 "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
646 "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
649 *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
660 Updates.
push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
668 ParentBB->getTerminator()->eraseFromParent();
680 if (UnswitchedBB == LoopExitBB)
684 *ParentBB, *OldPH, FullUnswitch);
695 for (
Value *Invariant : Invariants)
742 Value *LoopCond =
SI.getCondition();
745 if (!L.isLoopInvariant(LoopCond))
748 auto *ParentBB =
SI.getParent();
755 auto IsTriviallyUnswitchableExitBlock = [&](
BasicBlock &BBToCheck) {
757 if (L.contains(&BBToCheck))
766 auto *TI = BBToCheck.getTerminator();
767 bool isUnreachable = isa<UnreachableInst>(TI);
768 return !isUnreachable ||
769 (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI));
773 for (
auto Case :
SI.cases())
774 if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
775 ExitCaseIndices.
push_back(Case.getCaseIndex());
779 if (IsTriviallyUnswitchableExitBlock(*
SI.getDefaultDest())) {
780 DefaultExitBB =
SI.getDefaultDest();
781 }
else if (ExitCaseIndices.
empty())
796 if (!ExitL || ExitL->
contains(OuterL))
810 SI.setDefaultDest(
nullptr);
818 ExitCases.reserve(ExitCaseIndices.
size());
823 auto CaseI =
SI.case_begin() +
Index;
826 if (!ExitL || ExitL->
contains(OuterL))
830 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
838 if (
SI.getNumCases() > 0 &&
840 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
842 CommonSuccBB =
SI.case_begin()->getCaseSuccessor();
843 if (!DefaultExitBB) {
847 if (
SI.getNumCases() == 0)
848 CommonSuccBB =
SI.getDefaultDest();
849 else if (
SI.getDefaultDest() != CommonSuccBB)
850 CommonSuccBB =
nullptr;
876 UnswitchedExitBBs.
insert(DefaultExitBB);
884 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
889 for (
auto &ExitCase :
reverse(ExitCases)) {
897 if (UnswitchedExitBBs.
insert(ExitBB).second)
904 BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
913 std::get<1>(ExitCase) = SplitExitBB;
918 for (
auto &ExitCase :
reverse(ExitCases)) {
920 BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
922 NewSIW.
addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
933 for (
const auto &Case :
SI.cases())
936 }
else if (DefaultCaseWeight) {
939 for (
const auto &Case :
SI.cases()) {
942 "case weight must be defined as default case weight is defined");
957 bool SkippedFirst = DefaultExitBB ==
nullptr;
958 for (
auto Case :
SI.cases()) {
960 "Non-common successor!");
972 }
else if (DefaultExitBB) {
974 "If we had no cases we'd have a common successor!");
979 auto LastCaseI = std::prev(
SI.case_end());
981 SI.setDefaultDest(LastCaseI->getCaseSuccessor());
992 for (
auto *UnswitchedExitBB : UnswitchedExitBBs) {
996 for (
auto SplitUnswitchedPair : SplitExitBBMap) {
1009 assert(DT.
verify(DominatorTree::VerificationLevel::Fast));
1039 bool Changed =
false;
1054 Visited.
insert(CurrentBB);
1061 if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
1069 if (
auto *
SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1073 if (isa<Constant>(
SI->getCondition()))
1087 auto *BI = dyn_cast<BranchInst>(CurrentBB->
getTerminator());
1088 if (!BI || BI->isConditional())
1091 CurrentBB = BI->getSuccessor(0);
1095 auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1103 if (!BI->isConditional() ||
1118 if (BI->isConditional())
1122 CurrentBB = BI->getSuccessor(0);
1127 }
while (L.contains(CurrentBB) && Visited.
insert(CurrentBB).second);
1165 NewBlocks.
reserve(L.getNumBlocks() + ExitBlocks.
size());
1176 VMap[OldBB] = NewBB;
1184 auto It = DominatingSucc.
find(BB);
1185 return It != DominatingSucc.
end() && It->second != UnswitchedSuccBB;
1189 auto *ClonedPH = CloneBlock(LoopPH);
1192 for (
auto *LoopBB : L.blocks())
1193 if (!SkipBlock(LoopBB))
1199 for (
auto *ExitBB : ExitBlocks) {
1200 if (SkipBlock(ExitBB))
1208 auto *MergeBB =
SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
1213 MergeBB->takeName(ExitBB);
1214 ExitBB->setName(
Twine(MergeBB->getName()) +
".split");
1217 auto *ClonedExitBB = CloneBlock(ExitBB);
1218 assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1219 "Exit block should have been split to have one successor!");
1220 assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1221 "Cloned exit block has the wrong successor!");
1227 std::prev(ClonedExitBB->end())))) {
1234 (isa<PHINode>(
I) || isa<LandingPadInst>(
I) || isa<CatchPadInst>(
I)) &&
1235 "Bad instruction in exit block!");
1237 assert(VMap.
lookup(&
I) == &ClonedI &&
"Mismatch in the value map!");
1240 if (SE && isa<PHINode>(
I))
1245 &*MergeBB->getFirstInsertionPt());
1246 I.replaceAllUsesWith(MergePN);
1247 MergePN->addIncoming(&
I, ExitBB);
1248 MergePN->addIncoming(&ClonedI, ClonedExitBB);
1257 for (
auto *ClonedBB : NewBlocks)
1261 if (
auto *II = dyn_cast<AssumeInst>(&
I))
1267 for (
auto *LoopBB : L.blocks())
1268 if (SkipBlock(LoopBB))
1270 if (
auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.
lookup(SuccBB)))
1271 for (
PHINode &PN : ClonedSuccBB->phis())
1272 PN.removeIncomingValue(LoopBB,
false);
1276 auto *ClonedParentBB = cast<BasicBlock>(VMap.
lookup(ParentBB));
1278 if (SuccBB == UnswitchedSuccBB)
1281 auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.
lookup(SuccBB));
1285 ClonedSuccBB->removePredecessor(ClonedParentBB,
1291 auto *ClonedSuccBB = cast<BasicBlock>(VMap.
lookup(UnswitchedSuccBB));
1292 Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1295 Value *ClonedConditionToErase =
nullptr;
1296 if (
auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1297 ClonedConditionToErase = BI->getCondition();
1298 else if (
auto *
SI = dyn_cast<SwitchInst>(ClonedTerminator))
1299 ClonedConditionToErase =
SI->getCondition();
1304 if (ClonedConditionToErase)
1311 for (
PHINode &PN : ClonedSuccBB->phis()) {
1315 for (
int i = PN.getNumOperands() - 1; i >= 0; --i) {
1316 if (PN.getIncomingBlock(i) != ClonedParentBB)
1322 PN.removeIncomingValue(i,
false);
1328 for (
auto *ClonedBB : NewBlocks) {
1330 if (SuccSet.
insert(SuccBB).second)
1331 DTUpdates.
push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1346 auto AddClonedBlocksToLoop = [&](
Loop &OrigL,
Loop &ClonedL) {
1347 assert(ClonedL.getBlocks().empty() &&
"Must start with an empty loop!");
1349 for (
auto *BB : OrigL.
blocks()) {
1350 auto *ClonedBB = cast<BasicBlock>(VMap.
lookup(BB));
1351 ClonedL.addBlockEntry(ClonedBB);
1364 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1376 LoopsToClone.
push_back({ClonedRootL, ChildL});
1378 Loop *ClonedParentL, *L;
1379 std::tie(ClonedParentL, L) = LoopsToClone.
pop_back_val();
1382 AddClonedBlocksToLoop(*L, *ClonedL);
1384 LoopsToClone.
push_back({ClonedL, ChildL});
1385 }
while (!LoopsToClone.
empty());
1406 Loop *ClonedL =
nullptr;
1411 auto *ClonedPH = cast<BasicBlock>(VMap.
lookup(OrigPH));
1412 auto *ClonedHeader = cast<BasicBlock>(VMap.
lookup(OrigHeader));
1418 Loop *ParentL =
nullptr;
1422 for (
auto *ExitBB : ExitBlocks)
1423 if (
auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.
lookup(ExitBB)))
1425 ExitLoopMap[ClonedExitBB] = ExitL;
1426 ClonedExitsInLoops.
push_back(ClonedExitBB);
1427 if (!ParentL || (ParentL != ExitL && ParentL->
contains(ExitL)))
1432 "The computed parent loop should always contain (or be) the parent of "
1433 "the original loop.");
1440 for (
auto *BB : OrigL.
blocks())
1441 if (
auto *ClonedBB = cast_or_null<BasicBlock>(VMap.
lookup(BB)))
1442 ClonedLoopBlocks.
insert(ClonedBB);
1453 if (Pred == ClonedPH)
1458 assert(ClonedLoopBlocks.
count(Pred) &&
"Found a predecessor of the loop "
1459 "header other than the preheader "
1460 "that is not part of the loop!");
1465 if (BlocksInClonedLoop.
insert(Pred).second && Pred != ClonedHeader)
1472 if (!BlocksInClonedLoop.
empty()) {
1473 BlocksInClonedLoop.
insert(ClonedHeader);
1475 while (!Worklist.
empty()) {
1478 "Didn't put block into the loop set!");
1486 if (ClonedLoopBlocks.
count(Pred) &&
1487 BlocksInClonedLoop.
insert(Pred).second)
1506 for (
auto *BB : OrigL.
blocks()) {
1507 auto *ClonedBB = cast_or_null<BasicBlock>(VMap.
lookup(BB));
1508 if (!ClonedBB || !BlocksInClonedLoop.
count(ClonedBB))
1520 for (
Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1521 PL->addBlockEntry(ClonedBB);
1528 for (
Loop *ChildL : OrigL) {
1529 auto *ClonedChildHeader =
1530 cast_or_null<BasicBlock>(VMap.
lookup(ChildL->getHeader()));
1531 if (!ClonedChildHeader || !BlocksInClonedLoop.
count(ClonedChildHeader))
1537 for (
auto *ChildLoopBB : ChildL->blocks())
1539 cast<BasicBlock>(VMap.
lookup(ChildLoopBB))) &&
1540 "Child cloned loop has a header within the cloned outer "
1541 "loop but not all of its blocks!");
1556 if (BlocksInClonedLoop.
empty())
1557 UnloopedBlockSet.
insert(ClonedPH);
1558 for (
auto *ClonedBB : ClonedLoopBlocks)
1559 if (!BlocksInClonedLoop.
count(ClonedBB))
1560 UnloopedBlockSet.
insert(ClonedBB);
1566 auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1568 return ExitLoopMap.
lookup(
LHS)->getLoopDepth() <
1569 ExitLoopMap.
lookup(
RHS)->getLoopDepth();
1574 while (!UnloopedBlockSet.
empty() && !OrderedClonedExitsInLoops.empty()) {
1575 assert(Worklist.
empty() &&
"Didn't clear worklist!");
1577 BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1592 if (!UnloopedBlockSet.
erase(PredBB)) {
1594 (BlocksInClonedLoop.
count(PredBB) || ExitLoopMap.
count(PredBB)) &&
1595 "Predecessor not mapped to a loop!");
1602 bool Inserted = ExitLoopMap.
insert({PredBB, ExitL}).second;
1604 assert(Inserted &&
"Should only visit an unlooped block once!");
1609 }
while (!Worklist.
empty());
1618 for (
auto *BB : llvm::concat<BasicBlock *const>(
1619 ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1621 OuterL->addBasicBlockToLoop(BB, LI);
1624 for (
auto &BBAndL : ExitLoopMap) {
1625 auto *BB = BBAndL.first;
1626 auto *OuterL = BBAndL.second;
1628 "Failed to put all blocks into outer loops!");
1635 for (
Loop *ChildL : OrigL) {
1636 auto *ClonedChildHeader =
1637 cast_or_null<BasicBlock>(VMap.
lookup(ChildL->getHeader()));
1638 if (!ClonedChildHeader || BlocksInClonedLoop.
count(ClonedChildHeader))
1642 for (
auto *ChildLoopBB : ChildL->blocks())
1644 "Cloned a child loop header but not all of that loops blocks!");
1648 *ChildL, ExitLoopMap.
lookup(ClonedChildHeader), VMap, LI));
1654 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1658 for (
BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1659 for (
const auto &VMap : VMaps)
1660 if (
BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1663 SuccBB->removePredecessor(ClonedBB);
1676 BB->dropAllReferences();
1679 BB->eraseFromParent();
1697 DeathCandidates.
append(L.blocks().begin(), L.blocks().end());
1698 while (!DeathCandidates.
empty()) {
1702 SuccBB->removePredecessor(BB);
1719 for (
Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1720 for (
auto *BB : DeadBlockSet)
1721 ParentL->getBlocksSet().erase(BB);
1723 [&](
BasicBlock *BB) { return DeadBlockSet.count(BB); });
1729 if (!DeadBlockSet.count(ChildL->getHeader()))
1732 assert(llvm::all_of(ChildL->blocks(),
1733 [&](BasicBlock *ChildBB) {
1734 return DeadBlockSet.count(ChildBB);
1736 "If the child loop header is dead all blocks in the child loop must "
1737 "be dead as well!");
1738 DestroyLoopCB(*ChildL, ChildL->
getName());
1748 for (
auto *BB : DeadBlockSet) {
1750 assert(!DT.
getNode(BB) &&
"Should already have cleared domtree!");
1757 BB->dropAllReferences();
1762 for (
auto *BB : DeadBlockSet)
1763 BB->eraseFromParent();
1781 auto *PH = L.getLoopPreheader();
1782 auto *Header = L.getHeader();
1796 assert(L.contains(Pred) &&
"Found a predecessor of the loop header other "
1797 "than the preheader that is not part of the "
1803 if (LoopBlockSet.
insert(Pred).second && Pred != Header)
1808 if (LoopBlockSet.
empty())
1809 return LoopBlockSet;
1812 while (!Worklist.
empty()) {
1814 assert(LoopBlockSet.
count(BB) &&
"Didn't put block into the loop set!");
1826 assert(L.contains(InnerL) &&
1827 "Should not reach a loop *outside* this loop!");
1830 auto *InnerPH = InnerL->getLoopPreheader();
1831 assert(L.contains(InnerPH) &&
"Cannot contain an inner loop block "
1832 "but not contain the inner loop "
1834 if (!LoopBlockSet.
insert(InnerPH).second)
1844 for (
auto *InnerBB : InnerL->blocks()) {
1845 if (InnerBB == BB) {
1847 "Block should already be in the set!");
1851 LoopBlockSet.
insert(InnerBB);
1863 if (L.contains(Pred) && LoopBlockSet.
insert(Pred).second)
1867 assert(LoopBlockSet.
count(Header) &&
"Cannot fail to add the header!");
1871 return LoopBlockSet;
1892 auto *PH = L.getLoopPreheader();
1896 Loop *ParentL =
nullptr;
1900 for (
auto *ExitBB : ExitBlocks)
1904 if (!ParentL || (ParentL != ExitL && ParentL->
contains(ExitL)))
1916 if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1918 for (
Loop *IL = L.getParentLoop(); IL != ParentL;
1920 IL->getBlocksSet().erase(PH);
1921 for (
auto *BB : L.blocks())
1922 IL->getBlocksSet().erase(BB);
1924 return BB == PH || L.contains(BB);
1929 L.getParentLoop()->removeChildLoop(&L);
1937 auto &Blocks = L.getBlocksVector();
1939 LoopBlockSet.empty()
1941 : std::stable_partition(
1942 Blocks.begin(), Blocks.end(),
1943 [&](
BasicBlock *BB) { return LoopBlockSet.count(BB); });
1947 if (LoopBlockSet.empty())
1948 UnloopedBlocks.
insert(PH);
1951 for (
auto *BB :
make_range(BlocksSplitI, Blocks.end()))
1952 L.getBlocksSet().erase(BB);
1953 Blocks.erase(BlocksSplitI, Blocks.end());
1963 Loop *PrevExitL = L.getParentLoop();
1965 auto RemoveUnloopedBlocksFromLoop =
1967 for (
auto *BB : UnloopedBlocks)
1968 L.getBlocksSet().erase(BB);
1970 return UnloopedBlocks.count(BB);
1975 while (!UnloopedBlocks.
empty() && !ExitsInLoops.
empty()) {
1976 assert(Worklist.
empty() &&
"Didn't clear worklist!");
1977 assert(NewExitLoopBlocks.empty() &&
"Didn't clear loop set!");
1982 assert(ExitL.
contains(&L) &&
"Exit loop must contain the inner loop!");
1988 for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->
getParentLoop())
1989 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2003 if (!UnloopedBlocks.
erase(PredBB)) {
2004 assert((NewExitLoopBlocks.count(PredBB) ||
2006 "Predecessor not in a nested loop (or already visited)!");
2013 bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
2015 assert(Inserted &&
"Should only visit an unlooped block once!");
2020 }
while (!Worklist.
empty());
2025 for (
auto *BB : NewExitLoopBlocks)
2027 if (BBL == &L || !L.contains(BBL))
2032 NewExitLoopBlocks.clear();
2038 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2039 for (
auto *BB : UnloopedBlocks)
2041 if (BBL == &L || !L.contains(BBL))
2047 auto &SubLoops = L.getSubLoopsVector();
2048 auto SubLoopsSplitI =
2049 LoopBlockSet.empty()
2051 : std::stable_partition(
2052 SubLoops.begin(), SubLoops.end(), [&](
Loop *SubL) {
2053 return LoopBlockSet.count(SubL->getHeader());
2055 for (
auto *HoistedL :
make_range(SubLoopsSplitI, SubLoops.end())) {
2057 HoistedL->setParentLoop(
nullptr);
2067 if (
auto *NewParentL = LI.
getLoopFor(HoistedL->getLoopPreheader()))
2068 NewParentL->addChildLoop(HoistedL);
2072 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
2075 if (Blocks.empty()) {
2076 assert(SubLoops.empty() &&
2077 "Failed to remove all subloops from the original loop!");
2078 if (
Loop *ParentL = L.getParentLoop())
2096template <
typename CallableT>
2108 if (!Callable(
N->getBlock()))
2114 "Cannot visit a node twice when walking a tree!");
2117 }
while (!DomWorklist.
empty());
2135 "Can only unswitch switches and conditional branch!");
2139 !PartiallyInvariant);
2142 "Cannot have other invariants with full unswitching!");
2145 "Partial unswitching requires an instruction as the condition!");
2158 if (!FullUnswitch) {
2162 PartiallyInvariant) &&
2163 "Only `or`, `and`, an `select`, partially invariant instructions "
2164 "can combine invariants being unswitched.");
2180 for (
auto Case :
SI->cases())
2181 if (Case.getCaseSuccessor() != RetainedSuccBB)
2182 UnswitchedSuccBBs.
insert(Case.getCaseSuccessor());
2184 assert(!UnswitchedSuccBBs.
count(RetainedSuccBB) &&
2185 "Should not unswitch the same successor we are retaining!");
2194 Loop *ParentL = L.getParentLoop();
2203 Loop *OuterExitL = &L;
2205 L.getUniqueExitBlocks(ExitBlocks);
2206 for (
auto *ExitBB : ExitBlocks) {
2208 if (!NewOuterExitL) {
2210 OuterExitL =
nullptr;
2213 if (NewOuterExitL != OuterExitL && NewOuterExitL->
contains(OuterExitL))
2214 OuterExitL = NewOuterExitL;
2228 bool InsertFreeze =
false;
2238 Value *FullUnswitchCond =
nullptr;
2244 FullUnswitchCond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
2253 for (
auto *SuccBB : llvm::concat<BasicBlock *const>(
ArrayRef(RetainedSuccBB),
2255 if (SuccBB->getUniquePredecessor() ||
2257 return PredBB == ParentBB || DT.
dominates(SuccBB, PredBB);
2260 DominatingSucc[BB] = SuccBB;
2279 for (
auto *SuccBB : UnswitchedSuccBBs) {
2282 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2283 DominatingSucc, *VMaps.
back(), DTUpdates, AC, DT, LI, MSSAU, SE);
2288 if (TI.
getMetadata(LLVMContext::MD_make_implicit)) {
2292 TI.
setMetadata(LLVMContext::MD_make_implicit,
nullptr);
2299 TI.
setMetadata(LLVMContext::MD_make_implicit,
nullptr);
2306 SplitBB->getTerminator()->eraseFromParent();
2310 SplitBB->splice(SplitBB->end(), ParentBB, TI.
getIterator());
2314 NewTI->
insertInto(ParentBB, ParentBB->end());
2323 FullUnswitchCond, FullUnswitchCond->
getName() +
".fr", BI);
2325 DTUpdates.
push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2327 assert(
SI &&
"Must either be a branch or switch!");
2330 assert(
SI->getDefaultDest() == RetainedSuccBB &&
2331 "Not retaining default successor!");
2332 SI->setDefaultDest(LoopPH);
2333 for (
const auto &Case :
SI->cases())
2334 if (Case.getCaseSuccessor() == RetainedSuccBB)
2335 Case.setSuccessor(LoopPH);
2337 Case.setSuccessor(ClonedPHs.
find(Case.getCaseSuccessor())->second);
2341 FullUnswitchCond, FullUnswitchCond->
getName() +
".fr",
SI));
2348 {DominatorTree::Insert, SplitBB, ClonedPHs.
find(SuccBB)->second});
2362 for (
auto &VMap : VMaps)
2378 "Only one possible unswitched block for a branch!");
2382 DTUpdates.
push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2392 "Not retaining default successor!");
2393 for (
const auto &Case : NewSI->
cases())
2394 Case.getCaseSuccessor()->removePredecessor(
2402 DTUpdates.
push_back({DominatorTree::Delete, ParentBB, SuccBB});
2406 ParentBB->getTerminator()->eraseFromParent();
2412 assert(BI &&
"Only branches have partial unswitching.");
2414 "Only one possible unswitched block for a branch!");
2418 if (PartiallyInvariant)
2420 *SplitBB, Invariants,
Direction, *ClonedPH, *LoopPH, L, MSSAU);
2423 *SplitBB, Invariants,
Direction, *ClonedPH, *LoopPH,
2426 DTUpdates.
push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2433 for (
auto &VMap : VMaps)
2453 for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2476 assert(DT.
verify(DominatorTree::VerificationLevel::Fast));
2478 if (BI && !PartiallyInvariant) {
2484 "Only one possible unswitched block for a branch!");
2496 bool ReplaceUnswitched =
2497 FullUnswitch || (Invariants.
size() == 1) || PartiallyInvariant;
2505 for (
Value *Invariant : Invariants) {
2506 assert(!isa<Constant>(Invariant) &&
2507 "Should not be replacing constant values!");
2510 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2517 U.set(ContinueReplacement);
2518 else if (ReplaceUnswitched &&
2520 U.set(UnswitchedReplacement);
2537 auto UpdateLoop = [&](
Loop &UpdateL) {
2539 UpdateL.verifyLoop();
2540 for (
Loop *ChildL : UpdateL) {
2541 ChildL->verifyLoop();
2542 assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2543 "Perturbed a child loop's LCSSA form!");
2563 for (
Loop *UpdatedL :
2564 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2565 UpdateLoop(*UpdatedL);
2566 if (UpdatedL->isOutermost())
2567 OuterExitL =
nullptr;
2571 if (L.isOutermost())
2572 OuterExitL =
nullptr;
2577 if (OuterExitL != &L)
2578 for (
Loop *OuterL = ParentL; OuterL != OuterExitL;
2580 UpdateLoop(*OuterL);
2592 for (
Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2593 if (UpdatedL->getParentLoop() == ParentL)
2595 UnswitchCB(IsStillLoop, PartiallyInvariant, SibLoops);
2618 auto BBCostIt = BBCostMap.
find(
N.getBlock());
2619 if (BBCostIt == BBCostMap.
end())
2623 auto DTCostIt = DTCostMap.
find(&
N);
2624 if (DTCostIt != DTCostMap.
end())
2625 return DTCostIt->second;
2630 N.begin(),
N.end(), BBCostIt->second,
2632 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2634 bool Inserted = DTCostMap.
insert({&
N,
Cost}).second;
2636 assert(Inserted &&
"Should not insert a node while visiting children!");
2675 if (Successors.
insert(Succ).second)
2676 DTUpdates.
push_back({DominatorTree::Delete, CheckBB, Succ});
2686 GuardedBlock->
setName(
"guarded");
2698 DTUpdates.
push_back({DominatorTree::Insert, CheckBB, Succ});
2702 for (
auto *Succ : Successors)
2703 DTUpdates.
push_back({DominatorTree::Insert, GuardedBlock, Succ});
2708 L.addBasicBlockToLoop(GuardedBlock, LI);
2748 return L.contains(SuccBB);
2750 NumCostMultiplierSkipped++;
2754 auto *ParentL = L.getParentLoop();
2755 int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2756 : std::distance(LI.
begin(), LI.
end()));
2759 int UnswitchedClones = 0;
2760 for (
auto Candidate : UnswitchCandidates) {
2763 bool SkipExitingSuccessors = DT.
dominates(CondBlock, Latch);
2765 if (!SkipExitingSuccessors)
2769 int NonExitingSuccessors =
2771 [SkipExitingSuccessors, &L](
const BasicBlock *SuccBB) {
2772 return !SkipExitingSuccessors || L.contains(SuccBB);
2774 UnswitchedClones +=
Log2_32(NonExitingSuccessors);
2782 unsigned ClonesPower =
2786 int SiblingsMultiplier =
2787 std::max((ParentL ? SiblingsCount
2797 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2801 <<
" (siblings " << SiblingsMultiplier <<
" * clones "
2802 << (1 << ClonesPower) <<
")"
2803 <<
" for unswitch candidate: " << TI <<
"\n");
2804 return CostMultiplier;
2812 assert(UnswitchCandidates.
empty() &&
"Should be!");
2814 bool CollectGuards =
false;
2816 auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
2818 if (GuardDecl && !GuardDecl->use_empty())
2819 CollectGuards =
true;
2822 for (
auto *BB : L.blocks()) {
2832 if (!isa<Constant>(
Cond) && L.isLoopInvariant(
Cond))
2836 if (
auto *
SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2839 if (!isa<Constant>(
SI->getCondition()) &&
2840 L.isLoopInvariant(
SI->getCondition()) && !BB->getUniqueSuccessor())
2845 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2846 if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) ||
2847 BI->getSuccessor(0) == BI->getSuccessor(1))
2851 if (isa<Constant>(
Cond))
2854 if (L.isLoopInvariant(
Cond)) {
2863 if (Invariants.
empty())
2866 UnswitchCandidates.
push_back({BI, std::move(Invariants)});
2872 !
any_of(UnswitchCandidates, [&L](
auto &TerminatorAndInvariants) {
2873 return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
2878 dbgs() <<
"simple-loop-unswitch: Found partially invariant condition "
2879 << *
Info->InstToDuplicate[0] <<
"\n");
2880 PartialIVInfo = *
Info;
2881 PartialIVCondBranch = L.getHeader()->getTerminator();
2885 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
2888 return !UnswitchCandidates.
empty();
2901 if (!L.contains(IfTrue)) {
2902 Pred = ICmpInst::getInversePredicate(Pred);
2907 if (L.isLoopInvariant(
LHS)) {
2908 Pred = ICmpInst::getSwappedPredicate(Pred);
2914 Pred = ICmpInst::ICMP_ULT;
2927 if (L.isLoopInvariant(
LHS) || !L.isLoopInvariant(
RHS))
2930 if (Pred != ICmpInst::ICMP_ULT)
2933 if (!L.contains(IfTrue) || L.contains(IfFalse))
2937 if (L.getHeader() == IfTrue)
2953 if (BI->
hasMetadata(
"llvm.invariant.condition.injection.disabled"))
2961 assert(Weights.
size() == 2 &&
"Unexpected profile data!");
2963 auto Num = Weights[
Idx];
2964 auto Denom = Weights[0] + Weights[1];
2966 if (Denom == 0 || Num > Denom)
2969 if (LikelyTaken > ActualTaken)
2992static NonTrivialUnswitchCandidate
2996 assert(Candidate.hasPendingInjection() &&
"Nothing to inject!");
2997 BasicBlock *Preheader = L.getLoopPreheader();
2998 assert(Preheader &&
"Loop is not in simplified form?");
3000 "Unswitching branch of inner loop!");
3002 auto Pred = Candidate.PendingInjection->Pred;
3003 auto *
LHS = Candidate.PendingInjection->LHS;
3004 auto *
RHS = Candidate.PendingInjection->RHS;
3005 auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;
3006 auto *TI = cast<BranchInst>(Candidate.TI);
3007 auto *BB = Candidate.TI->getParent();
3011 assert(L.contains(InLoopSucc) &&
"Not supported yet!");
3012 assert(!L.contains(OutOfLoopSucc) &&
"Not supported yet!");
3013 auto &Ctx = BB->getContext();
3016 assert(ICmpInst::isUnsigned(Pred) &&
"Not supported yet!");
3026 auto *InjectedCond =
3027 ICmpInst::Create(Instruction::ICmp, Pred,
LHS,
RHS,
"injected.cond",
3029 auto *OldCond = TI->getCondition();
3032 BB->getParent(), InLoopSucc);
3035 Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
3037 Builder.SetInsertPoint(CheckBlock);
3038 auto *NewTerm =
Builder.CreateCondBr(OldCond, InLoopSucc, OutOfLoopSucc);
3042 NewTerm->setMetadata(
"llvm.invariant.condition.injection.disabled",
3046 for (
auto &
I : *InLoopSucc) {
3047 auto *PN = dyn_cast<PHINode>(&
I);
3050 auto *Inc = PN->getIncomingValueForBlock(BB);
3051 PN->addIncoming(Inc, CheckBlock);
3053 OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);
3056 { DominatorTree::Insert, BB, CheckBlock },
3057 { DominatorTree::Insert, CheckBlock, InLoopSucc },
3058 { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
3059 { DominatorTree::Delete, BB, OutOfLoopSucc }
3065 L.addBasicBlockToLoop(CheckBlock, LI);
3077 LLVM_DEBUG(
dbgs() <<
"Injected a new loop-invariant branch " << *InvariantBr
3078 <<
" and considering it for unswitching.");
3079 ++NumInvariantConditionsInjected;
3080 return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },
3101 assert(ICmpInst::isStrictPredicate(Pred));
3102 if (Compares.
size() < 2)
3105 for (
auto Prev = Compares.
begin(), Next = Compares.
begin() + 1;
3106 Next != Compares.
end(); ++Prev, ++Next) {
3110 InjectedInvariant ToInject(NonStrictPred,
LHS,
RHS, InLoopSucc);
3111 NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },
3112 std::nullopt, std::move(ToInject));
3113 UnswitchCandidates.
push_back(std::move(Candidate));
3143 auto *Latch = L.getLoopLatch();
3147 assert(L.getLoopPreheader() &&
"Must have a preheader!");
3152 for (
auto *DTN = DT.
getNode(Latch); L.contains(DTN->getBlock());
3153 DTN = DTN->getIDom()) {
3156 BasicBlock *IfTrue =
nullptr, *IfFalse =
nullptr;
3157 auto *BB = DTN->getBlock();
3161 auto *Term = BB->getTerminator();
3173 CompareDesc Desc(cast<BranchInst>(Term),
RHS, IfTrue);
3174 while (
auto *Zext = dyn_cast<ZExtInst>(
LHS))
3175 LHS = Zext->getOperand(0);
3176 CandidatesULT[
LHS].push_back(Desc);
3180 for (
auto &It : CandidatesULT)
3182 UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);
3187 if (!L.isSafeToClone())
3189 for (
auto *BB : L.blocks())
3190 for (
auto &
I : *BB) {
3191 if (
I.getType()->isTokenTy() &&
I.isUsedOutsideOfBlock(BB))
3193 if (
auto *CB = dyn_cast<CallBase>(&
I)) {
3194 assert(!CB->cannotDuplicate() &&
"Checked by L.isSafeToClone().");
3195 if (CB->isConvergent())
3208 if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
3212 L.getUniqueExitBlocks(ExitBlocks);
3217 for (
auto *ExitBB : ExitBlocks) {
3218 auto *
I = ExitBB->getFirstNonPHI();
3219 if (isa<CleanupPadInst>(
I) || isa<CatchSwitchInst>(
I)) {
3220 LLVM_DEBUG(
dbgs() <<
"Cannot unswitch because of cleanuppad/catchswitch "
3248 L.getHeader()->getParent()->hasMinSize()
3252 for (
auto *BB : L.blocks()) {
3254 for (
auto &
I : *BB) {
3259 assert(
Cost >= 0 &&
"Must not have negative costs!");
3261 assert(LoopCost >= 0 &&
"Must not have negative loop costs!");
3262 BBCostMap[BB] =
Cost;
3291 if (!Visited.
insert(SuccBB).second)
3299 if (!FullUnswitch) {
3300 auto &BI = cast<BranchInst>(TI);
3303 if (SuccBB == BI.getSuccessor(1))
3306 if (SuccBB == BI.getSuccessor(0))
3309 SuccBB == BI.getSuccessor(0)) ||
3311 SuccBB == BI.getSuccessor(1)))
3319 if (SuccBB->getUniquePredecessor() ||
3321 return PredBB == &BB || DT.
dominates(SuccBB, PredBB);
3325 "Non-duplicated cost should never exceed total loop cost!");
3334 int SuccessorsCount =
isGuard(&TI) ? 2 : Visited.
size();
3335 assert(SuccessorsCount > 1 &&
3336 "Cannot unswitch a condition without multiple distinct successors!");
3337 return (LoopCost -
Cost) * (SuccessorsCount - 1);
3340 std::optional<NonTrivialUnswitchCandidate> Best;
3341 for (
auto &Candidate : UnswitchCandidates) {
3346 !BI || Candidate.hasPendingInjection() ||
3347 (Invariants.
size() == 1 &&
3349 InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
3353 int CostMultiplier =
3357 "cost multiplier needs to be in the range of 1..UnswitchThreshold");
3358 CandidateCost *= CostMultiplier;
3360 <<
" (multiplier: " << CostMultiplier <<
")"
3361 <<
" for unswitch candidate: " << TI <<
"\n");
3364 <<
" for unswitch candidate: " << TI <<
"\n");
3367 if (!Best || CandidateCost < Best->
Cost) {
3369 Best->Cost = CandidateCost;
3372 assert(Best &&
"Must be!");
3388 PartialIVCondBranch, L, LI, AA, MSSAU);
3390 PartialIVCondBranch, L, DT, LI, AA,
3393 if (UnswitchCandidates.
empty())
3397 dbgs() <<
"Considering " << UnswitchCandidates.
size()
3398 <<
" non-trivial loop invariant conditions for unswitching.\n");
3401 UnswitchCandidates, L, DT, LI, AC,
TTI, PartialIVInfo);
3403 assert(Best.TI &&
"Failed to find loop unswitch candidate");
3404 assert(Best.Cost &&
"Failed to compute cost");
3407 LLVM_DEBUG(
dbgs() <<
"Cannot unswitch, lowest cost found: " << *Best.Cost
3412 if (Best.hasPendingInjection())
3414 assert(!Best.hasPendingInjection() &&
3415 "All injections should have been done by now!");
3417 if (Best.TI != PartialIVCondBranch)
3425 LLVM_DEBUG(
dbgs() <<
" Unswitching non-trivial (cost = " << Best.Cost
3426 <<
") terminator: " << *Best.TI <<
"\n");
3428 LI, AC, UnswitchCB, SE, MSSAU, DestroyLoopCB);
3461 assert(L.isRecursivelyLCSSAForm(DT, LI) &&
3462 "Loops must be in LCSSA form before unswitching.");
3465 if (!L.isLoopSimplifyForm())
3472 UnswitchCB(
true,
false, {});
3487 bool ContinueWithNonTrivial =
3489 if (!ContinueWithNonTrivial)
3493 if (L.getHeader()->getParent()->hasOptSize())
3498 auto IsLoopNestCold = [&](
const Loop *L) {
3504 Parent = Parent->getParentLoop();
3508 Worklist.
insert(Worklist.
end(), L->getSubLoops().begin(),
3509 L->getSubLoops().end());
3510 while (!Worklist.
empty()) {
3514 Worklist.
insert(Worklist.
end(), CurLoop->getSubLoops().begin(),
3515 CurLoop->getSubLoops().end());
3550 Function &
F = *L.getHeader()->getParent();
3553 if (
auto OuterProxy =
3557 LLVM_DEBUG(
dbgs() <<
"Unswitching loop in " <<
F.getName() <<
": " << L
3562 std::string LoopName = std::string(L.getName());
3564 auto UnswitchCB = [&L, &U, &LoopName](
bool CurrentLoopValid,
3565 bool PartiallyInvariant,
3568 if (!NewLoops.empty())
3569 U.addSiblingLoops(NewLoops);
3573 if (CurrentLoopValid) {
3574 if (PartiallyInvariant) {
3577 auto &
Context = L.getHeader()->getContext();
3582 Context, L.getLoopID(), {
"llvm.loop.unswitch.partial"},
3583 {DisableUnswitchMD});
3584 L.setLoopID(NewLoopID);
3586 U.revisitCurrentLoop();
3588 U.markLoopAsDeleted(L, LoopName);
3592 U.markLoopAsDeleted(L,
Name);
3595 std::optional<MemorySSAUpdater> MSSAU;
3602 UnswitchCB, &AR.
SE, MSSAU ? &*MSSAU :
nullptr, PSI, AR.
BFI,
3622 OS, MapClassName2PassName);
3625 OS << (NonTrivial ?
"" :
"no-") <<
"nontrivial;";
3626 OS << (Trivial ?
"" :
"no-") <<
"trivial";
3632class SimpleLoopUnswitchLegacyPass :
public LoopPass {
3638 explicit SimpleLoopUnswitchLegacyPass(
bool NonTrivial =
false)
3663 LLVM_DEBUG(
dbgs() <<
"Unswitching loop in " <<
F.getName() <<
": " << *L
3665 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3666 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
3667 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
3668 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
3669 auto &
TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
3670 MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
3673 auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
3674 auto *SE = SEWP ? &SEWP->getSE() :
nullptr;
3676 auto UnswitchCB = [&
L, &LPM](
bool CurrentLoopValid,
bool PartiallyInvariant,
3679 for (
auto *NewL : NewLoops)
3685 if (CurrentLoopValid) {
3689 if (!PartiallyInvariant)
3702 unswitchLoop(*L, DT, LI, AC, AA,
TTI,
true, NonTrivial, UnswitchCB, SE,
3703 &MSSAU,
nullptr,
nullptr, DestroyLoopCB);
3710 assert(DT.
verify(DominatorTree::VerificationLevel::Fast));
3715char SimpleLoopUnswitchLegacyPass::ID = 0;
3717 "Simple unswitch loops",
false,
false)
3728 return new SimpleLoopUnswitchLegacyPass(NonTrivial);
SmallVector< MachineOperand, 4 > Cond
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
This header provides classes for managing per-loop analyses.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file contains the declarations for profiling metadata utility functions.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Provides some synthesis utilities to produce sequences of values.
This file implements a set that has insertion order iteration characteristics.
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
static void canonicalizeForInvariantConditionInjection(ICmpInst::Predicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue, BasicBlock *&IfFalse, const Loop &L)
Tries to canonicalize condition described by:
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
static int CalculateUnswitchCostMultiplier(const Instruction &TI, const Loop &L, const LoopInfo &LI, const DominatorTree &DT, ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU)
Copy a set of loop invariant values, and conditionally branch on them.
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root, const LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy > > VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
static const Loop * getTopMostExitingLoop(const BasicBlock *ExitBB, const LoopInfo &LI)
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, function_ref< void(bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, const Instruction *I, AssumptionCache *AC, const DominatorTree &DT)
Copy a set of loop invariant values ToDuplicate and insert them at the end of BB and conditionally br...
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
static bool shouldTryInjectInvariantCondition(const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L)
Returns true, if predicate described by ( Pred, LHS, RHS ) succeeding into blocks ( IfTrue,...
static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates, const Loop &L, const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC, const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo)
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
static Value * skipTrivialSelect(Value *Cond)
static bool collectUnswitchCandidatesWithInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, const DominatorTree &DT, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
Collect unswitch candidates by invariant conditions that are not immediately present in the loop.
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
static void replaceLoopInvariantUses(const Loop &L, Value *Invariant, Constant &Replacement)
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
static bool collectUnswitchCandidates(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, const Loop &L, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
static cl::opt< bool > InjectInvariantConditions("simple-loop-unswitch-inject-invariant-conditions", cl::Hidden, cl::desc("Whether we should inject new invariants and unswitch them to " "eliminate some existing (non-invariant) conditions."), cl::init(true))
static cl::opt< bool > FreezeLoopUnswitchCond("freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation."))
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, function_ref< void(bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
static bool areLoopExitPHIsLoopInvariant(const Loop &L, const BasicBlock &ExitingBB, const BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
static bool insertCandidatesWithPendingInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, Loop &L, ICmpInst::Predicate Pred, ArrayRef< CompareDesc > Compares, const DominatorTree &DT)
Given chain of loop branch conditions looking like: br (Variant < Invariant1) br (Variant < Invariant...
static NonTrivialUnswitchCandidate injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, MemorySSAUpdater *MSSAU)
Materialize pending invariant condition of the given candidate into IR.
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
static cl::opt< unsigned > InjectInvariantConditionHotnesThreshold("simple-loop-unswitch-inject-invariant-condition-hotness-threshold", cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " "unswitch on them to eliminate branches that are " "not-taken 1/<this option> times or less."), cl::init(16))
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops, ScalarEvolution *SE)
Rebuild a loop after unswitching removes some subset of blocks and edges.
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Build the cloned blocks for an unswitched copy of the given loop.
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, function_ref< void(bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
Unswitch control flow predicated on loop invariant conditions.
bool shouldTryInjectBasingOnMetadata(const BranchInst *BI, const BasicBlock *TakenSucc)
Returns true, if metadata on BI allows us to optimize branching into TakenSucc via injection of invar...
static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This defines the Use class.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction & front() const
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
static ConstantInt * getFalse(LLVMContext &Context)
This is an important base class in LLVM.
bool isOneValue() const
Returns true if the value is one.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents a freeze function that returns random concrete value if an operand is either a ...
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
bool isRelational() const
Return true if the predicate is relational (not EQ or NE).
Value * CreateFreeze(Value *V, const Twine &Name="")
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
const BasicBlock * getParent() const
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
SymbolTableList< Instruction >::iterator insertInto(BasicBlock *ParentBB, SymbolTableList< Instruction >::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
A wrapper class for inspecting calls to intrinsic functions.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
void markLoopAsDeleted(Loop &L)
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
LoopT * AllocateLoop(ArgsTy &&... Args)
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
The legacy pass manager's analysis pass to compute loop information.
Represents a single loop in the control flow graph.
StringRef getName() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static MDString * get(LLVMContext &Context, StringRef Str)
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
An analysis that produces MemorySSA for a function.
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point)
Create a MemoryAccess in MemorySSA at a specified point in a block, with a specified clobbering defin...
void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.
void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
Legacy analysis pass which computes MemorySSA.
Encapsulates MemorySSA, including all data associated with memory accesses.
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) const
Returns true if BasicBlock BB is considered cold.
The main scalar evolution driver.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
void forgetTopmostLoop(const Loop *L)
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
size_type size() const
Determine the number of elements in the SetVector.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
SymbolTableList< Instruction >::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
ConstantIntT * getCaseValue() const
Resolves case value for current case.
BasicBlock * getDefaultDest() const
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, Instruction *InsertBefore=nullptr)
void setDefaultDest(BasicBlock *DefaultCase)
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
void push_back(EltTy NewVal)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
unsigned getIntegerBitWidth() const
A Use represents the edge between a Value definition and its users.
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void setName(const Twine &Name)
Change the name of the value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
bool match(Val *V, const Pattern &P)
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void stable_sort(R &&Range)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
Pass * createSimpleLoopUnswitchLegacyPass(bool NonTrivial=false)
Create the legacy pass object for the simple loop unswitcher.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
auto reverse(ContainerTy &&C)
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&...args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
void sort(IteratorTy Start, IteratorTy End)
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
void initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry &)
bool VerifyMemorySSA
Enables verification of MemorySSA.
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
auto predecessors(const MachineBasicBlock *BB)
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
bool pred_empty(const BasicBlock *BB)
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
Instruction * SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, bool Unreachable, MDNode *BranchWeights, DominatorTree *DT, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Struct to hold information about a partially invariant condition.
SmallVector< Instruction * > InstToDuplicate
Instructions that need to be duplicated and checked for the unswitching condition.
Constant * KnownValue
Constant to indicate for which value the condition is invariant.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
Direction
An enum for the direction of the loop.
A CRTP mix-in to automatically provide informational APIs needed for passes.