71#define DEBUG_TYPE "simple-loop-unswitch"
76STATISTIC(NumBranches,
"Number of branches unswitched");
77STATISTIC(NumSwitches,
"Number of switches unswitched");
78STATISTIC(NumSelects,
"Number of selects turned into branches for unswitching");
79STATISTIC(NumGuards,
"Number of guards turned into branches for unswitching");
80STATISTIC(NumTrivial,
"Number of unswitches that are trivial");
82 NumCostMultiplierSkipped,
83 "Number of unswitch candidates that had their cost multiplier skipped");
85 "Number of invariant conditions injected and unswitched");
89 cl::desc(
"Forcibly enables non-trivial loop unswitching rather than "
90 "following the configuration passed into the pass."));
94 cl::desc(
"The cost threshold for unswitching a loop."));
98 cl::desc(
"Enable unswitch cost multiplier that prohibits exponential "
99 "explosion in nontrivial unswitch."));
102 cl::desc(
"Toplevel siblings divisor for cost multiplier."));
105 cl::desc(
"Number of unswitch candidates that are ignored when calculating "
106 "cost multiplier."));
109 cl::desc(
"If enabled, simple loop unswitching will also consider "
110 "llvm.experimental.guard intrinsics as unswitch candidates."));
112 "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
114 cl::desc(
"If enabled, drop make.implicit metadata in unswitched implicit "
115 "null checks to save time analyzing if we can keep it."));
118 cl::desc(
"Max number of memory uses to explore during "
119 "partial unswitching analysis"),
123 cl::desc(
"If enabled, the freeze instruction will be added to condition "
124 "of loop unswitch to prevent miscompilation."));
127 "simple-loop-unswitch-inject-invariant-conditions",
cl::Hidden,
128 cl::desc(
"Whether we should inject new invariants and unswitch them to "
129 "eliminate some existing (non-invariant) conditions."),
133 "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
135 "unswitch on them to eliminate branches that are "
136 "not-taken 1/<this option> times or less."),
146 : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
149struct InjectedInvariant {
157 : Pred(Pred),
LHS(
LHS),
RHS(
RHS), InLoopSucc(InLoopSucc) {}
160struct NonTrivialUnswitchCandidate {
163 std::optional<InstructionCost>
Cost;
164 std::optional<InjectedInvariant> PendingInjection;
165 NonTrivialUnswitchCandidate(
167 std::optional<InstructionCost>
Cost = std::nullopt,
168 std::optional<InjectedInvariant> PendingInjection = std::nullopt)
169 : TI(TI), Invariants(Invariants),
Cost(
Cost),
170 PendingInjection(PendingInjection) {};
172 bool hasPendingInjection()
const {
return PendingInjection.has_value(); }
196 assert(!L.isLoopInvariant(&Root) &&
197 "Only need to walk the graph if root itself is not invariant.");
210 for (
Value *OpV :
I.operand_values()) {
212 if (isa<Constant>(OpV))
216 if (L.isLoopInvariant(OpV)) {
227 if (Visited.
insert(OpI).second)
231 }
while (!Worklist.
empty());
238 assert(!isa<Constant>(Invariant) &&
"Why are we unswitching on a constant?");
243 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
246 if (UserI && L.contains(UserI))
257 auto *PN = dyn_cast<PHINode>(&
I);
264 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
280 for (
Value *Inv : Invariants) {
289 Direction ? &NormalSucc : &UnswitchedSucc);
298 for (
auto *Val :
reverse(ToDuplicate)) {
312 auto *DefiningAccess = MemUse->getDefiningAccess();
314 while (L.contains(DefiningAccess->getBlock())) {
317 if (
auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
319 MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
321 DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
332 Direction ? &NormalSucc : &UnswitchedSucc);
349 for (
auto i : seq<int>(0, PN.getNumOperands())) {
350 assert(PN.getIncomingBlock(i) == &OldExitingBB &&
351 "Found incoming block different from unique predecessor!");
352 PN.setIncomingBlock(i, &OldPH);
369 assert(&ExitBB != &UnswitchedBB &&
370 "Must have different loop exit and unswitched blocks!");
374 PN.getName() +
".split");
375 NewPN->insertBefore(InsertPt);
386 for (
int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
387 if (PN.getIncomingBlock(i) != &OldExitingBB)
390 Value *Incoming = PN.getIncomingValue(i);
393 PN.removeIncomingValue(i);
395 NewPN->addIncoming(Incoming, &OldPH);
401 NewPN->addIncoming(&PN, &ExitBB);
414 Loop *OldParentL = L.getParentLoop();
419 L.getExitBlocks(Exits);
420 Loop *NewParentL =
nullptr;
421 for (
auto *ExitBB : Exits)
423 if (!NewParentL || NewParentL->
contains(ExitL))
426 if (NewParentL == OldParentL)
432 "Can only hoist this loop up the nest!");
437 "Parent loop of this loop should contain this loop's preheader!");
452 for (
Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
456 return BB == &Preheader || L.contains(BB);
459 OldContainingL->getBlocksSet().erase(&Preheader);
461 OldContainingL->getBlocksSet().erase(BB);
484 Loop *Current = TopMost;
514 LLVM_DEBUG(
dbgs() <<
" Trying to unswitch branch: " << BI <<
"\n");
521 bool FullUnswitch =
false;
524 if (L.isLoopInvariant(
Cond)) {
528 if (
auto *CondInst = dyn_cast<Instruction>(
Cond))
530 if (Invariants.
empty()) {
537 bool ExitDirection =
true;
538 int LoopExitSuccIdx = 0;
540 if (L.contains(LoopExitBB)) {
541 ExitDirection =
false;
544 if (L.contains(LoopExitBB)) {
549 auto *ContinueBB = BI.
getSuccessor(1 - LoopExitSuccIdx);
552 LLVM_DEBUG(
dbgs() <<
" Loop exit PHI's aren't loop-invariant!\n");
565 "non-full unswitch!\n");
571 dbgs() <<
" unswitching trivial invariant conditions for: " << BI
573 for (
Value *Invariant : Invariants) {
574 dbgs() <<
" " << *Invariant <<
" == true";
575 if (Invariant != Invariants.back())
607 if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
609 "A branch's parent isn't a predecessor!");
610 UnswitchedBB = LoopExitBB;
613 SplitBlock(LoopExitBB, LoopExitBB->begin(), &DT, &LI, MSSAU,
"",
false);
645 "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
649 "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
652 *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
663 Updates.
push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
671 ParentBB->getTerminator()->eraseFromParent();
683 if (UnswitchedBB == LoopExitBB)
687 *ParentBB, *OldPH, FullUnswitch);
698 for (
Value *Invariant : Invariants)
744 LLVM_DEBUG(
dbgs() <<
" Trying to unswitch switch: " << SI <<
"\n");
745 Value *LoopCond = SI.getCondition();
748 if (!L.isLoopInvariant(LoopCond))
751 auto *ParentBB = SI.getParent();
758 auto IsTriviallyUnswitchableExitBlock = [&](
BasicBlock &BBToCheck) {
760 if (L.contains(&BBToCheck))
769 auto *TI = BBToCheck.getTerminator();
770 bool isUnreachable = isa<UnreachableInst>(TI);
771 return !isUnreachable ||
772 (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI));
776 for (
auto Case : SI.cases())
777 if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
778 ExitCaseIndices.
push_back(Case.getCaseIndex());
782 if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
783 DefaultExitBB = SI.getDefaultDest();
784 }
else if (ExitCaseIndices.
empty())
799 if (!ExitL || ExitL->
contains(OuterL))
802 for (
unsigned Index : ExitCaseIndices) {
803 auto CaseI = SI.case_begin() +
Index;
806 if (!ExitL || ExitL->
contains(OuterL))
820 SI.setDefaultDest(
nullptr);
828 ExitCases.reserve(ExitCaseIndices.
size());
833 auto CaseI = SI.case_begin() +
Index;
836 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
844 if (SI.getNumCases() > 0 &&
846 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
848 CommonSuccBB = SI.case_begin()->getCaseSuccessor();
849 if (!DefaultExitBB) {
853 if (SI.getNumCases() == 0)
854 CommonSuccBB = SI.getDefaultDest();
855 else if (SI.getDefaultDest() != CommonSuccBB)
856 CommonSuccBB =
nullptr;
882 UnswitchedExitBBs.
insert(DefaultExitBB);
890 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
895 for (
auto &ExitCase :
reverse(ExitCases)) {
903 if (UnswitchedExitBBs.
insert(ExitBB).second)
910 BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
919 std::get<1>(ExitCase) = SplitExitBB;
924 for (
auto &ExitCase :
reverse(ExitCases)) {
926 BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
928 NewSIW.
addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
939 for (
const auto &Case : SI.cases())
942 }
else if (DefaultCaseWeight) {
945 for (
const auto &Case : SI.cases()) {
948 "case weight must be defined as default case weight is defined");
963 bool SkippedFirst = DefaultExitBB ==
nullptr;
964 for (
auto Case : SI.cases()) {
966 "Non-common successor!");
978 }
else if (DefaultExitBB) {
979 assert(SI.getNumCases() > 0 &&
980 "If we had no cases we'd have a common successor!");
985 auto LastCaseI = std::prev(SI.case_end());
987 SI.setDefaultDest(LastCaseI->getCaseSuccessor());
998 for (
auto *UnswitchedExitBB : UnswitchedExitBBs) {
1002 for (
auto SplitUnswitchedPair : SplitExitBBMap) {
1003 DTUpdates.
push_back({DT.
Delete, ParentBB, SplitUnswitchedPair.first});
1015 assert(DT.
verify(DominatorTree::VerificationLevel::Fast));
1045 bool Changed =
false;
1060 Visited.
insert(CurrentBB);
1067 if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
1075 if (
auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1079 if (isa<Constant>(SI->getCondition()))
1093 auto *BI = dyn_cast<BranchInst>(CurrentBB->
getTerminator());
1094 if (!BI || BI->isConditional())
1097 CurrentBB = BI->getSuccessor(0);
1101 auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1109 if (!BI->isConditional() ||
1124 if (BI->isConditional())
1128 CurrentBB = BI->getSuccessor(0);
1133 }
while (L.contains(CurrentBB) && Visited.
insert(CurrentBB).second);
1171 NewBlocks.
reserve(L.getNumBlocks() + ExitBlocks.
size());
1182 VMap[OldBB] = NewBB;
1190 auto It = DominatingSucc.
find(BB);
1191 return It != DominatingSucc.
end() && It->second != UnswitchedSuccBB;
1195 auto *ClonedPH = CloneBlock(LoopPH);
1198 for (
auto *LoopBB : L.blocks())
1199 if (!SkipBlock(LoopBB))
1205 for (
auto *ExitBB : ExitBlocks) {
1206 if (SkipBlock(ExitBB))
1214 auto *MergeBB =
SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
1219 MergeBB->takeName(ExitBB);
1220 ExitBB->setName(
Twine(MergeBB->getName()) +
".split");
1223 auto *ClonedExitBB = CloneBlock(ExitBB);
1224 assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1225 "Exit block should have been split to have one successor!");
1226 assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1227 "Cloned exit block has the wrong successor!");
1233 std::prev(ClonedExitBB->end())))) {
1240 (isa<PHINode>(
I) || isa<LandingPadInst>(
I) || isa<CatchPadInst>(
I)) &&
1241 "Bad instruction in exit block!");
1243 assert(VMap.
lookup(&
I) == &ClonedI &&
"Mismatch in the value map!");
1246 if (SE && isa<PHINode>(
I))
1251 MergePN->insertBefore(MergeBB->getFirstInsertionPt());
1252 I.replaceAllUsesWith(MergePN);
1253 MergePN->addIncoming(&
I, ExitBB);
1254 MergePN->addIncoming(&ClonedI, ClonedExitBB);
1263 for (
auto *ClonedBB : NewBlocks)
1267 if (
auto *II = dyn_cast<AssumeInst>(&
I))
1273 for (
auto *LoopBB : L.blocks())
1274 if (SkipBlock(LoopBB))
1276 if (
auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.
lookup(SuccBB)))
1277 for (
PHINode &PN : ClonedSuccBB->phis())
1278 PN.removeIncomingValue(LoopBB,
false);
1282 auto *ClonedParentBB = cast<BasicBlock>(VMap.
lookup(ParentBB));
1284 if (SuccBB == UnswitchedSuccBB)
1287 auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.
lookup(SuccBB));
1291 ClonedSuccBB->removePredecessor(ClonedParentBB,
1297 auto *ClonedSuccBB = cast<BasicBlock>(VMap.
lookup(UnswitchedSuccBB));
1298 Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1301 Value *ClonedConditionToErase =
nullptr;
1302 if (
auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1303 ClonedConditionToErase = BI->getCondition();
1304 else if (
auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1305 ClonedConditionToErase = SI->getCondition();
1310 if (ClonedConditionToErase)
1317 for (
PHINode &PN : ClonedSuccBB->phis()) {
1321 for (
int i = PN.getNumOperands() - 1; i >= 0; --i) {
1322 if (PN.getIncomingBlock(i) != ClonedParentBB)
1328 PN.removeIncomingValue(i,
false);
1334 for (
auto *ClonedBB : NewBlocks) {
1336 if (SuccSet.
insert(SuccBB).second)
1337 DTUpdates.
push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1352 auto AddClonedBlocksToLoop = [&](
Loop &OrigL,
Loop &ClonedL) {
1353 assert(ClonedL.getBlocks().empty() &&
"Must start with an empty loop!");
1355 for (
auto *BB : OrigL.
blocks()) {
1356 auto *ClonedBB = cast<BasicBlock>(VMap.
lookup(BB));
1357 ClonedL.addBlockEntry(ClonedBB);
1370 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1382 LoopsToClone.
push_back({ClonedRootL, ChildL});
1384 Loop *ClonedParentL, *L;
1385 std::tie(ClonedParentL, L) = LoopsToClone.
pop_back_val();
1388 AddClonedBlocksToLoop(*L, *ClonedL);
1390 LoopsToClone.
push_back({ClonedL, ChildL});
1391 }
while (!LoopsToClone.
empty());
1412 Loop *ClonedL =
nullptr;
1417 auto *ClonedPH = cast<BasicBlock>(VMap.
lookup(OrigPH));
1418 auto *ClonedHeader = cast<BasicBlock>(VMap.
lookup(OrigHeader));
1424 Loop *ParentL =
nullptr;
1428 for (
auto *ExitBB : ExitBlocks)
1429 if (
auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.
lookup(ExitBB)))
1431 ExitLoopMap[ClonedExitBB] = ExitL;
1432 ClonedExitsInLoops.
push_back(ClonedExitBB);
1433 if (!ParentL || (ParentL != ExitL && ParentL->
contains(ExitL)))
1438 "The computed parent loop should always contain (or be) the parent of "
1439 "the original loop.");
1446 for (
auto *BB : OrigL.
blocks())
1447 if (
auto *ClonedBB = cast_or_null<BasicBlock>(VMap.
lookup(BB)))
1448 ClonedLoopBlocks.
insert(ClonedBB);
1459 if (Pred == ClonedPH)
1464 assert(ClonedLoopBlocks.
count(Pred) &&
"Found a predecessor of the loop "
1465 "header other than the preheader "
1466 "that is not part of the loop!");
1471 if (BlocksInClonedLoop.
insert(Pred).second && Pred != ClonedHeader)
1478 if (!BlocksInClonedLoop.
empty()) {
1479 BlocksInClonedLoop.
insert(ClonedHeader);
1481 while (!Worklist.
empty()) {
1484 "Didn't put block into the loop set!");
1492 if (ClonedLoopBlocks.
count(Pred) &&
1493 BlocksInClonedLoop.
insert(Pred).second)
1512 for (
auto *BB : OrigL.
blocks()) {
1513 auto *ClonedBB = cast_or_null<BasicBlock>(VMap.
lookup(BB));
1514 if (!ClonedBB || !BlocksInClonedLoop.
count(ClonedBB))
1526 for (
Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1527 PL->addBlockEntry(ClonedBB);
1534 for (
Loop *ChildL : OrigL) {
1535 auto *ClonedChildHeader =
1536 cast_or_null<BasicBlock>(VMap.
lookup(ChildL->getHeader()));
1537 if (!ClonedChildHeader || !BlocksInClonedLoop.
count(ClonedChildHeader))
1543 for (
auto *ChildLoopBB : ChildL->blocks())
1545 cast<BasicBlock>(VMap.
lookup(ChildLoopBB))) &&
1546 "Child cloned loop has a header within the cloned outer "
1547 "loop but not all of its blocks!");
1562 if (BlocksInClonedLoop.
empty())
1563 UnloopedBlockSet.
insert(ClonedPH);
1564 for (
auto *ClonedBB : ClonedLoopBlocks)
1565 if (!BlocksInClonedLoop.
count(ClonedBB))
1566 UnloopedBlockSet.
insert(ClonedBB);
1572 auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1574 return ExitLoopMap.
lookup(
LHS)->getLoopDepth() <
1575 ExitLoopMap.
lookup(
RHS)->getLoopDepth();
1580 while (!UnloopedBlockSet.
empty() && !OrderedClonedExitsInLoops.empty()) {
1581 assert(Worklist.
empty() &&
"Didn't clear worklist!");
1583 BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1598 if (!UnloopedBlockSet.
erase(PredBB)) {
1600 (BlocksInClonedLoop.
count(PredBB) || ExitLoopMap.
count(PredBB)) &&
1601 "Predecessor not mapped to a loop!");
1608 bool Inserted = ExitLoopMap.
insert({PredBB, ExitL}).second;
1610 assert(Inserted &&
"Should only visit an unlooped block once!");
1615 }
while (!Worklist.
empty());
1624 for (
auto *BB : llvm::concat<BasicBlock *const>(
1625 ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1627 OuterL->addBasicBlockToLoop(BB, LI);
1630 for (
auto &BBAndL : ExitLoopMap) {
1631 auto *BB = BBAndL.first;
1632 auto *OuterL = BBAndL.second;
1634 "Failed to put all blocks into outer loops!");
1641 for (
Loop *ChildL : OrigL) {
1642 auto *ClonedChildHeader =
1643 cast_or_null<BasicBlock>(VMap.
lookup(ChildL->getHeader()));
1644 if (!ClonedChildHeader || BlocksInClonedLoop.
count(ClonedChildHeader))
1648 for (
auto *ChildLoopBB : ChildL->blocks())
1650 "Cloned a child loop header but not all of that loops blocks!");
1654 *ChildL, ExitLoopMap.
lookup(ClonedChildHeader), VMap, LI));
1660 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1664 for (
BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1665 for (
const auto &VMap : VMaps)
1666 if (
BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1669 SuccBB->removePredecessor(ClonedBB);
1682 BB->dropAllReferences();
1685 BB->eraseFromParent();
1703 DeathCandidates.
append(L.blocks().begin(), L.blocks().end());
1704 while (!DeathCandidates.
empty()) {
1708 SuccBB->removePredecessor(BB);
1725 for (
Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1726 for (
auto *BB : DeadBlockSet)
1727 ParentL->getBlocksSet().erase(BB);
1729 [&](
BasicBlock *BB) { return DeadBlockSet.count(BB); });
1735 if (!DeadBlockSet.count(ChildL->getHeader()))
1738 assert(llvm::all_of(ChildL->blocks(),
1739 [&](BasicBlock *ChildBB) {
1740 return DeadBlockSet.count(ChildBB);
1742 "If the child loop header is dead all blocks in the child loop must "
1743 "be dead as well!");
1744 DestroyLoopCB(*ChildL, ChildL->
getName());
1754 for (
auto *BB : DeadBlockSet) {
1756 assert(!DT.
getNode(BB) &&
"Should already have cleared domtree!");
1763 BB->dropAllReferences();
1768 for (
auto *BB : DeadBlockSet)
1769 BB->eraseFromParent();
1787 auto *PH = L.getLoopPreheader();
1788 auto *Header = L.getHeader();
1802 assert(L.contains(Pred) &&
"Found a predecessor of the loop header other "
1803 "than the preheader that is not part of the "
1809 if (LoopBlockSet.
insert(Pred).second && Pred != Header)
1814 if (LoopBlockSet.
empty())
1815 return LoopBlockSet;
1818 while (!Worklist.
empty()) {
1820 assert(LoopBlockSet.
count(BB) &&
"Didn't put block into the loop set!");
1832 assert(L.contains(InnerL) &&
1833 "Should not reach a loop *outside* this loop!");
1836 auto *InnerPH = InnerL->getLoopPreheader();
1837 assert(L.contains(InnerPH) &&
"Cannot contain an inner loop block "
1838 "but not contain the inner loop "
1840 if (!LoopBlockSet.
insert(InnerPH).second)
1850 for (
auto *InnerBB : InnerL->blocks()) {
1851 if (InnerBB == BB) {
1853 "Block should already be in the set!");
1857 LoopBlockSet.
insert(InnerBB);
1869 if (L.contains(Pred) && LoopBlockSet.
insert(Pred).second)
1873 assert(LoopBlockSet.
count(Header) &&
"Cannot fail to add the header!");
1877 return LoopBlockSet;
1898 auto *PH = L.getLoopPreheader();
1902 Loop *ParentL =
nullptr;
1906 for (
auto *ExitBB : ExitBlocks)
1910 if (!ParentL || (ParentL != ExitL && ParentL->
contains(ExitL)))
1922 if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1924 for (
Loop *IL = L.getParentLoop(); IL != ParentL;
1926 IL->getBlocksSet().erase(PH);
1927 for (
auto *BB : L.blocks())
1928 IL->getBlocksSet().erase(BB);
1930 return BB == PH || L.contains(BB);
1935 L.getParentLoop()->removeChildLoop(&L);
1943 auto &
Blocks = L.getBlocksVector();
1945 LoopBlockSet.empty()
1947 : std::stable_partition(
1949 [&](
BasicBlock *BB) { return LoopBlockSet.count(BB); });
1953 if (LoopBlockSet.empty())
1954 UnloopedBlocks.
insert(PH);
1958 L.getBlocksSet().erase(BB);
1969 Loop *PrevExitL = L.getParentLoop();
1971 auto RemoveUnloopedBlocksFromLoop =
1973 for (
auto *BB : UnloopedBlocks)
1974 L.getBlocksSet().erase(BB);
1976 return UnloopedBlocks.count(BB);
1981 while (!UnloopedBlocks.
empty() && !ExitsInLoops.
empty()) {
1982 assert(Worklist.
empty() &&
"Didn't clear worklist!");
1983 assert(NewExitLoopBlocks.empty() &&
"Didn't clear loop set!");
1988 assert(ExitL.
contains(&L) &&
"Exit loop must contain the inner loop!");
1994 for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->
getParentLoop())
1995 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2009 if (!UnloopedBlocks.
erase(PredBB)) {
2010 assert((NewExitLoopBlocks.count(PredBB) ||
2012 "Predecessor not in a nested loop (or already visited)!");
2019 bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
2021 assert(Inserted &&
"Should only visit an unlooped block once!");
2026 }
while (!Worklist.
empty());
2031 for (
auto *BB : NewExitLoopBlocks)
2033 if (BBL == &L || !L.contains(BBL))
2038 NewExitLoopBlocks.clear();
2044 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2045 for (
auto *BB : UnloopedBlocks)
2047 if (BBL == &L || !L.contains(BBL))
2053 auto &SubLoops = L.getSubLoopsVector();
2054 auto SubLoopsSplitI =
2055 LoopBlockSet.empty()
2057 : std::stable_partition(
2058 SubLoops.begin(), SubLoops.end(), [&](
Loop *SubL) {
2059 return LoopBlockSet.count(SubL->getHeader());
2061 for (
auto *HoistedL :
make_range(SubLoopsSplitI, SubLoops.end())) {
2063 HoistedL->setParentLoop(
nullptr);
2073 if (
auto *NewParentL = LI.
getLoopFor(HoistedL->getLoopPreheader()))
2074 NewParentL->addChildLoop(HoistedL);
2078 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
2082 assert(SubLoops.empty() &&
2083 "Failed to remove all subloops from the original loop!");
2084 if (
Loop *ParentL = L.getParentLoop())
2102template <
typename CallableT>
2114 if (!Callable(
N->getBlock()))
2120 "Cannot visit a node twice when walking a tree!");
2123 }
while (!DomWorklist.
empty());
2133 bool InjectedCondition) {
2136 SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2142 "Can only unswitch switches and conditional branch!");
2146 !PartiallyInvariant);
2149 "Cannot have other invariants with full unswitching!");
2152 "Partial unswitching requires an instruction as the condition!");
2165 if (!FullUnswitch) {
2169 PartiallyInvariant) &&
2170 "Only `or`, `and`, an `select`, partially invariant instructions "
2171 "can combine invariants being unswitched.");
2182 BI ? BI->
getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2187 for (
auto Case : SI->cases())
2188 if (Case.getCaseSuccessor() != RetainedSuccBB)
2189 UnswitchedSuccBBs.
insert(Case.getCaseSuccessor());
2191 assert(!UnswitchedSuccBBs.
count(RetainedSuccBB) &&
2192 "Should not unswitch the same successor we are retaining!");
2201 Loop *ParentL = L.getParentLoop();
2210 Loop *OuterExitL = &L;
2212 L.getUniqueExitBlocks(ExitBlocks);
2213 for (
auto *ExitBB : ExitBlocks) {
2217 if (!NewOuterExitL) {
2219 OuterExitL =
nullptr;
2222 if (NewOuterExitL != OuterExitL && NewOuterExitL->
contains(OuterExitL))
2223 OuterExitL = NewOuterExitL;
2243 for (
auto *SuccBB : llvm::concat<BasicBlock *const>(
ArrayRef(RetainedSuccBB),
2245 if (SuccBB->getUniquePredecessor() ||
2247 return PredBB == ParentBB || DT.
dominates(SuccBB, PredBB);
2250 DominatingSucc[BB] = SuccBB;
2269 for (
auto *SuccBB : UnswitchedSuccBBs) {
2272 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2273 DominatingSucc, *VMaps.
back(), DTUpdates, AC, DT, LI, MSSAU, SE);
2278 if (TI.
getMetadata(LLVMContext::MD_make_implicit)) {
2282 TI.
setMetadata(LLVMContext::MD_make_implicit,
nullptr);
2289 TI.
setMetadata(LLVMContext::MD_make_implicit,
nullptr);
2296 SplitBB->getTerminator()->eraseFromParent();
2304 NewTI->
insertInto(ParentBB, ParentBB->end());
2314 Cond,
Cond->getName() +
".fr", BI);
2316 DTUpdates.
push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2318 assert(SI &&
"Must either be a branch or switch!");
2321 assert(SI->getDefaultDest() == RetainedSuccBB &&
2322 "Not retaining default successor!");
2323 SI->setDefaultDest(LoopPH);
2324 for (
const auto &Case : SI->cases())
2325 if (Case.getCaseSuccessor() == RetainedSuccBB)
2326 Case.setSuccessor(LoopPH);
2328 Case.setSuccessor(ClonedPHs.
find(Case.getCaseSuccessor())->second);
2332 SI->getCondition(), SI->getCondition()->getName() +
".fr", SI));
2339 {DominatorTree::Insert, SplitBB, ClonedPHs.
find(SuccBB)->second});
2353 for (
auto &VMap : VMaps)
2369 "Only one possible unswitched block for a branch!");
2373 DTUpdates.
push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2383 "Not retaining default successor!");
2384 for (
const auto &Case : NewSI->
cases())
2385 Case.getCaseSuccessor()->removePredecessor(
2393 DTUpdates.
push_back({DominatorTree::Delete, ParentBB, SuccBB});
2397 ParentBB->getTerminator()->eraseFromParent();
2403 assert(BI &&
"Only branches have partial unswitching.");
2405 "Only one possible unswitched block for a branch!");
2409 if (PartiallyInvariant)
2411 *SplitBB, Invariants,
Direction, *ClonedPH, *LoopPH, L, MSSAU);
2414 *SplitBB, Invariants,
Direction, *ClonedPH, *LoopPH,
2417 DTUpdates.
push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2424 for (
auto &VMap : VMaps)
2444 for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2467 assert(DT.
verify(DominatorTree::VerificationLevel::Fast));
2469 if (BI && !PartiallyInvariant) {
2475 "Only one possible unswitched block for a branch!");
2487 bool ReplaceUnswitched =
2488 FullUnswitch || (Invariants.
size() == 1) || PartiallyInvariant;
2496 for (
Value *Invariant : Invariants) {
2497 assert(!isa<Constant>(Invariant) &&
2498 "Should not be replacing constant values!");
2501 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2508 U.set(ContinueReplacement);
2509 else if (ReplaceUnswitched &&
2511 U.set(UnswitchedReplacement);
2528 auto UpdateLoop = [&](
Loop &UpdateL) {
2530 UpdateL.verifyLoop();
2531 for (
Loop *ChildL : UpdateL) {
2532 ChildL->verifyLoop();
2533 assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2534 "Perturbed a child loop's LCSSA form!");
2554 for (
Loop *UpdatedL :
2555 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2556 UpdateLoop(*UpdatedL);
2557 if (UpdatedL->isOutermost())
2558 OuterExitL =
nullptr;
2562 if (L.isOutermost())
2563 OuterExitL =
nullptr;
2568 if (OuterExitL != &L)
2569 for (
Loop *OuterL = ParentL; OuterL != OuterExitL;
2571 UpdateLoop(*OuterL);
2583 for (
Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2584 if (UpdatedL->getParentLoop() == ParentL)
2586 UnswitchCB(IsStillLoop, PartiallyInvariant, InjectedCondition, SibLoops);
2609 auto BBCostIt = BBCostMap.
find(
N.getBlock());
2610 if (BBCostIt == BBCostMap.
end())
2614 auto DTCostIt = DTCostMap.
find(&
N);
2615 if (DTCostIt != DTCostMap.
end())
2616 return DTCostIt->second;
2621 N.begin(),
N.end(), BBCostIt->second,
2623 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2625 bool Inserted = DTCostMap.
insert({&
N,
Cost}).second;
2627 assert(Inserted &&
"Should not insert a node while visiting children!");
2662 SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2664 BasicBlock *ThenBB = CondBr->getSuccessor(0),
2665 *TailBB = CondBr->getSuccessor(1);
2670 Phi->addIncoming(SI->getTrueValue(), ThenBB);
2671 Phi->addIncoming(SI->getFalseValue(), HeadBB);
2672 SI->replaceAllUsesWith(Phi);
2673 SI->eraseFromParent();
2716 GI->
getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2723 GuardedBlock->
setName(
"guarded");
2774 return L.contains(SuccBB);
2776 NumCostMultiplierSkipped++;
2780 auto *ParentL = L.getParentLoop();
2781 int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2782 : std::distance(LI.
begin(), LI.
end()));
2786 int UnswitchedClones = 0;
2787 for (
const auto &Candidate : UnswitchCandidates) {
2790 bool SkipExitingSuccessors = DT.
dominates(CondBlock, Latch);
2791 if (isa<SelectInst>(CI)) {
2796 if (!SkipExitingSuccessors)
2800 int NonExitingSuccessors =
2802 [SkipExitingSuccessors, &L](
const BasicBlock *SuccBB) {
2803 return !SkipExitingSuccessors || L.contains(SuccBB);
2805 UnswitchedClones +=
Log2_32(NonExitingSuccessors);
2813 unsigned ClonesPower =
2817 int SiblingsMultiplier =
2818 std::max((ParentL ? SiblingsCount
2828 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2832 <<
" (siblings " << SiblingsMultiplier <<
" * clones "
2833 << (1 << ClonesPower) <<
")"
2834 <<
" for unswitch candidate: " << TI <<
"\n");
2835 return CostMultiplier;
2843 assert(UnswitchCandidates.
empty() &&
"Should be!");
2847 if (isa<Constant>(
Cond))
2849 if (L.isLoopInvariant(
Cond)) {
2857 if (!Invariants.
empty())
2858 UnswitchCandidates.
push_back({
I, std::move(Invariants)});
2863 bool CollectGuards =
false;
2865 auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
2867 if (GuardDecl && !GuardDecl->use_empty())
2868 CollectGuards =
true;
2871 for (
auto *BB : L.blocks()) {
2875 for (
auto &
I : *BB) {
2876 if (
auto *SI = dyn_cast<SelectInst>(&
I)) {
2877 auto *
Cond = SI->getCondition();
2879 if (
Cond->getType()->isIntegerTy(1) && !SI->getType()->isIntegerTy(1))
2880 AddUnswitchCandidatesForInst(SI,
Cond);
2881 }
else if (CollectGuards &&
isGuard(&
I)) {
2885 if (!isa<Constant>(
Cond) && L.isLoopInvariant(
Cond))
2890 if (
auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2893 if (!isa<Constant>(SI->getCondition()) &&
2894 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
2895 UnswitchCandidates.
push_back({SI, {SI->getCondition()}});
2899 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2900 if (!BI || !BI->isConditional() ||
2901 BI->getSuccessor(0) == BI->getSuccessor(1))
2904 AddUnswitchCandidatesForInst(BI, BI->getCondition());
2908 !
any_of(UnswitchCandidates, [&L](
auto &TerminatorAndInvariants) {
2909 return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
2914 dbgs() <<
"simple-loop-unswitch: Found partially invariant condition "
2915 << *
Info->InstToDuplicate[0] <<
"\n");
2916 PartialIVInfo = *
Info;
2917 PartialIVCondBranch = L.getHeader()->getTerminator();
2921 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
2924 return !UnswitchCandidates.
empty();
2937 if (!L.contains(IfTrue)) {
2938 Pred = ICmpInst::getInversePredicate(Pred);
2943 if (L.isLoopInvariant(
LHS)) {
2944 Pred = ICmpInst::getSwappedPredicate(Pred);
2950 Pred = ICmpInst::ICMP_ULT;
2963 if (L.isLoopInvariant(
LHS) || !L.isLoopInvariant(
RHS))
2966 if (Pred != ICmpInst::ICMP_ULT)
2969 if (!L.contains(IfTrue) || L.contains(IfFalse))
2973 if (L.getHeader() == IfTrue)
2990 assert(Weights.
size() == 2 &&
"Unexpected profile data!");
2992 auto Num = Weights[
Idx];
2993 auto Denom = Weights[0] + Weights[1];
2995 if (Denom == 0 || Num > Denom)
2998 if (LikelyTaken > ActualTaken)
3021static NonTrivialUnswitchCandidate
3025 assert(Candidate.hasPendingInjection() &&
"Nothing to inject!");
3026 BasicBlock *Preheader = L.getLoopPreheader();
3027 assert(Preheader &&
"Loop is not in simplified form?");
3029 "Unswitching branch of inner loop!");
3031 auto Pred = Candidate.PendingInjection->Pred;
3032 auto *
LHS = Candidate.PendingInjection->LHS;
3033 auto *
RHS = Candidate.PendingInjection->RHS;
3034 auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;
3035 auto *TI = cast<BranchInst>(Candidate.TI);
3036 auto *BB = Candidate.TI->getParent();
3040 assert(L.contains(InLoopSucc) &&
"Not supported yet!");
3041 assert(!L.contains(OutOfLoopSucc) &&
"Not supported yet!");
3042 auto &Ctx = BB->getContext();
3045 assert(ICmpInst::isUnsigned(Pred) &&
"Not supported yet!");
3055 auto *InjectedCond =
3056 ICmpInst::Create(Instruction::ICmp, Pred,
LHS,
RHS,
"injected.cond",
3060 BB->getParent(), InLoopSucc);
3063 Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
3065 Builder.SetInsertPoint(CheckBlock);
3071 for (
auto &
I : *InLoopSucc) {
3072 auto *PN = dyn_cast<PHINode>(&
I);
3075 auto *Inc = PN->getIncomingValueForBlock(BB);
3076 PN->addIncoming(Inc, CheckBlock);
3078 OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);
3081 { DominatorTree::Insert, BB, CheckBlock },
3082 { DominatorTree::Insert, CheckBlock, InLoopSucc },
3083 { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
3084 { DominatorTree::Delete, BB, OutOfLoopSucc }
3090 L.addBasicBlockToLoop(CheckBlock, LI);
3102 LLVM_DEBUG(
dbgs() <<
"Injected a new loop-invariant branch " << *InvariantBr
3103 <<
" and considering it for unswitching.");
3104 ++NumInvariantConditionsInjected;
3105 return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },
3126 assert(ICmpInst::isStrictPredicate(Pred));
3127 if (Compares.
size() < 2)
3130 for (
auto Prev = Compares.
begin(), Next = Compares.
begin() + 1;
3131 Next != Compares.
end(); ++Prev, ++Next) {
3135 InjectedInvariant ToInject(NonStrictPred,
LHS,
RHS, InLoopSucc);
3136 NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },
3137 std::nullopt, std::move(ToInject));
3138 UnswitchCandidates.
push_back(std::move(Candidate));
3168 auto *Latch = L.getLoopLatch();
3172 assert(L.getLoopPreheader() &&
"Must have a preheader!");
3177 for (
auto *DTN = DT.
getNode(Latch); L.contains(DTN->getBlock());
3178 DTN = DTN->getIDom()) {
3181 BasicBlock *IfTrue =
nullptr, *IfFalse =
nullptr;
3182 auto *BB = DTN->getBlock();
3186 auto *Term = BB->getTerminator();
3200 CompareDesc
Desc(cast<BranchInst>(Term),
RHS, IfTrue);
3201 while (
auto *Zext = dyn_cast<ZExtInst>(
LHS))
3202 LHS = Zext->getOperand(0);
3203 CandidatesULT[
LHS].push_back(
Desc);
3207 for (
auto &It : CandidatesULT)
3209 UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);
3214 if (!L.isSafeToClone())
3216 for (
auto *BB : L.blocks())
3217 for (
auto &
I : *BB) {
3218 if (
I.getType()->isTokenTy() &&
I.isUsedOutsideOfBlock(BB))
3220 if (
auto *CB = dyn_cast<CallBase>(&
I)) {
3221 assert(!CB->cannotDuplicate() &&
"Checked by L.isSafeToClone().");
3222 if (CB->isConvergent())
3235 if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
3239 L.getUniqueExitBlocks(ExitBlocks);
3244 for (
auto *ExitBB : ExitBlocks) {
3245 auto *
I = ExitBB->getFirstNonPHI();
3246 if (isa<CleanupPadInst>(
I) || isa<CatchSwitchInst>(
I)) {
3247 LLVM_DEBUG(
dbgs() <<
"Cannot unswitch because of cleanuppad/catchswitch "
3275 L.getHeader()->getParent()->hasMinSize()
3279 for (
auto *BB : L.blocks()) {
3281 for (
auto &
I : *BB) {
3286 assert(
Cost >= 0 &&
"Must not have negative costs!");
3288 assert(LoopCost >= 0 &&
"Must not have negative loop costs!");
3289 BBCostMap[BB] =
Cost;
3313 if (isa<SelectInst>(TI))
3322 if (!Visited.
insert(SuccBB).second)
3330 if (!FullUnswitch) {
3331 auto &BI = cast<BranchInst>(TI);
3334 if (SuccBB == BI.getSuccessor(1))
3337 if (SuccBB == BI.getSuccessor(0))
3340 SuccBB == BI.getSuccessor(0)) ||
3342 SuccBB == BI.getSuccessor(1)))
3350 if (SuccBB->getUniquePredecessor() ||
3352 return PredBB == &BB || DT.
dominates(SuccBB, PredBB);
3356 "Non-duplicated cost should never exceed total loop cost!");
3365 int SuccessorsCount =
isGuard(&TI) ? 2 : Visited.
size();
3366 assert(SuccessorsCount > 1 &&
3367 "Cannot unswitch a condition without multiple distinct successors!");
3368 return (LoopCost -
Cost) * (SuccessorsCount - 1);
3371 std::optional<NonTrivialUnswitchCandidate> Best;
3372 for (
auto &Candidate : UnswitchCandidates) {
3377 !BI || Candidate.hasPendingInjection() ||
3378 (Invariants.
size() == 1 &&
3380 InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
3384 int CostMultiplier =
3388 "cost multiplier needs to be in the range of 1..UnswitchThreshold");
3389 CandidateCost *= CostMultiplier;
3391 <<
" (multiplier: " << CostMultiplier <<
")"
3392 <<
" for unswitch candidate: " << TI <<
"\n");
3395 <<
" for unswitch candidate: " << TI <<
"\n");
3398 if (!Best || CandidateCost < Best->
Cost) {
3400 Best->Cost = CandidateCost;
3403 assert(Best &&
"Must be!");
3415 assert(isa<BranchInst>(TI) || isa<SwitchInst>(TI));
3425 if (
BranchInst *BI = dyn_cast<BranchInst>(&TI))
3430 Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
3445 PartialIVCondBranch, L, LI, AA, MSSAU);
3448 PartialIVCondBranch, L, DT, LI, AA,
3451 if (UnswitchCandidates.
empty())
3455 dbgs() <<
"Considering " << UnswitchCandidates.
size()
3456 <<
" non-trivial loop invariant conditions for unswitching.\n");
3459 UnswitchCandidates, L, DT, LI, AC,
TTI, PartialIVInfo);
3461 assert(Best.TI &&
"Failed to find loop unswitch candidate");
3462 assert(Best.Cost &&
"Failed to compute cost");
3465 LLVM_DEBUG(
dbgs() <<
"Cannot unswitch, lowest cost found: " << *Best.Cost
3470 bool InjectedCondition =
false;
3471 if (Best.hasPendingInjection()) {
3473 InjectedCondition =
true;
3475 assert(!Best.hasPendingInjection() &&
3476 "All injections should have been done by now!");
3478 if (Best.TI != PartialIVCondBranch)
3482 if (
auto *SI = dyn_cast<SelectInst>(Best.TI)) {
3488 SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT);
3498 LLVM_DEBUG(
dbgs() <<
" Unswitching non-trivial (cost = " << Best.Cost
3499 <<
") terminator: " << *Best.TI <<
"\n");
3501 LI, AC, UnswitchCB, SE, MSSAU, DestroyLoopCB,
3502 InsertFreeze, InjectedCondition);
3535 assert(L.isRecursivelyLCSSAForm(DT, LI) &&
3536 "Loops must be in LCSSA form before unswitching.");
3539 if (!L.isLoopSimplifyForm())
3546 UnswitchCB(
true,
false,
3551 const Function *
F = L.getHeader()->getParent();
3564 bool ContinueWithNonTrivial =
3566 if (!ContinueWithNonTrivial)
3570 if (
F->hasOptSize())
3575 auto IsLoopNestCold = [&](
const Loop *L) {
3581 Parent = Parent->getParentLoop();
3585 Worklist.
insert(Worklist.
end(), L->getSubLoops().begin(),
3586 L->getSubLoops().end());
3587 while (!Worklist.
empty()) {
3591 Worklist.
insert(Worklist.
end(), CurLoop->getSubLoops().begin(),
3592 CurLoop->getSubLoops().end());
3627 Function &
F = *L.getHeader()->getParent();
3630 if (
auto OuterProxy =
3634 LLVM_DEBUG(
dbgs() <<
"Unswitching loop in " <<
F.getName() <<
": " << L
3639 std::string LoopName = std::string(L.getName());
3641 auto UnswitchCB = [&L, &U, &LoopName](
bool CurrentLoopValid,
3642 bool PartiallyInvariant,
3643 bool InjectedCondition,
3646 if (!NewLoops.empty())
3647 U.addSiblingLoops(NewLoops);
3651 if (CurrentLoopValid) {
3652 if (PartiallyInvariant) {
3655 auto &
Context = L.getHeader()->getContext();
3660 Context, L.getLoopID(), {
"llvm.loop.unswitch.partial"},
3661 {DisableUnswitchMD});
3662 L.setLoopID(NewLoopID);
3663 }
else if (InjectedCondition) {
3665 auto &
Context = L.getHeader()->getContext();
3670 Context, L.getLoopID(), {
"llvm.loop.unswitch.injection"},
3671 {DisableUnswitchMD});
3672 L.setLoopID(NewLoopID);
3674 U.revisitCurrentLoop();
3676 U.markLoopAsDeleted(L, LoopName);
3680 U.markLoopAsDeleted(L,
Name);
3683 std::optional<MemorySSAUpdater> MSSAU;
3690 UnswitchCB, &AR.
SE, MSSAU ? &*MSSAU :
nullptr, PSI, AR.
BFI,
3710 OS, MapClassName2PassName);
3713 OS << (NonTrivial ?
"" :
"no-") <<
"nontrivial;";
3714 OS << (Trivial ?
"" :
"no-") <<
"trivial";
3720class SimpleLoopUnswitchLegacyPass :
public LoopPass {
3726 explicit SimpleLoopUnswitchLegacyPass(
bool NonTrivial =
false)
3751 LLVM_DEBUG(
dbgs() <<
"Unswitching loop in " <<
F.getName() <<
": " << *L
3753 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3754 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
3755 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
3756 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
3757 auto &
TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
3758 MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
3761 auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
3762 auto *SE = SEWP ? &SEWP->getSE() :
nullptr;
3764 auto UnswitchCB = [&
L, &LPM](
bool CurrentLoopValid,
bool PartiallyInvariant,
3765 bool InjectedCondition,
3768 for (
auto *NewL : NewLoops)
3774 if (CurrentLoopValid) {
3778 if (!PartiallyInvariant && !InjectedCondition)
3791 unswitchLoop(*L, DT, LI, AC, AA,
TTI,
true, NonTrivial, UnswitchCB, SE,
3792 &MSSAU,
nullptr,
nullptr, DestroyLoopCB);
3799 assert(DT.
verify(DominatorTree::VerificationLevel::Fast));
3804char SimpleLoopUnswitchLegacyPass::ID = 0;
3806 "Simple unswitch loops",
false,
false)
3817 return new SimpleLoopUnswitchLegacyPass(NonTrivial);
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
This header provides classes for managing per-loop analyses.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Provides some synthesis utilities to produce sequences of values.
This file implements a set that has insertion order iteration characteristics.
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
static void canonicalizeForInvariantConditionInjection(ICmpInst::Predicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue, BasicBlock *&IfFalse, const Loop &L)
Tries to canonicalize condition described by:
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
static int CalculateUnswitchCostMultiplier(const Instruction &TI, const Loop &L, const LoopInfo &LI, const DominatorTree &DT, ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU)
Copy a set of loop invariant values, and conditionally branch on them.
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root, const LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy > > VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, function_ref< void(bool, bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
static BranchInst * turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, AssumptionCache *AC)
Turns a select instruction into implicit control flow branch, making the following replacement:
static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, const Instruction *I, AssumptionCache *AC, const DominatorTree &DT)
Copy a set of loop invariant values ToDuplicate and insert them at the end of BB and conditionally br...
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
static bool shouldTryInjectInvariantCondition(const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L)
Returns true, if predicate described by ( Pred, LHS, RHS ) succeeding into blocks ( IfTrue,...
static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates, const Loop &L, const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC, const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo)
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, function_ref< void(bool, bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, function_ref< void(Loop &, StringRef)> DestroyLoopCB, bool InsertFreeze, bool InjectedCondition)
static Value * skipTrivialSelect(Value *Cond)
static Loop * getTopMostExitingLoop(const BasicBlock *ExitBB, const LoopInfo &LI)
static bool collectUnswitchCandidatesWithInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, const DominatorTree &DT, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
Collect unswitch candidates by invariant conditions that are not immediately present in the loop.
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
static void replaceLoopInvariantUses(const Loop &L, Value *Invariant, Constant &Replacement)
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
static bool collectUnswitchCandidates(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, const Loop &L, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
static cl::opt< bool > InjectInvariantConditions("simple-loop-unswitch-inject-invariant-conditions", cl::Hidden, cl::desc("Whether we should inject new invariants and unswitch them to " "eliminate some existing (non-invariant) conditions."), cl::init(true))
static cl::opt< bool > FreezeLoopUnswitchCond("freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation."))
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, AssumptionCache &AC)
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
static bool areLoopExitPHIsLoopInvariant(const Loop &L, const BasicBlock &ExitingBB, const BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
static bool insertCandidatesWithPendingInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, Loop &L, ICmpInst::Predicate Pred, ArrayRef< CompareDesc > Compares, const DominatorTree &DT)
Given chain of loop branch conditions looking like: br (Variant < Invariant1) br (Variant < Invariant...
static NonTrivialUnswitchCandidate injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, MemorySSAUpdater *MSSAU)
Materialize pending invariant condition of the given candidate into IR.
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
static cl::opt< unsigned > InjectInvariantConditionHotnesThreshold("simple-loop-unswitch-inject-invariant-condition-hotness-threshold", cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " "unswitch on them to eliminate branches that are " "not-taken 1/<this option> times or less."), cl::init(16))
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops, ScalarEvolution *SE)
Rebuild a loop after unswitching removes some subset of blocks and edges.
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, function_ref< void(bool, bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
Unswitch control flow predicated on loop invariant conditions.
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Build the cloned blocks for an unswitched copy of the given loop.
bool shouldTryInjectBasingOnMetadata(const BranchInst *BI, const BasicBlock *TakenSucc)
Returns true, if metadata on BI allows us to optimize branching into TakenSucc via injection of invar...
static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This defines the Use class.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
InstListType::iterator iterator
Instruction iterators...
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
static ConstantInt * getFalse(LLVMContext &Context)
This is an important base class in LLVM.
bool isOneValue() const
Returns true if the value is one.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents a freeze function that returns random concrete value if an operand is either a ...
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
bool isRelational() const
Return true if the predicate is relational (not EQ or NE).
Value * CreateFreeze(Value *V, const Twine &Name="")
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
const BasicBlock * getParent() const
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
bool isTerminator() const
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
SymbolTableList< Instruction >::iterator insertInto(BasicBlock *ParentBB, SymbolTableList< Instruction >::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
A wrapper class for inspecting calls to intrinsic functions.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
void markLoopAsDeleted(Loop &L)
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
void perform(const LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
The legacy pass manager's analysis pass to compute loop information.
Represents a single loop in the control flow graph.
StringRef getName() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static MDString * get(LLVMContext &Context, StringRef Str)
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
An analysis that produces MemorySSA for a function.
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point)
Create a MemoryAccess in MemorySSA at a specified point in a block.
void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.
void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
Legacy analysis pass which computes MemorySSA.
Encapsulates MemorySSA, including all data associated with memory accesses.
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isColdBlock(const BBType *BB, BFIT *BFI) const
Returns true if BasicBlock BB is considered cold.
The main scalar evolution driver.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
void forgetTopmostLoop(const Loop *L)
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
SymbolTableList< Instruction >::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
ConstantIntT * getCaseValue() const
Resolves case value for current case.
BasicBlock * getDefaultDest() const
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, Instruction *InsertBefore=nullptr)
void setDefaultDest(BasicBlock *DefaultCase)
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
void push_back(EltTy NewVal)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
unsigned getIntegerBitWidth() const
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void setName(const Twine &Name)
Change the name of the value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
bool match(Val *V, const Pattern &P)
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void stable_sort(R &&Range)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
Pass * createSimpleLoopUnswitchLegacyPass(bool NonTrivial=false)
Create the legacy pass object for the simple loop unswitcher.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
auto reverse(ContainerTy &&C)
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&...args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
void sort(IteratorTy Start, IteratorTy End)
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool VerifyLoopInfo
Enable verification of loop info.
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
void initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry &)
bool VerifyMemorySSA
Enables verification of MemorySSA.
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
auto predecessors(const MachineBasicBlock *BB)
bool pred_empty(const BasicBlock *BB)
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Description of the encoding of one expression Op.
Struct to hold information about a partially invariant condition.
SmallVector< Instruction * > InstToDuplicate
Instructions that need to be duplicated and checked for the unswitching condition.
Constant * KnownValue
Constant to indicate for which value the condition is invariant.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
Direction
An enum for the direction of the loop.
A CRTP mix-in to automatically provide informational APIs needed for passes.