//===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <numeric>
#include <optional>
#include <utility>

#define DEBUG_TYPE "simple-loop-unswitch"

using namespace llvm;
using namespace llvm::PatternMatch;

STATISTIC(NumBranches, "Number of branches unswitched");
STATISTIC(NumSwitches, "Number of switches unswitched");
STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
STATISTIC(NumTrivial, "Number of unswitches that are trivial");
STATISTIC(
    NumCostMultiplierSkipped,
    "Number of unswitch candidates that had their cost multiplier skipped");
STATISTIC(NumInvariantConditionsInjected,
          "Number of invariant conditions injected and unswitched");

86 "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
87 cl::desc("Forcibly enables non-trivial loop unswitching rather than "
88 "following the configuration passed into the pass."));
89
90static cl::opt<int>
91 UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
92 cl::desc("The cost threshold for unswitching a loop."));
93
95 "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
96 cl::desc("Enable unswitch cost multiplier that prohibits exponential "
97 "explosion in nontrivial unswitch."));
99 "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
100 cl::desc("Toplevel siblings divisor for cost multiplier."));
102 "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
103 cl::desc("Number of unswitch candidates that are ignored when calculating "
104 "cost multiplier."));
106 "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
107 cl::desc("If enabled, simple loop unswitching will also consider "
108 "llvm.experimental.guard intrinsics as unswitch candidates."));
110 "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
111 cl::init(false), cl::Hidden,
112 cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
113 "null checks to save time analyzing if we can keep it."));
115 MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
116 cl::desc("Max number of memory uses to explore during "
117 "partial unswitching analysis"),
118 cl::init(100), cl::Hidden);
120 "freeze-loop-unswitch-cond", cl::init(true), cl::Hidden,
121 cl::desc("If enabled, the freeze instruction will be added to condition "
122 "of loop unswitch to prevent miscompilation."));
123
125 "simple-loop-unswitch-inject-invariant-conditions", cl::Hidden,
126 cl::desc("Whether we should inject new invariants and unswitch them to "
127 "eliminate some existing (non-invariant) conditions."),
128 cl::init(true));
129
131 "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
132 cl::Hidden, cl::desc("Only try to inject loop invariant conditions and "
133 "unswitch on them to eliminate branches that are "
134 "not-taken 1/<this option> times or less."),
135 cl::init(16));
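
// Illustrative usage: these knobs can be toggled from the 'opt' command line
// when experimenting with the pass, e.g. (sketch only; the exact spelling of
// the pass pipeline may vary across LLVM versions):
//   opt -passes='loop-mssa(simple-loop-unswitch<nontrivial>)' \
//       -unswitch-threshold=100 -S input.ll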

namespace {
struct CompareDesc {
  BranchInst *Term;
  Value *Invariant;
  BasicBlock *InLoopSucc;

  CompareDesc(BranchInst *Term, Value *Invariant, BasicBlock *InLoopSucc)
      : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
};

struct InjectedInvariant {
  ICmpInst::Predicate Pred;
  Value *LHS;
  Value *RHS;
  BasicBlock *InLoopSucc;

  InjectedInvariant(ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
                    BasicBlock *InLoopSucc)
      : Pred(Pred), LHS(LHS), RHS(RHS), InLoopSucc(InLoopSucc) {}
};

struct NonTrivialUnswitchCandidate {
  Instruction *TI = nullptr;
  TinyPtrVector<Value *> Invariants;
  std::optional<InstructionCost> Cost;
  std::optional<InjectedInvariant> PendingInjection;
  NonTrivialUnswitchCandidate(
      Instruction *TI, ArrayRef<Value *> Invariants,
      std::optional<InstructionCost> Cost = std::nullopt,
      std::optional<InjectedInvariant> PendingInjection = std::nullopt)
      : TI(TI), Invariants(Invariants), Cost(Cost),
        PendingInjection(PendingInjection) {};

  bool hasPendingInjection() const { return PendingInjection.has_value(); }
};
} // end anonymous namespace.

// Helper to skip (select x, true, false), which matches both a logical AND and
// OR and can confuse code that tries to determine if \p Cond is either a
// logical AND or OR but not both.
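//
// For example (illustrative IR): the pattern
//   %sel = select i1 %x, i1 true, i1 false
// matches both m_LogicalAnd() and m_LogicalOr(); skipping through it leaves
// the analysis looking at %x, whose and/or structure is unambiguous.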
static Value *skipTrivialSelect(Value *Cond) {
  Value *CondNext;
  while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
    Cond = CondNext;
  return Cond;
}

/// Collect all of the loop invariant input values transitively used by the
/// homogeneous instruction graph from a given root.
///
/// This essentially walks from a root recursively through loop variant operands
/// which perform the same logical operation (AND or OR) and finds all inputs
/// which are loop invariant. For some operations these can be re-associated
/// and unswitched out of the loop entirely.
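///
/// For example (illustrative): with a loop-variant root `%c = and i1 %a, %b`
/// where `%a = and i1 %inv1, %inv2` has two loop-invariant operands and `%b`
/// depends on the induction variable, the collected invariants are
/// {%inv1, %inv2}.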
static TinyPtrVector<Value *>
collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root,
                                         const LoopInfo &LI) {
  assert(!L.isLoopInvariant(&Root) &&
         "Only need to walk the graph if root itself is not invariant.");
  TinyPtrVector<Value *> Invariants;

  bool IsRootAnd = match(&Root, m_LogicalAnd());
  bool IsRootOr = match(&Root, m_LogicalOr());

  // Build a worklist and recurse through operators collecting invariants.
  SmallVector<Instruction *, 4> Worklist;
  SmallPtrSet<Instruction *, 8> Visited;
  Worklist.push_back(&Root);
  Visited.insert(&Root);
  do {
    Instruction &I = *Worklist.pop_back_val();
    for (Value *OpV : I.operand_values()) {
      // Skip constants as unswitching isn't interesting for them.
      if (isa<Constant>(OpV))
        continue;

      // Add it to our result if loop invariant.
      if (L.isLoopInvariant(OpV)) {
        Invariants.push_back(OpV);
        continue;
      }

      // If not an instruction with the same opcode, nothing we can do.
      Instruction *OpI = dyn_cast<Instruction>(skipTrivialSelect(OpV));

      if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
                  (IsRootOr && match(OpI, m_LogicalOr())))) {
        // Visit this operand.
        if (Visited.insert(OpI).second)
          Worklist.push_back(OpI);
      }
    }
  } while (!Worklist.empty());

  return Invariants;
}

static void replaceLoopInvariantUses(const Loop &L, Value *Invariant,
                                     Constant &Replacement) {
  assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");

  // Replace uses of LIC in the loop with the given constant.
  // We use make_early_inc_range as set invalidates the iterator.
  for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
    Instruction *UserI = dyn_cast<Instruction>(U.getUser());

    // Replace this use within the loop body.
    if (UserI && L.contains(UserI))
      U.set(&Replacement);
  }
}

/// Check that all the LCSSA PHI nodes in the loop exit block have trivial
/// incoming values along this edge.
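///
/// Illustrative IR sketch: for an exit-block PHI such as
///   %lcssa = phi i32 [ %inv, %exiting ]
/// the incoming value %inv along the %exiting edge must be loop invariant for
/// the exit to be usable by trivial unswitching.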
static bool areLoopExitPHIsLoopInvariant(const Loop &L,
                                         const BasicBlock &ExitingBB,
                                         const BasicBlock &ExitBB) {
  for (const Instruction &I : ExitBB) {
    auto *PN = dyn_cast<PHINode>(&I);
    if (!PN)
      // No more PHIs to check.
      return true;

    // If the incoming value for this edge isn't loop invariant the unswitch
    // won't be trivial.
    if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
      return false;
  }
  llvm_unreachable("Basic blocks should never be empty!");
}

/// Copy a set of loop invariant values \p Invariants and insert them at the
/// end of \p BB and conditionally branch on the copied condition. We only
/// branch on a single value.
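///
/// For example (illustrative), with Invariants = {%inv1, %inv2} and
/// Direction == true this emits roughly:
///   %inv1.fr = freeze i1 %inv1            ; only when a freeze is required
///   %cond = or i1 %inv1.fr, %inv2
///   br i1 %cond, label %unswitched, label %normal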
static void buildPartialUnswitchConditionalBranch(
    BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
    BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
    const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) {
  IRBuilder<> IRB(&BB);

  SmallVector<Value *> FrozenInvariants;
  for (Value *Inv : Invariants) {
    if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT))
      Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr");
    FrozenInvariants.push_back(Inv);
  }

  Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
                          : IRB.CreateAnd(FrozenInvariants);
  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
                   Direction ? &NormalSucc : &UnswitchedSucc);
}

/// Copy a set of loop invariant values, and conditionally branch on them.
static void buildPartialInvariantUnswitchConditionalBranch(
    BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
    BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
    MemorySSAUpdater *MSSAU) {
  ValueToValueMapTy VMap;
  for (auto *Val : reverse(ToDuplicate)) {
    Instruction *Inst = cast<Instruction>(Val);
    Instruction *NewInst = Inst->clone();
    NewInst->insertInto(&BB, BB.end());
    RemapInstruction(NewInst, VMap,
                     RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
    VMap[Val] = NewInst;

    if (!MSSAU)
      continue;

    MemorySSA *MSSA = MSSAU->getMemorySSA();
    if (auto *MemUse =
            dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) {
      auto *DefiningAccess = MemUse->getDefiningAccess();
      // Get the first defining access before the loop.
      while (L.contains(DefiningAccess->getBlock())) {
        // If the defining access is a MemoryPhi, get the incoming
        // value for the pre-header as defining access.
        if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
          DefiningAccess =
              MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
        else
          DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
      }
      MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
                                    NewInst->getParent(),
                                    MemorySSA::BeforeTerminator);
    }
  }

  IRBuilder<> IRB(&BB);
  Value *Cond = VMap[ToDuplicate[0]];
  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
                   Direction ? &NormalSucc : &UnswitchedSucc);
}

/// Rewrite the PHI nodes in an unswitched loop exit basic block.
///
/// Requires that the loop exit and unswitched basic block are the same, and
/// that the exiting block was a unique predecessor of that block. Rewrites the
/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
/// PHI nodes from the old preheader that now contains the unswitched
/// terminator.
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,
                                                  BasicBlock &OldExitingBB,
                                                  BasicBlock &OldPH) {
  for (PHINode &PN : UnswitchedBB.phis()) {
    // When the loop exit is directly unswitched we just need to update the
    // incoming basic block. We loop to handle weird cases with repeated
    // incoming blocks, but expect to typically only have one operand here.
    for (auto i : seq<int>(0, PN.getNumOperands())) {
      assert(PN.getIncomingBlock(i) == &OldExitingBB &&
             "Found incoming block different from unique predecessor!");
      PN.setIncomingBlock(i, &OldPH);
    }
  }
}

/// Rewrite the PHI nodes in the loop exit basic block and the split off
/// unswitched block.
///
/// Because the exit block remains an exit from the loop, this rewrites the
/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
/// nodes into the unswitched basic block to select between the value in the
/// old preheader and the loop exit.
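///
/// Illustrative sketch: an exit-block PHI
///   %p = phi i32 [ %v, %exiting ], ...
/// loses its [ %v, %exiting ] entry when fully unswitching, while a new PHI
///   %p.split = phi i32 [ %v, %old.preheader ], [ %p, %exit ]
/// in the unswitched block merges the value from the two paths.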
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
                                                      BasicBlock &UnswitchedBB,
                                                      BasicBlock &OldExitingBB,
                                                      BasicBlock &OldPH,
                                                      bool FullUnswitch) {
  assert(&ExitBB != &UnswitchedBB &&
         "Must have different loop exit and unswitched blocks!");
  Instruction *InsertPt = &*UnswitchedBB.begin();
  for (PHINode &PN : ExitBB.phis()) {
    auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
                                  PN.getName() + ".split", InsertPt);

    // Walk backwards over the old PHI node's inputs to minimize the cost of
    // removing each one. We have to do this weird loop manually so that we
    // create the same number of new incoming edges in the new PHI as we expect
    // each case-based edge to be included in the unswitched switch in some
    // cases.
    // FIXME: This is really, really gross. It would be much cleaner if LLVM
    // allowed us to create a single entry for a predecessor block without
    // having separate entries for each "edge" even though these edges are
    // required to produce identical results.
    for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
      if (PN.getIncomingBlock(i) != &OldExitingBB)
        continue;

      Value *Incoming = PN.getIncomingValue(i);
      if (FullUnswitch)
        // No more edge from the old exiting block to the exit block.
        PN.removeIncomingValue(i);

      NewPN->addIncoming(Incoming, &OldPH);
    }

    // Now replace the old PHI with the new one and wire the old one in as an
    // input to the new one.
    PN.replaceAllUsesWith(NewPN);
    NewPN->addIncoming(&PN, &ExitBB);
  }
}

/// Hoist the current loop up to the innermost loop containing a remaining exit.
///
/// Because we've removed an exit from the loop, we may have changed the set of
/// loops reachable and need to move the current loop up the loop nest or even
/// to an entirely separate nest.
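///
/// For example (illustrative): if the unswitched exit was the only path from
/// this loop back toward its parent's latch, the loop's blocks no longer
/// belong to that parent, and the loop must be re-parented to the innermost
/// loop still containing one of its remaining exits (or become top-level).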
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
                                 DominatorTree &DT, LoopInfo &LI,
                                 MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
  // If the loop is already at the top level, we can't hoist it anywhere.
  Loop *OldParentL = L.getParentLoop();
  if (!OldParentL)
    return;

  SmallVector<BasicBlock *, 4> Exits;
  L.getExitBlocks(Exits);
  Loop *NewParentL = nullptr;
  for (auto *ExitBB : Exits)
    if (Loop *ExitL = LI.getLoopFor(ExitBB))
      if (!NewParentL || NewParentL->contains(ExitL))
        NewParentL = ExitL;

  if (NewParentL == OldParentL)
    return;

  // The new parent loop (if different) should always contain the old one.
  if (NewParentL)
    assert(NewParentL->contains(OldParentL) &&
           "Can only hoist this loop up the nest!");

  // The preheader will need to move with the body of this loop. However,
  // because it isn't in this loop we also need to update the primary loop map.
  assert(OldParentL == LI.getLoopFor(&Preheader) &&
         "Parent loop of this loop should contain this loop's preheader!");
  LI.changeLoopFor(&Preheader, NewParentL);

  // Remove this loop from its old parent.
  OldParentL->removeChildLoop(&L);

  // Add the loop either to the new parent or as a top-level loop.
  if (NewParentL)
    NewParentL->addChildLoop(&L);
  else
    LI.addTopLevelLoop(&L);

  // Remove this loop's blocks from the old parent and every other loop up the
  // nest until reaching the new parent. Also update all of these
  // no-longer-containing loops to reflect the nesting change.
  for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
       OldContainingL = OldContainingL->getParentLoop()) {
    llvm::erase_if(OldContainingL->getBlocksVector(),
                   [&](const BasicBlock *BB) {
                     return BB == &Preheader || L.contains(BB);
                   });

    OldContainingL->getBlocksSet().erase(&Preheader);
    for (BasicBlock *BB : L.blocks())
      OldContainingL->getBlocksSet().erase(BB);

    // Because we just hoisted a loop out of this one, we have essentially
    // created new exit paths from it. That means we need to form LCSSA PHI
    // nodes for values used in the no-longer-nested loop.
    formLCSSA(*OldContainingL, DT, &LI, SE);

    // We shouldn't need to form dedicated exits because the exit introduced
    // here is the (just split by unswitching) preheader. However, after trivial
    // unswitching it is possible to get new non-dedicated exits out of parent
    // loop so let's conservatively form dedicated exit blocks and figure out
    // if we can optimize later.
    formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
                            /*PreserveLCSSA*/ true);
  }
}

// Return the top-most loop containing ExitBB and having ExitBB as exiting block
// or the loop containing ExitBB, if there is no parent loop containing ExitBB
// as exiting block.
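//
// Illustrative: for a nest P2 { P1 { ... } } where ExitBB lies in P1 and has a
// successor outside P1 but none outside P2, the result is P1: the outermost
// loop that still has ExitBB as an exiting block.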
static const Loop *getTopMostExitingLoop(const BasicBlock *ExitBB,
                                         const LoopInfo &LI) {
  const Loop *TopMost = LI.getLoopFor(ExitBB);
  const Loop *Current = TopMost;
  while (Current) {
    if (Current->isLoopExiting(ExitBB))
      TopMost = Current;
    Current = Current->getParentLoop();
  }
  return TopMost;
}

/// Unswitch a trivial branch if the condition is loop invariant.
///
/// This routine should only be called when loop code leading to the branch has
/// been validated as trivial (no side effects). This routine checks if the
/// condition is invariant and one of the successors is a loop exit. This
/// allows us to unswitch without duplicating the loop, making it trivial.
///
/// If this routine fails to unswitch the branch it returns false.
///
/// If the branch can be unswitched, this routine splits the preheader and
/// hoists the branch above that split. Preserves loop simplified form
/// (splitting the exit block as necessary). It simplifies the branch within
/// the loop to an unconditional branch but doesn't remove it entirely. Further
/// cleanup can be done with some simplifycfg like pass.
///
/// If `SE` is not null, it will be updated based on the potential loop SCEVs
/// invalidated by this.
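///
/// Illustrative sketch (made-up IR): given an in-loop branch
///   br i1 %invariant, label %loop.body, label %loop.exit
/// the test is hoisted so the old preheader ends with roughly
///   br i1 %invariant, label %new.preheader, label %loop.exit
/// and the branch left inside the loop collapses to
///   br label %loop.body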
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
                                  LoopInfo &LI, ScalarEvolution *SE,
                                  MemorySSAUpdater *MSSAU) {
  assert(BI.isConditional() && "Can only unswitch a conditional branch!");
  LLVM_DEBUG(dbgs() << "  Trying to unswitch branch: " << BI << "\n");

  // The loop invariant values that we want to unswitch.
  TinyPtrVector<Value *> Invariants;

  // When true, we're fully unswitching the branch rather than just unswitching
  // some input conditions to the branch.
  bool FullUnswitch = false;

  Value *Cond = skipTrivialSelect(BI.getCondition());
  if (L.isLoopInvariant(Cond)) {
    Invariants.push_back(Cond);
    FullUnswitch = true;
  } else {
    if (auto *CondInst = dyn_cast<Instruction>(Cond))
      Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
    if (Invariants.empty()) {
      LLVM_DEBUG(dbgs() << "   Couldn't find invariant inputs!\n");
      return false;
    }
  }

  // Check that one of the branch's successors exits, and which one.
  bool ExitDirection = true;
  int LoopExitSuccIdx = 0;
  auto *LoopExitBB = BI.getSuccessor(0);
  if (L.contains(LoopExitBB)) {
    ExitDirection = false;
    LoopExitSuccIdx = 1;
    LoopExitBB = BI.getSuccessor(1);
    if (L.contains(LoopExitBB)) {
      LLVM_DEBUG(dbgs() << "   Branch doesn't exit the loop!\n");
      return false;
    }
  }
  auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
  auto *ParentBB = BI.getParent();
  if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {
    LLVM_DEBUG(dbgs() << "   Loop exit PHI's aren't loop-invariant!\n");
    return false;
  }

  // When unswitching only part of the branch's condition, we need the exit
  // block to be reached directly from the partially unswitched input. This can
  // be done when the exit block is along the true edge and the branch condition
  // is a graph of `or` operations, or the exit block is along the false edge
  // and the condition is a graph of `and` operations.
  if (!FullUnswitch) {
    if (ExitDirection ? !match(Cond, m_LogicalOr())
                      : !match(Cond, m_LogicalAnd())) {
      LLVM_DEBUG(dbgs() << "   Branch condition is in improper form for "
                           "non-full unswitch!\n");
      return false;
    }
  }

  LLVM_DEBUG({
    dbgs() << "    unswitching trivial invariant conditions for: " << BI
           << "\n";
    for (Value *Invariant : Invariants) {
      dbgs() << "      " << *Invariant << " == true";
      if (Invariant != Invariants.back())
        dbgs() << " ||";
      dbgs() << "\n";
    }
  });

  // If we have scalar evolutions, we need to invalidate them including this
  // loop, the loop containing the exit block and the topmost parent loop
  // exiting via LoopExitBB.
  if (SE) {
    if (const Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
      SE->forgetLoop(ExitL);
    else
      // Forget the entire nest as this exits the entire nest.
      SE->forgetTopmostLoop(&L);
    SE->forgetBlockAndLoopDispositions();
  }

  if (MSSAU && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  // Split the preheader, so that we know that there is a safe place to insert
  // the conditional branch. We will change the preheader to have a conditional
  // branch on LoopCond.
  BasicBlock *OldPH = L.getLoopPreheader();
  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);

  // Now that we have a place to insert the conditional branch, create a place
  // to branch to: this is the exit block out of the loop that we are
  // unswitching. We need to split this if there are other loop predecessors.
  // Because the loop is in simplified form, *any* other predecessor is enough.
  BasicBlock *UnswitchedBB;
  if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
    assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
           "A branch's parent isn't a predecessor!");
    UnswitchedBB = LoopExitBB;
  } else {
    UnswitchedBB =
        SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI, MSSAU);
  }

  if (MSSAU && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  // Actually move the invariant uses into the unswitched position. If possible,
  // we do this by moving the instructions, but when doing partial unswitching
  // we do it by building a new merge of the values in the unswitched position.
  OldPH->getTerminator()->eraseFromParent();
  if (FullUnswitch) {
    // If fully unswitching, we can use the existing branch instruction.
    // Splice it into the old PH to gate reaching the new preheader and re-point
    // its successors.
    OldPH->splice(OldPH->end(), BI.getParent(), BI.getIterator());
    BI.setCondition(Cond);
    if (MSSAU) {
      // Temporarily clone the terminator, to make MSSA update cheaper by
      // separating "insert edge" updates from "remove edge" ones.
      BI.clone()->insertInto(ParentBB, ParentBB->end());
    } else {
      // Create a new unconditional branch that will continue the loop as a new
      // terminator.
      BranchInst::Create(ContinueBB, ParentBB);
    }
    BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
    BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
  } else {
    // Only unswitching a subset of inputs to the condition, so we will need to
    // build a new branch that merges the invariant inputs.
    if (ExitDirection)
      assert(match(skipTrivialSelect(BI.getCondition()), m_LogicalOr()) &&
             "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
             "condition!");
    else
      assert(match(skipTrivialSelect(BI.getCondition()), m_LogicalAnd()) &&
             "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
             " condition!");
    buildPartialUnswitchConditionalBranch(
        *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
        FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT);
  }

  // Update the dominator tree with the added edge.
  DT.insertEdge(OldPH, UnswitchedBB);

  // After the dominator tree was updated with the added edge, update MemorySSA
  // if available.
  if (MSSAU) {
    SmallVector<CFGUpdate, 1> Updates;
    Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
    MSSAU->applyInsertUpdates(Updates, DT);
  }

  // Finish updating dominator tree and memory ssa for full unswitch.
  if (FullUnswitch) {
    if (MSSAU) {
      // Remove the cloned branch instruction.
      ParentBB->getTerminator()->eraseFromParent();
      // Create unconditional branch now.
      BranchInst::Create(ContinueBB, ParentBB);
      MSSAU->removeEdge(ParentBB, LoopExitBB);
    }
    DT.deleteEdge(ParentBB, LoopExitBB);
  }

  if (MSSAU && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  // Rewrite the relevant PHI nodes.
  if (UnswitchedBB == LoopExitBB)
    rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
  else
    rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
                                              *ParentBB, *OldPH, FullUnswitch);

  // The constant we can replace all of our invariants with inside the loop
  // body. If any of the invariants have a value other than this the loop won't
  // be entered.
  ConstantInt *Replacement = ExitDirection
                                 ? ConstantInt::getFalse(BI.getContext())
                                 : ConstantInt::getTrue(BI.getContext());

  // Since this is an i1 condition we can also trivially replace uses of it
  // within the loop with a constant.
  for (Value *Invariant : Invariants)
    replaceLoopInvariantUses(L, Invariant, *Replacement);

  // If this was full unswitching, we may have changed the nesting relationship
  // for this loop so hoist it to its correct parent if needed.
  if (FullUnswitch)
    hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);

  if (MSSAU && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  LLVM_DEBUG(dbgs() << "  done: unswitching trivial branch...\n");
  ++NumTrivial;
  ++NumBranches;
  return true;
}

/// Unswitch a trivial switch if the condition is loop invariant.
///
/// This routine should only be called when loop code leading to the switch has
/// been validated as trivial (no side effects). This routine checks if the
/// condition is invariant and that at least one of the successors is a loop
/// exit. This allows us to unswitch without duplicating the loop, making it
/// trivial.
///
/// If this routine fails to unswitch the switch it returns false.
///
/// If the switch can be unswitched, this routine splits the preheader and
/// copies the switch above that split. If the default case is one of the
/// exiting cases, it copies the non-exiting cases and points them at the new
/// preheader. If the default case is not exiting, it copies the exiting cases
/// and points the default at the preheader. It preserves loop simplified form
/// (splitting the exit blocks as necessary). It simplifies the switch within
/// the loop by removing now-dead cases. If the default case is one of those
/// unswitched, it replaces its destination with a new basic block containing
/// only unreachable. Such basic blocks, while technically loop exits, are not
/// considered for unswitching so this is a stable transform and the same
/// switch will not be revisited. If after unswitching there is only a single
/// in-loop successor, the switch is further simplified to an unconditional
/// branch. Still more cleanup can be done with some simplifycfg like pass.
///
/// If `SE` is not null, it will be updated based on the potential loop SCEVs
/// invalidated by this.
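///
/// Illustrative sketch (made-up IR): for
///   switch i32 %invariant, label %loop.latch [ i32 0, label %loop.exit ]
/// the exit case is hoisted onto a new switch in the old preheader, so merely
/// entering the loop already implies %invariant != 0, and the switch left in
/// the loop simplifies to `br label %loop.latch`.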
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
                                  LoopInfo &LI, ScalarEvolution *SE,
                                  MemorySSAUpdater *MSSAU) {
  LLVM_DEBUG(dbgs() << "  Trying to unswitch switch: " << SI << "\n");
  Value *LoopCond = SI.getCondition();

  // If this isn't switching on an invariant condition, we can't unswitch it.
  if (!L.isLoopInvariant(LoopCond))
    return false;

  auto *ParentBB = SI.getParent();

  // The same check must be used both for the default and the exit cases. We
  // should never leave edges from the switch instruction to a basic block that
  // we are unswitching, hence the condition used to determine the default case
  // needs to also be used to populate ExitCaseIndices, which is then used to
  // remove cases from the switch.
  auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
    // BBToCheck is not an exit block if it is inside loop L.
    if (L.contains(&BBToCheck))
      return false;
    // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
    if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
      return false;
    // We do not unswitch a block that only has an unreachable statement, as
    // it's possible this is a previously unswitched block. Only unswitch if
    // either the terminator is not unreachable, or, if it is, it's not the only
    // instruction in the block.
    auto *TI = BBToCheck.getTerminator();
    bool isUnreachable = isa<UnreachableInst>(TI);
    return !isUnreachable ||
           (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI));
  };

  SmallVector<int, 4> ExitCaseIndices;
  for (auto Case : SI.cases())
    if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
      ExitCaseIndices.push_back(Case.getCaseIndex());
  BasicBlock *DefaultExitBB = nullptr;
  SwitchInstProfUpdateWrapper::CaseWeightOpt DefaultCaseWeight =
      SwitchInstProfUpdateWrapper::getSuccessorWeight(SI, 0);
  if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
    DefaultExitBB = SI.getDefaultDest();
  } else if (ExitCaseIndices.empty())
    return false;

  LLVM_DEBUG(dbgs() << "    unswitching trivial switch...\n");

  if (MSSAU && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  // We may need to invalidate SCEVs for the outermost loop reached by any of
  // the exits.
  Loop *OuterL = &L;

  if (DefaultExitBB) {
    // Check the loop containing this exit.
    Loop *ExitL = LI.getLoopFor(DefaultExitBB);
    if (!ExitL || ExitL->contains(OuterL))
      OuterL = ExitL;
  }

  if (SE) {
    if (OuterL)
      SE->forgetLoop(OuterL);
    else
      SE->forgetTopmostLoop(&L);
  }

  if (DefaultExitBB) {
    // Clear out the default destination temporarily to allow accurate
    // predecessor lists to be examined below.
    SI.setDefaultDest(nullptr);
  }

  // Store the exit cases into a separate data structure and remove them from
  // the switch.
  SmallVector<std::tuple<ConstantInt *, BasicBlock *,
                         SwitchInstProfUpdateWrapper::CaseWeightOpt>,
              4> ExitCases;
  ExitCases.reserve(ExitCaseIndices.size());
  SwitchInstProfUpdateWrapper SIW(SI);
  // We walk the case indices backwards so that we remove the last case first
  // and don't disrupt the earlier indices.
  for (unsigned Index : reverse(ExitCaseIndices)) {
    auto CaseI = SI.case_begin() + Index;
    // Compute the outer loop from this exit.
    Loop *ExitL = LI.getLoopFor(CaseI->getCaseSuccessor());
    if (!ExitL || ExitL->contains(OuterL))
      OuterL = ExitL;
    // Save the value of this case.
    auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
    ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
    // Delete the unswitched cases.
    SIW.removeCase(CaseI);
  }

  // Check if after this all of the remaining cases point at the same
  // successor.
  BasicBlock *CommonSuccBB = nullptr;
  if (SI.getNumCases() > 0 &&
      all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
        return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
      }))
    CommonSuccBB = SI.case_begin()->getCaseSuccessor();
  if (!DefaultExitBB) {
    // If we're not unswitching the default, we need it to match any cases to
    // have a common successor or if we have no cases it is the common
    // successor.
    if (SI.getNumCases() == 0)
      CommonSuccBB = SI.getDefaultDest();
    else if (SI.getDefaultDest() != CommonSuccBB)
      CommonSuccBB = nullptr;
  }

  // Split the preheader, so that we know that there is a safe place to insert
  // the switch.
  BasicBlock *OldPH = L.getLoopPreheader();
  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
  OldPH->getTerminator()->eraseFromParent();

  // Now add the unswitched switch.
  auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
  SwitchInstProfUpdateWrapper NewSIW(*NewSI);

  // Rewrite the IR for the unswitched basic blocks. This requires two steps.
  // First, we split any exit blocks with remaining in-loop predecessors. Then
  // we update the PHIs in one of two ways depending on if there was a split.
  // We walk in reverse so that we split in the same order as the cases
  // appeared. This is purely for convenience of reading the resulting IR, but
  // it doesn't cost anything really.
  SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
  SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap;
  // Handle the default exit if necessary.
  // FIXME: It'd be great if we could merge this with the loop below but LLVM's
  // ranges aren't quite powerful enough yet.
  if (DefaultExitBB) {
    if (pred_empty(DefaultExitBB)) {
      UnswitchedExitBBs.insert(DefaultExitBB);
      rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
    } else {
      auto *SplitBB =
          SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI, MSSAU);
      rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
                                                *ParentBB, *OldPH,
                                                /*FullUnswitch*/ true);
      DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
    }
  }
  // Note that we must use a reference in the for loop so that we update the
  // container.
  for (auto &ExitCase : reverse(ExitCases)) {
    // Grab a reference to the exit block in the pair so that we can update it.
    BasicBlock *ExitBB = std::get<1>(ExitCase);

    // If this case is the last edge into the exit block, we can simply reuse it
    // as it will no longer be a loop exit. No mapping necessary.
    if (pred_empty(ExitBB)) {
      // Only rewrite once.
      if (UnswitchedExitBBs.insert(ExitBB).second)
        rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
      continue;
    }

    // Otherwise we need to split the exit block so that we retain an exit
    // block from the loop and a target for the unswitched condition.
    BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
    if (!SplitExitBB) {
      // If this is the first time we see this, do the split and remember it.
      SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
      rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
                                                *ParentBB, *OldPH,
                                                /*FullUnswitch*/ true);
    }
    // Update the case pair to point to the split block.
    std::get<1>(ExitCase) = SplitExitBB;
  }

  // Now add the unswitched cases. We do this in reverse order as we built them
  // in reverse order.
  for (auto &ExitCase : reverse(ExitCases)) {
    ConstantInt *CaseVal = std::get<0>(ExitCase);
    BasicBlock *UnswitchedBB = std::get<1>(ExitCase);

    NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
  }

  // If the default was unswitched, re-point it and add explicit cases for
  // entering the loop.
  if (DefaultExitBB) {
    NewSIW->setDefaultDest(DefaultExitBB);
    NewSIW.setSuccessorWeight(0, DefaultCaseWeight);

    // We removed all the exit cases, so we just copy the cases to the
    // unswitched switch.
    for (const auto &Case : SI.cases())
      NewSIW.addCase(Case.getCaseValue(), NewPH,
                     SIW.getSuccessorWeight(Case.getSuccessorIndex()));
  } else if (DefaultCaseWeight) {
    // We have to set branch weight of the default case.
    uint64_t SW = *DefaultCaseWeight;
    for (const auto &Case : SI.cases()) {
      auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
      assert(W &&
             "case weight must be defined as default case weight is defined");
      SW += *W;
    }
    NewSIW.setSuccessorWeight(0, SW);
  }

  // If we ended up with a common successor for every path through the switch
  // after unswitching, rewrite it to an unconditional branch to make it easy
  // to recognize. Otherwise we potentially have to recognize the default case
  // pointing at unreachable and other complexity.
  if (CommonSuccBB) {
    BasicBlock *BB = SI.getParent();
    // We may have had multiple edges to this common successor block, so remove
    // them as predecessors. We skip the first one, either the default or the
    // actual first case.
    bool SkippedFirst = DefaultExitBB == nullptr;
    for (auto Case : SI.cases()) {
      assert(Case.getCaseSuccessor() == CommonSuccBB &&
             "Non-common successor!");
      (void)Case;
      if (!SkippedFirst) {
        SkippedFirst = true;
        continue;
      }
      CommonSuccBB->removePredecessor(BB,
                                      /*KeepOneInputPHIs*/ true);
    }
    // Now nuke the switch and replace it with a direct branch.
    SIW.eraseFromParent();
    BranchInst::Create(CommonSuccBB, BB);
  } else if (DefaultExitBB) {
    assert(SI.getNumCases() > 0 &&
           "If we had no cases we'd have a common successor!");
    // Move the last case to the default successor. This is valid as if the
    // default got unswitched it cannot be reached. This has the advantage of
    // being simple and keeping the number of edges from this switch to
    // successors the same, and avoiding any PHI update complexity.
    auto LastCaseI = std::prev(SI.case_end());

    SI.setDefaultDest(LastCaseI->getCaseSuccessor());
    SIW.setSuccessorWeight(
        0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
    SIW.removeCase(LastCaseI);
  }

  // Walk the unswitched exit blocks and the unswitched split blocks and update
  // the dominator tree based on the CFG edits. While we are walking unordered
  // containers here, the API for applyUpdates takes an unordered list of
  // updates and requires them to not contain duplicates.
  SmallVector<DominatorTree::UpdateType> DTUpdates;
  for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
    DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
    DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
  }
  for (auto SplitUnswitchedPair : SplitExitBBMap) {
    DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
    DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
  }

  if (MSSAU) {
    MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
    if (VerifyMemorySSA)
      MSSAU->getMemorySSA()->verifyMemorySSA();
  } else {
    DT.applyUpdates(DTUpdates);
  }

  assert(DT.verify(DominatorTree::VerificationLevel::Fast));

  // We may have changed the nesting relationship for this loop so hoist it to
  // its correct parent if needed.
  hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);

  if (MSSAU && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  ++NumTrivial;
  ++NumSwitches;
  LLVM_DEBUG(dbgs() << "  done: unswitching trivial switch...\n");
  return true;
}

/// This routine scans the loop to find a branch or switch which occurs before
/// any side effects occur. These can potentially be unswitched without
/// duplicating the loop. If a branch or switch is successfully unswitched the
/// scanning continues to see if subsequent branches or switches have become
/// trivial. Once all trivial candidates have been unswitched, this routine
/// returns.
///
/// The return value indicates whether anything was unswitched (and therefore
/// changed).
///
/// If `SE` is not null, it will be updated based on the potential loop SCEVs
/// invalidated by this.
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
                                         LoopInfo &LI, ScalarEvolution *SE,
                                         MemorySSAUpdater *MSSAU) {
  bool Changed = false;

  // If loop header has only one reachable successor we should keep looking for
  // trivial condition candidates in the successor as well. An alternative is
  // to constant fold conditions and merge successors into loop header (then we
  // only need to check header's terminator). The reason for not doing this in
  // LoopUnswitch pass is that it could potentially break LoopPassManager's
  // invariants. Folding dead branches could either eliminate the current loop
  // or make other loops unreachable. LCSSA form might also not be preserved
  // after deleting branches. The following code keeps traversing loop header's
  // successors until it finds the trivial condition candidate (condition that
  // is not a constant). Since unswitching generates branches with constant
  // conditions, this scenario could be very common in practice.
  BasicBlock *CurrentBB = L.getHeader();
  SmallPtrSet<BasicBlock *, 8> Visited;
  Visited.insert(CurrentBB);
  do {
    // Check if there are any side-effecting instructions (e.g. stores, calls,
    // volatile loads) in the part of the loop that the code *would* execute
    // without unswitching.
    if (MSSAU) // Possible early exit with MSSA
      if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
        if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
          return Changed;
    if (llvm::any_of(*CurrentBB,
                     [](Instruction &I) { return I.mayHaveSideEffects(); }))
      return Changed;

    Instruction *CurrentTerm = CurrentBB->getTerminator();

    if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
      // Don't bother trying to unswitch past a switch with a constant
      // condition. This should be removed prior to running this pass by
      // simplifycfg.
      if (isa<Constant>(SI->getCondition()))
        return Changed;

      if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
        // Couldn't unswitch this one so we're done.
        return Changed;

      // Mark that we managed to unswitch something.
      Changed = true;

      // If unswitching turned the terminator into an unconditional branch then
      // we can continue. The unswitching logic specifically works to fold any
      // cases it can into an unconditional branch to make it easier to
      // recognize here.
      auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
      if (!BI || BI->isConditional())
        return Changed;

      CurrentBB = BI->getSuccessor(0);
      continue;
    }

    auto *BI = dyn_cast<BranchInst>(CurrentTerm);
    if (!BI)
      // We do not understand other terminator instructions.
      return Changed;

    // Don't bother trying to unswitch past an unconditional branch or a branch
    // with a constant value. These should be removed by simplifycfg prior to
    // running this pass.
    if (!BI->isConditional() ||
        isa<Constant>(skipTrivialSelect(BI->getCondition())))
      return Changed;

    // Found a trivial condition candidate: non-foldable conditional branch. If
    // we fail to unswitch this, we can't do anything else that is trivial.
    if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
      return Changed;

    // Mark that we managed to unswitch something.
    Changed = true;

    // If we only unswitched some of the conditions feeding the branch, we won't
    // have collapsed it to a single successor.
    BI = cast<BranchInst>(CurrentBB->getTerminator());
    if (BI->isConditional())
      return Changed;

    // Follow the newly unconditional branch into its successor.
    CurrentBB = BI->getSuccessor(0);

    // When continuing, if we exit the loop or reach a previously visited block,
    // then we can not reach any trivial condition candidates (unfoldable
    // branch instructions or switch instructions) and no unswitch can happen.
  } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);

  return Changed;
}

/// Build the cloned blocks for an unswitched copy of the given loop.
///
/// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
/// after the split block (`SplitBB`) that will be used to select between the
/// cloned and original loop.
///
/// This routine handles cloning all of the necessary loop blocks and exit
/// blocks including rewriting their instructions and the relevant PHI nodes.
/// Any loop blocks or exit blocks which are dominated by a different successor
/// than the one for this clone of the loop blocks can be trivially skipped. We
/// use the `DominatingSucc` map to determine whether a block satisfies that
/// property with a simple map lookup.
///
/// It also correctly creates the unconditional branch in the cloned
/// unswitched parent block to only point at the unswitched successor.
///
/// This does not handle most of the necessary updates to `LoopInfo`. Only exit
/// block splitting is correctly reflected in `LoopInfo`, essentially all of
/// the cloned blocks (and their loops) are left without full `LoopInfo`
/// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
/// blocks to them but doesn't create the cloned `DominatorTree` structure and
/// instead the caller must recompute an accurate DT. It *does* correctly
/// update the `AssumptionCache` provided in `AC`.
static BasicBlock *buildClonedLoopBlocks(
    Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
    ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
    BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
    const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
    ValueToValueMapTy &VMap,
    SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates, AssumptionCache &AC,
    DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU,
    ScalarEvolution *SE) {
  SmallVector<BasicBlock *, 4> NewBlocks;
  NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());

  // We will need to clone a bunch of blocks, wrap up the clone operation in
  // a helper.
  auto CloneBlock = [&](BasicBlock *OldBB) {
    // Clone the basic block and insert it before the new preheader.
    BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
    NewBB->moveBefore(LoopPH);

    // Record this block and the mapping.
    NewBlocks.push_back(NewBB);
    VMap[OldBB] = NewBB;

    return NewBB;
  };

  // We skip cloning blocks when they have a dominating succ that is not the
  // succ we are cloning for.
  auto SkipBlock = [&](BasicBlock *BB) {
    auto It = DominatingSucc.find(BB);
    return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
  };

  // First, clone the preheader.
  auto *ClonedPH = CloneBlock(LoopPH);

  // Then clone all the loop blocks, skipping the ones that aren't necessary.
  for (auto *LoopBB : L.blocks())
    if (!SkipBlock(LoopBB))
      CloneBlock(LoopBB);

  // Split all the loop exit edges so that when we clone the exit blocks, if
  // any of the exit blocks are *also* a preheader for some other loop, we
  // don't create multiple predecessors entering the loop header.
  for (auto *ExitBB : ExitBlocks) {
    if (SkipBlock(ExitBB))
      continue;

    // When we are going to clone an exit, we don't need to clone all the
    // instructions in the exit block and we want to ensure we have an easy
    // place to merge the CFG, so split the exit first. This is always safe to
    // do because there cannot be any non-loop predecessors of a loop exit in
    // loop simplified form.
    auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);

    // Rearrange the names to make it easier to write test cases by having the
    // exit block carry the suffix rather than the merge block carrying the
    // suffix.
    MergeBB->takeName(ExitBB);
    ExitBB->setName(Twine(MergeBB->getName()) + ".split");

    // Now clone the original exit block.
    auto *ClonedExitBB = CloneBlock(ExitBB);
    assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
           "Exit block should have been split to have one successor!");
    assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
           "Cloned exit block has the wrong successor!");

    // Remap any cloned instructions and create a merge phi node for them.
    for (auto ZippedInsts : llvm::zip_first(
             llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
             llvm::make_range(ClonedExitBB->begin(),
                              std::prev(ClonedExitBB->end())))) {
      Instruction &I = std::get<0>(ZippedInsts);
      Instruction &ClonedI = std::get<1>(ZippedInsts);

      // The only instructions in the exit block should be PHI nodes and
      // potentially a landing pad.
      assert(
          (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&
          "Bad instruction in exit block!");
      // We should have a value map between the instruction and its clone.
      assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");

      // Forget SCEVs based on exit phis in case SCEV looked through the phi.
      if (SE && isa<PHINode>(I))
        SE->forgetValue(&I);

      auto *MergePN =
          PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi",
                          &*MergeBB->getFirstInsertionPt());
      I.replaceAllUsesWith(MergePN);
      MergePN->addIncoming(&I, ExitBB);
      MergePN->addIncoming(&ClonedI, ClonedExitBB);
    }
  }

  // Rewrite the instructions in the cloned blocks to refer to the instructions
  // in the cloned blocks. We have to do this as a second pass so that we have
  // everything available. Also, we have inserted new instructions which may
  // include assume intrinsics, so we update the assumption cache while
  // processing this.
  for (auto *ClonedBB : NewBlocks)
    for (Instruction &I : *ClonedBB) {
      RemapInstruction(&I, VMap,
                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
      if (auto *II = dyn_cast<AssumeInst>(&I))
        AC.registerAssumption(II);
    }

  // Update any PHI nodes in the cloned successors of the skipped blocks to not
  // have spurious incoming values.
  for (auto *LoopBB : L.blocks())
    if (SkipBlock(LoopBB))
      for (auto *SuccBB : successors(LoopBB))
        if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
          for (PHINode &PN : ClonedSuccBB->phis())
            PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);

  // Remove the cloned parent as a predecessor of any successor we ended up
  // cloning other than the unswitched one.
  auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
  for (auto *SuccBB : successors(ParentBB)) {
    if (SuccBB == UnswitchedSuccBB)
      continue;

    auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
    if (!ClonedSuccBB)
      continue;

    ClonedSuccBB->removePredecessor(ClonedParentBB,
                                    /*KeepOneInputPHIs*/ true);
  }

  // Replace the cloned branch with an unconditional branch to the cloned
  // unswitched successor.
  auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
  Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
  // Trivial Simplification. If Terminator is a conditional branch and
  // condition becomes dead - erase it.
  Value *ClonedConditionToErase = nullptr;
  if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
    ClonedConditionToErase = BI->getCondition();
  else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
    ClonedConditionToErase = SI->getCondition();

  ClonedTerminator->eraseFromParent();
  BranchInst::Create(ClonedSuccBB, ClonedParentBB);

  if (ClonedConditionToErase)
    RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
                                               MSSAU);

  // If there are duplicate entries in the PHI nodes because of multiple edges
  // to the unswitched successor, we need to nuke all but one as we replaced it
  // with a direct branch.
  for (PHINode &PN : ClonedSuccBB->phis()) {
    bool Found = false;
    // Loop over the incoming operands backwards so we can easily delete as we
    // go without invalidating the index.
    for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
      if (PN.getIncomingBlock(i) != ClonedParentBB)
        continue;
      if (!Found) {
        Found = true;
        continue;
      }
      PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
    }
  }

  // Record the domtree updates for the new blocks.
  SmallPtrSet<BasicBlock *, 4> SuccSet;
  for (auto *ClonedBB : NewBlocks) {
    for (auto *SuccBB : successors(ClonedBB))
      if (SuccSet.insert(SuccBB).second)
        DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
    SuccSet.clear();
  }

  return ClonedPH;
}

/// Recursively clone the specified loop and all of its children.
///
/// The target parent loop for the clone should be provided, or can be null if
/// the clone is a top-level loop. While cloning, all the blocks are mapped
/// with the provided value map. The entire original loop must be present in
/// the value map. The cloned loop is returned.
static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
                           const ValueToValueMapTy &VMap, LoopInfo &LI) {
  auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
    assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
    ClonedL.reserveBlocks(OrigL.getNumBlocks());
    for (auto *BB : OrigL.blocks()) {
      auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
      ClonedL.addBlockEntry(ClonedBB);
      if (LI.getLoopFor(BB) == &OrigL)
        LI.changeLoopFor(ClonedBB, &ClonedL);
    }
  };

  // We specially handle the first loop because it may get cloned into
  // a different parent and because we most commonly are cloning leaf loops.
  Loop *ClonedRootL = LI.AllocateLoop();
  if (RootParentL)
    RootParentL->addChildLoop(ClonedRootL);
  else
    LI.addTopLevelLoop(ClonedRootL);
  AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);

  if (OrigRootL.isInnermost())
    return ClonedRootL;

  // If we have a nest, we can quickly clone the entire loop nest using an
  // iterative approach because it is a tree. We keep the cloned parent in the
  // data structure to avoid repeatedly querying through a map to find it.
  SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
  // Build up the loops to clone in reverse order as we'll clone them from the
  // back.
  for (Loop *ChildL : llvm::reverse(OrigRootL))
    LoopsToClone.push_back({ClonedRootL, ChildL});
  do {
    Loop *ClonedParentL, *L;
    std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
    Loop *ClonedL = LI.AllocateLoop();
    ClonedParentL->addChildLoop(ClonedL);
    AddClonedBlocksToLoop(*L, *ClonedL);
    for (Loop *ChildL : llvm::reverse(*L))
      LoopsToClone.push_back({ClonedL, ChildL});
  } while (!LoopsToClone.empty());

  return ClonedRootL;
}

/// Build the cloned loops of an original loop from unswitching.
///
/// Because unswitching simplifies the CFG of the loop, this isn't a trivial
/// operation. We need to re-verify that there even is a loop (as the backedge
/// may not have been cloned), and even if there are remaining backedges the
/// backedge set may be different. However, we know that each child loop is
/// undisturbed; we only need to find where to place each child loop within
/// either any parent loop or within a cloned version of the original loop.
///
/// Because child loops may end up cloned outside of any cloned version of the
/// original loop, multiple cloned sibling loops may be created. All of them
/// are returned so that the newly introduced loop nest roots can be
/// identified.
1403static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1404 const ValueToValueMapTy &VMap, LoopInfo &LI,
1405 SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1406 Loop *ClonedL = nullptr;
1407
1408 auto *OrigPH = OrigL.getLoopPreheader();
1409 auto *OrigHeader = OrigL.getHeader();
1410
1411 auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1412 auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1413
1414 // We need to know the loops of the cloned exit blocks to even compute the
1415 // accurate parent loop. If we only clone exits to some parent of the
1416 // original parent, we want to clone into that outer loop. We also keep track
1417 // of the loops that our cloned exit blocks participate in.
1418 Loop *ParentL = nullptr;
1419 SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1421 ClonedExitsInLoops.reserve(ExitBlocks.size());
1422 for (auto *ExitBB : ExitBlocks)
1423 if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1424 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1425 ExitLoopMap[ClonedExitBB] = ExitL;
1426 ClonedExitsInLoops.push_back(ClonedExitBB);
1427 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1428 ParentL = ExitL;
1429 }
1430 assert((!ParentL || ParentL == OrigL.getParentLoop() ||
1431 ParentL->contains(OrigL.getParentLoop())) &&
1432 "The computed parent loop should always contain (or be) the parent of "
1433 "the original loop.");
1434
1435 // We build the set of blocks dominated by the cloned header from the set of
1436 // cloned blocks out of the original loop. While not all of these will
1437 // necessarily be in the cloned loop, it is enough to establish that they
1438 // aren't in unreachable cycles, etc.
1439 SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1440 for (auto *BB : OrigL.blocks())
1441 if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1442 ClonedLoopBlocks.insert(ClonedBB);
1443
1444 // Rebuild the set of blocks that will end up in the cloned loop. We may have
1445 // skipped cloning some region of this loop which can in turn skip some of
1446 // the backedges so we have to rebuild the blocks in the loop based on the
1447 // backedges that remain after cloning.
1448 SmallVector<BasicBlock *, 16> Worklist;
1449 SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1450 for (auto *Pred : predecessors(ClonedHeader)) {
1451 // The only possible non-loop header predecessor is the preheader because
1452 // we know we cloned the loop in simplified form.
1453 if (Pred == ClonedPH)
1454 continue;
1455
1456 // Because the loop was in simplified form, the only non-loop predecessor
1457 // should be the preheader.
1458 assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
1459 "header other than the preheader "
1460 "that is not part of the loop!");
1461
1462 // Insert this block into the loop set and on the first visit (and if it
1463 // isn't the header we're currently walking) put it into the worklist to
1464 // recurse through.
1465 if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1466 Worklist.push_back(Pred);
1467 }
1468
1469 // If we had any backedges then there *is* a cloned loop. Put the header into
1470 // the loop set and then walk the worklist backwards to find all the blocks
1471 // that remain within the loop after cloning.
1472 if (!BlocksInClonedLoop.empty()) {
1473 BlocksInClonedLoop.insert(ClonedHeader);
1474
1475 while (!Worklist.empty()) {
1476 BasicBlock *BB = Worklist.pop_back_val();
1477 assert(BlocksInClonedLoop.count(BB) &&
1478 "Didn't put block into the loop set!");
1479
1480 // Insert any predecessors that are in the possible set into the cloned
1481 // set, and if the insert is successful, add them to the worklist. Note
1482 // that we filter on the blocks that are definitely reachable via the
1483 // backedge to the loop header so we may prune out dead code within the
1484 // cloned loop.
1485 for (auto *Pred : predecessors(BB))
1486 if (ClonedLoopBlocks.count(Pred) &&
1487 BlocksInClonedLoop.insert(Pred).second)
1488 Worklist.push_back(Pred);
1489 }
1490
1491 ClonedL = LI.AllocateLoop();
1492 if (ParentL) {
1493 ParentL->addBasicBlockToLoop(ClonedPH, LI);
1494 ParentL->addChildLoop(ClonedL);
1495 } else {
1496 LI.addTopLevelLoop(ClonedL);
1497 }
1498 NonChildClonedLoops.push_back(ClonedL);
1499
1500 ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1501 // We don't want to just add the cloned loop blocks based on how we
1502 // discovered them. The original order of blocks was carefully built in
1503 // a way that doesn't rely on predecessor ordering. Rather than re-invent
1504 // that logic, we just re-walk the original blocks (and those of the child
1505 // loops) and filter them as we add them into the cloned loop.
1506 for (auto *BB : OrigL.blocks()) {
1507 auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1508 if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1509 continue;
1510
1511 // Directly add the blocks that are only in this loop.
1512 if (LI.getLoopFor(BB) == &OrigL) {
1513 ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1514 continue;
1515 }
1516
1517 // We want to manually add it to this loop and parents.
1518 // Registering it with LoopInfo will happen when we clone the top
1519 // loop for this block.
1520 for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1521 PL->addBlockEntry(ClonedBB);
1522 }
1523
1524 // Now add each child loop whose header remains within the cloned loop. All
1525 // of the blocks within the loop must satisfy the same constraints as the
1526 // header so once we pass the header checks we can just clone the entire
1527 // child loop nest.
1528 for (Loop *ChildL : OrigL) {
1529 auto *ClonedChildHeader =
1530 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1531 if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1532 continue;
1533
1534#ifndef NDEBUG
1535 // We should never have a cloned child loop header but fail to have
1536 // all of the blocks for that child loop.
1537 for (auto *ChildLoopBB : ChildL->blocks())
1538 assert(BlocksInClonedLoop.count(
1539 cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
1540 "Child cloned loop has a header within the cloned outer "
1541 "loop but not all of its blocks!");
1542#endif
1543
1544 cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1545 }
1546 }
1547
1548 // Now that we've handled all the components of the original loop that were
1549 // cloned into a new loop, we still need to handle anything from the original
1550 // loop that wasn't in a cloned loop.
1551
1552 // Figure out what blocks are left to place within any loop nest containing
1553 // the unswitched loop. If we never formed a loop, the cloned PH is one of
1554 // them.
1555 SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1556 if (BlocksInClonedLoop.empty())
1557 UnloopedBlockSet.insert(ClonedPH);
1558 for (auto *ClonedBB : ClonedLoopBlocks)
1559 if (!BlocksInClonedLoop.count(ClonedBB))
1560 UnloopedBlockSet.insert(ClonedBB);
1561
1562 // Copy the cloned exits and sort them in ascending loop depth, we'll work
1563 // backwards across these to process them inside out. The order shouldn't
1564 // matter as we're just trying to build up the map from inside-out; we use
1565 // the map in a more stably ordered way below.
1566 auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1567 llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1568 return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1569 ExitLoopMap.lookup(RHS)->getLoopDepth();
1570 });
1571
1572 // Populate the existing ExitLoopMap with everything reachable from each
1573 // exit, starting from the inner most exit.
1574 while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1575 assert(Worklist.empty() && "Didn't clear worklist!");
1576
1577 BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1578 Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1579
1580 // Walk the CFG back until we hit the cloned PH adding everything reachable
1581 // and in the unlooped set to this exit block's loop.
1582 Worklist.push_back(ExitBB);
1583 do {
1584 BasicBlock *BB = Worklist.pop_back_val();
1585 // We can stop recursing at the cloned preheader (if we get there).
1586 if (BB == ClonedPH)
1587 continue;
1588
1589 for (BasicBlock *PredBB : predecessors(BB)) {
1590 // If this pred has already been moved to our set or is part of some
1591 // (inner) loop, no update needed.
1592 if (!UnloopedBlockSet.erase(PredBB)) {
1593 assert(
1594 (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
1595 "Predecessor not mapped to a loop!");
1596 continue;
1597 }
1598
1599 // We just insert into the loop set here. We'll add these blocks to the
1600 // exit loop after we build up the set in an order that doesn't rely on
1601 // predecessor order (which in turn relies on use list order).
1602 bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1603 (void)Inserted;
1604 assert(Inserted && "Should only visit an unlooped block once!");
1605
1606 // And recurse through to its predecessors.
1607 Worklist.push_back(PredBB);
1608 }
1609 } while (!Worklist.empty());
1610 }
1611
1612 // Now that the ExitLoopMap gives us a mapping for all the non-looping cloned
1613 // blocks to their outer loops, walk the cloned blocks and the cloned exits
1614 // in their original order adding them to the correct loop.
1615
1616 // We need a stable insertion order, so we use the original loop's block
1617 // order and map each block into the correct parent loop.
1618 for (auto *BB : llvm::concat<BasicBlock *const>(
1619 ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1620 if (Loop *OuterL = ExitLoopMap.lookup(BB))
1621 OuterL->addBasicBlockToLoop(BB, LI);
1622
1623#ifndef NDEBUG
1624 for (auto &BBAndL : ExitLoopMap) {
1625 auto *BB = BBAndL.first;
1626 auto *OuterL = BBAndL.second;
1627 assert(LI.getLoopFor(BB) == OuterL &&
1628 "Failed to put all blocks into outer loops!");
1629 }
1630#endif
1631
1632 // Now that all the blocks are placed into the correct containing loop in the
1633 // absence of child loops, find all the potentially cloned child loops and
1634 // clone them into whatever outer loop we placed their header into.
1635 for (Loop *ChildL : OrigL) {
1636 auto *ClonedChildHeader =
1637 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1638 if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1639 continue;
1640
1641#ifndef NDEBUG
1642 for (auto *ChildLoopBB : ChildL->blocks())
1643 assert(VMap.count(ChildLoopBB) &&
1644 "Cloned a child loop header but not all of that loop's blocks!");
1645#endif
1646
1647 NonChildClonedLoops.push_back(cloneLoopNest(
1648 *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1649 }
1650}
1651
1652static void
1653deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
1654 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1655 DominatorTree &DT, MemorySSAUpdater *MSSAU) {
1656 // Find all the dead clones, and remove them from their successors.
1657 SmallVector<BasicBlock *, 16> DeadBlocks;
1658 for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1659 for (const auto &VMap : VMaps)
1660 if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1661 if (!DT.isReachableFromEntry(ClonedBB)) {
1662 for (BasicBlock *SuccBB : successors(ClonedBB))
1663 SuccBB->removePredecessor(ClonedBB);
1664 DeadBlocks.push_back(ClonedBB);
1665 }
1666
1667 // Remove all MemorySSA in the dead blocks
1668 if (MSSAU) {
1669 SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1670 DeadBlocks.end());
1671 MSSAU->removeBlocks(DeadBlockSet);
1672 }
1673
1674 // Drop any remaining references to break cycles.
1675 for (BasicBlock *BB : DeadBlocks)
1676 BB->dropAllReferences();
1677 // Erase them from the IR.
1678 for (BasicBlock *BB : DeadBlocks)
1679 BB->eraseFromParent();
1680}
1681
1682static void
1683deleteDeadBlocksFromLoop(Loop &L,
1684 SmallVectorImpl<BasicBlock *> &ExitBlocks,
1685 DominatorTree &DT, LoopInfo &LI,
1686 MemorySSAUpdater *MSSAU,
1687 ScalarEvolution *SE,
1688 function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
1689 // Find all the dead blocks tied to this loop, and remove them from their
1690 // successors.
1691 SmallSetVector<BasicBlock *, 8> DeadBlockSet;
1692
1693 // Start with loop/exit blocks and get a transitive closure of reachable dead
1694 // blocks.
1695 SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1696 ExitBlocks.end());
1697 DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1698 while (!DeathCandidates.empty()) {
1699 auto *BB = DeathCandidates.pop_back_val();
1700 if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1701 for (BasicBlock *SuccBB : successors(BB)) {
1702 SuccBB->removePredecessor(BB);
1703 DeathCandidates.push_back(SuccBB);
1704 }
1705 DeadBlockSet.insert(BB);
1706 }
1707 }
1708
1709 // Remove all MemorySSA in the dead blocks
1710 if (MSSAU)
1711 MSSAU->removeBlocks(DeadBlockSet);
1712
1713 // Filter out the dead blocks from the exit blocks list so that it can be
1714 // used in the caller.
1715 llvm::erase_if(ExitBlocks,
1716 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1717
1718 // Walk from this loop up through its parents removing all of the dead blocks.
1719 for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1720 for (auto *BB : DeadBlockSet)
1721 ParentL->getBlocksSet().erase(BB);
1722 llvm::erase_if(ParentL->getBlocksVector(),
1723 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1724 }
1725
1726 // Now delete the dead child loops. This raw delete will clear them
1727 // recursively.
1728 llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1729 if (!DeadBlockSet.count(ChildL->getHeader()))
1730 return false;
1731
1732 assert(llvm::all_of(ChildL->blocks(),
1733 [&](BasicBlock *ChildBB) {
1734 return DeadBlockSet.count(ChildBB);
1735 }) &&
1736 "If the child loop header is dead all blocks in the child loop must "
1737 "be dead as well!");
1738 DestroyLoopCB(*ChildL, ChildL->getName());
1739 if (SE)
1740 SE->forgetBlockAndLoopDispositions();
1741 LI.destroy(ChildL);
1742 return true;
1743 });
1744
1745 // Remove the loop mappings for the dead blocks and drop all the references
1746 // from these blocks to others to handle cyclic references as we start
1747 // deleting the blocks themselves.
1748 for (auto *BB : DeadBlockSet) {
1749 // Check that the dominator tree has already been updated.
1750 assert(!DT.getNode(BB) && "Should already have cleared domtree!");
1751 LI.changeLoopFor(BB, nullptr);
1752 // Drop all uses of the instructions to make sure we won't have dangling
1753 // uses in other blocks.
1754 for (auto &I : *BB)
1755 if (!I.use_empty())
1756 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
1757 BB->dropAllReferences();
1758 }
1759
1760 // Actually delete the blocks now that they've been fully unhooked from the
1761 // IR.
1762 for (auto *BB : DeadBlockSet)
1763 BB->eraseFromParent();
1764}
1765
1766/// Recompute the set of blocks in a loop after unswitching.
1767///
1768/// This walks from the original header's predecessors to rebuild the loop. We
1769/// take advantage of the fact that new blocks can't have been added, and so we
1770/// filter by the original loop's blocks. This also handles potentially
1771/// unreachable code that we don't want to explore but might be found examining
1772/// the predecessors of the header.
1773///
1774/// If the original loop is no longer a loop, this will return an empty set. If
1775/// it remains a loop, all the blocks within it will be added to the set
1776/// (including those blocks in inner loops).
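///
/// As a rough example (hypothetical CFG): if unswitching removed the only
/// latch->header backedge, the header has no remaining in-loop predecessors,
/// the initial walk finds nothing, and the returned set is empty. If some
/// backedge survives, the backwards walk from it re-adds exactly those
/// original blocks that still reach the header.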
1777static SmallPtrSet<const BasicBlock *, 16> recomputeLoopBlockSet(Loop &L,
1778 LoopInfo &LI) {
1779 SmallPtrSet<const BasicBlock *, 16> LoopBlockSet;
1780
1781 auto *PH = L.getLoopPreheader();
1782 auto *Header = L.getHeader();
1783
1784 // A worklist to use while walking backwards from the header.
1785 SmallVector<BasicBlock *, 16> Worklist;
1786
1787 // First walk the predecessors of the header to find the backedges. This will
1788 // form the basis of our walk.
1789 for (auto *Pred : predecessors(Header)) {
1790 // Skip the preheader.
1791 if (Pred == PH)
1792 continue;
1793
1794 // Because the loop was in simplified form, the only non-loop predecessor
1795 // is the preheader.
1796 assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1797 "than the preheader that is not part of the "
1798 "loop!");
1799
1800 // Insert this block into the loop set. On the first visit, if it
1801 // isn't the header we're currently walking, put it into the worklist to
1802 // recurse through.
1803 if (LoopBlockSet.insert(Pred).second && Pred != Header)
1804 Worklist.push_back(Pred);
1805 }
1806
1807 // If no backedges were found, we're done.
1808 if (LoopBlockSet.empty())
1809 return LoopBlockSet;
1810
1811 // We found backedges, recurse through them to identify the loop blocks.
1812 while (!Worklist.empty()) {
1813 BasicBlock *BB = Worklist.pop_back_val();
1814 assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1815
1816 // No need to walk past the header.
1817 if (BB == Header)
1818 continue;
1819
1820 // Because we know the inner loop structure remains valid we can use the
1821 // loop structure to jump immediately across the entire nested loop.
1822 // Further, because it is in loop simplified form, we can directly jump
1823 // to its preheader afterward.
1824 if (Loop *InnerL = LI.getLoopFor(BB))
1825 if (InnerL != &L) {
1826 assert(L.contains(InnerL) &&
1827 "Should not reach a loop *outside* this loop!");
1828 // The preheader is the only possible predecessor of the loop so
1829 // insert it into the set and check whether it was already handled.
1830 auto *InnerPH = InnerL->getLoopPreheader();
1831 assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1832 "but not contain the inner loop "
1833 "preheader!");
1834 if (!LoopBlockSet.insert(InnerPH).second)
1835 // The only way to reach the preheader is through the loop body
1836 // itself so if it has been visited the loop is already handled.
1837 continue;
1838
1839 // Insert all of the blocks (other than those already present) into
1840 // the loop set. We expect at least the block that led us to find the
1841 // inner loop to be in the block set, but we may also have other loop
1842 // blocks if they were already enqueued as predecessors of some other
1843 // outer loop block.
1844 for (auto *InnerBB : InnerL->blocks()) {
1845 if (InnerBB == BB) {
1846 assert(LoopBlockSet.count(InnerBB) &&
1847 "Block should already be in the set!");
1848 continue;
1849 }
1850
1851 LoopBlockSet.insert(InnerBB);
1852 }
1853
1854 // Add the preheader to the worklist so we will continue past the
1855 // loop body.
1856 Worklist.push_back(InnerPH);
1857 continue;
1858 }
1859
1860 // Insert any predecessors that were in the original loop into the new
1861 // set, and if the insert is successful, add them to the worklist.
1862 for (auto *Pred : predecessors(BB))
1863 if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1864 Worklist.push_back(Pred);
1865 }
1866
1867 assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
1868
1869 // We've found all the blocks participating in the loop, return our completed
1870 // set.
1871 return LoopBlockSet;
1872}
1873
1874/// Rebuild a loop after unswitching removes some subset of blocks and edges.
1875///
1876/// The removal may have removed some child loops entirely but cannot have
1877/// disturbed any remaining child loops. However, they may need to be hoisted
1878/// to the parent loop (or to be top-level loops). The original loop may be
1879/// completely removed.
1880///
1881/// The sibling loops resulting from this update are returned. If the original
1882/// loop remains a valid loop, it will be the first entry in this list with all
1883/// of the new sibling loops following it.
1884///
1885/// Returns true if the loop remains a loop after unswitching, and false if it
1886/// is no longer a loop after unswitching (and should not continue to be
1887/// referenced).
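///
/// As a rough example (hypothetical nest): if unswitching removed the last
/// backedge of L while an inner loop I survived intact, I's header is no
/// longer in L's recomputed block set, so I is detached from L, re-parented
/// to L's former parent (or made a top-level loop), and reported via
/// HoistedLoops, while L itself is deleted and false is returned.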
1888static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
1889 LoopInfo &LI,
1890 SmallVectorImpl<Loop *> &HoistedLoops,
1891 ScalarEvolution *SE) {
1892 auto *PH = L.getLoopPreheader();
1893
1894 // Compute the actual parent loop from the exit blocks. Because we may have
1895 // pruned some exits the loop may be different from the original parent.
1896 Loop *ParentL = nullptr;
1897 SmallVector<Loop *, 4> ExitLoops;
1898 SmallVector<BasicBlock *, 4> ExitsInLoops;
1899 ExitsInLoops.reserve(ExitBlocks.size());
1900 for (auto *ExitBB : ExitBlocks)
1901 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1902 ExitLoops.push_back(ExitL);
1903 ExitsInLoops.push_back(ExitBB);
1904 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1905 ParentL = ExitL;
1906 }
1907
1908 // Recompute the blocks participating in this loop. This may be empty if it
1909 // is no longer a loop.
1910 auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1911
1912 // If we still have a loop, we need to re-set the loop's parent as the exit
1913 // block set changing may have moved it within the loop nest. Note that this
1914 // can only happen when this loop has a parent as it can only hoist the loop
1915 // *up* the nest.
1916 if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1917 // Remove this loop's (original) blocks from all of the intervening loops.
1918 for (Loop *IL = L.getParentLoop(); IL != ParentL;
1919 IL = IL->getParentLoop()) {
1920 IL->getBlocksSet().erase(PH);
1921 for (auto *BB : L.blocks())
1922 IL->getBlocksSet().erase(BB);
1923 llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1924 return BB == PH || L.contains(BB);
1925 });
1926 }
1927
1928 LI.changeLoopFor(PH, ParentL);
1929 L.getParentLoop()->removeChildLoop(&L);
1930 if (ParentL)
1931 ParentL->addChildLoop(&L);
1932 else
1933 LI.addTopLevelLoop(&L);
1934 }
1935
1936 // Now we update all the blocks which are no longer within the loop.
1937 auto &Blocks = L.getBlocksVector();
1938 auto BlocksSplitI =
1939 LoopBlockSet.empty()
1940 ? Blocks.begin()
1941 : std::stable_partition(
1942 Blocks.begin(), Blocks.end(),
1943 [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
1944
1945 // Before we erase the list of unlooped blocks, build a set of them.
1946 SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
1947 if (LoopBlockSet.empty())
1948 UnloopedBlocks.insert(PH);
1949
1950 // Now erase these blocks from the loop.
1951 for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
1952 L.getBlocksSet().erase(BB);
1953 Blocks.erase(BlocksSplitI, Blocks.end());
1954
1955 // Sort the exits in ascending loop depth, we'll work backwards across these
1956 // to process them inside out.
1957 llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1958 return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
1959 });
1960
1961 // We'll build up a set for each exit loop.
1962 SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
1963 Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
1964
1965 auto RemoveUnloopedBlocksFromLoop =
1966 [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
1967 for (auto *BB : UnloopedBlocks)
1968 L.getBlocksSet().erase(BB);
1969 llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
1970 return UnloopedBlocks.count(BB);
1971 });
1972 };
1973
1974 SmallVector<BasicBlock *, 16> Worklist;
1975 while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
1976 assert(Worklist.empty() && "Didn't clear worklist!");
1977 assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
1978
1979 // Grab the next exit block, in decreasing loop depth order.
1980 BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
1981 Loop &ExitL = *LI.getLoopFor(ExitBB);
1982 assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
1983
1984 // Erase all of the unlooped blocks from the loops between the previous
1985 // exit loop and this exit loop. This works because the ExitsInLoops list is
1986 // sorted in increasing order of loop depth and thus we visit loops in
1987 // decreasing order of loop depth.
1988 for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
1989 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1990
1991 // Walk the CFG back until we hit the loop's preheader, adding everything
1992 // reachable and in the unlooped set to this exit block's loop.
1993 Worklist.push_back(ExitBB);
1994 do {
1995 BasicBlock *BB = Worklist.pop_back_val();
1996 // We can stop recursing at the preheader (if we get there).
1997 if (BB == PH)
1998 continue;
1999
2000 for (BasicBlock *PredBB : predecessors(BB)) {
2001 // If this pred has already been moved to our set or is part of some
2002 // (inner) loop, no update needed.
2003 if (!UnloopedBlocks.erase(PredBB)) {
2004 assert((NewExitLoopBlocks.count(PredBB) ||
2005 ExitL.contains(LI.getLoopFor(PredBB))) &&
2006 "Predecessor not in a nested loop (or already visited)!");
2007 continue;
2008 }
2009
2010 // We just insert into the loop set here. We'll add these blocks to the
2011 // exit loop after we build up the set in a deterministic order rather
2012 // than the predecessor-influenced visit order.
2013 bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
2014 (void)Inserted;
2015 assert(Inserted && "Should only visit an unlooped block once!");
2016
2017 // And recurse through to its predecessors.
2018 Worklist.push_back(PredBB);
2019 }
2020 } while (!Worklist.empty());
2021
2022 // If blocks in this exit loop were directly part of the original loop (as
2023 // opposed to a child loop) update the map to point to this exit loop. This
2024 // just updates a map and so the fact that the order is unstable is fine.
2025 for (auto *BB : NewExitLoopBlocks)
2026 if (Loop *BBL = LI.getLoopFor(BB))
2027 if (BBL == &L || !L.contains(BBL))
2028 LI.changeLoopFor(BB, &ExitL);
2029
2030 // We will remove the remaining unlooped blocks from this loop in the next
2031 // iteration or below.
2032 NewExitLoopBlocks.clear();
2033 }
2034
2035 // Any remaining unlooped blocks are no longer part of any loop unless they
2036 // are part of some child loop.
2037 for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
2038 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2039 for (auto *BB : UnloopedBlocks)
2040 if (Loop *BBL = LI.getLoopFor(BB))
2041 if (BBL == &L || !L.contains(BBL))
2042 LI.changeLoopFor(BB, nullptr);
2043
2044 // Sink all the child loops whose headers are no longer in the loop set to
2045 // the parent (or to be top level loops). We reach into the loop and directly
2046 // update its subloop vector to make this batch update efficient.
2047 auto &SubLoops = L.getSubLoopsVector();
2048 auto SubLoopsSplitI =
2049 LoopBlockSet.empty()
2050 ? SubLoops.begin()
2051 : std::stable_partition(
2052 SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
2053 return LoopBlockSet.count(SubL->getHeader());
2054 });
2055 for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
2056 HoistedLoops.push_back(HoistedL);
2057 HoistedL->setParentLoop(nullptr);
2058
2059 // To compute the new parent of this hoisted loop we look at where we
2060 // placed the preheader above. We can't lookup the header itself because we
2061 // retained the mapping from the header to the hoisted loop. But the
2062 // preheader and header should have the exact same new parent computed
2063 // based on the set of exit blocks from the original loop as the preheader
2064 // is a predecessor of the header and so reached in the reverse walk. And
2065 // because the loops were all in simplified form the preheader of the
2066 // hoisted loop can't be part of some *other* loop.
2067 if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
2068 NewParentL->addChildLoop(HoistedL);
2069 else
2070 LI.addTopLevelLoop(HoistedL);
2071 }
2072 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
2073
2074 // Actually delete the loop if nothing remained within it.
2075 if (Blocks.empty()) {
2076 assert(SubLoops.empty() &&
2077 "Failed to remove all subloops from the original loop!");
2078 if (Loop *ParentL = L.getParentLoop())
2079 ParentL->removeChildLoop(llvm::find(*ParentL, &L));
2080 else
2081 LI.removeLoop(llvm::find(LI, &L));
2082 // markLoopAsDeleted for L should be triggered by the caller (it is typically
2083 // done by using the UnswitchCB callback).
2084 if (SE)
2085 SE->forgetBlockAndLoopDispositions();
2086 LI.destroy(&L);
2087 return false;
2088 }
2089
2090 return true;
2091}
2092
2093/// Helper to visit a dominator subtree, invoking a callable on each node.
2094///
2095/// Returning false at any point will stop walking past that node of the tree.
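///
/// A minimal usage sketch (illustrative only; the block and lambda here are
/// hypothetical, not taken from the callers below):
///
///   visitDomSubTree(DT, SomeBB, [&](BasicBlock *BB) {
///     // Process BB; returning false stops descending below this node.
///     return L.contains(BB);
///   });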
2096template <typename CallableT>
2097void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
2098 SmallVector<DomTreeNode *, 4> DomWorklist;
2099 DomWorklist.push_back(DT[BB]);
2100#ifndef NDEBUG
2101 SmallPtrSet<DomTreeNode *, 4> Visited;
2102 Visited.insert(DT[BB]);
2103#endif
2104 do {
2105 DomTreeNode *N = DomWorklist.pop_back_val();
2106
2107 // Visit this node.
2108 if (!Callable(N->getBlock()))
2109 continue;
2110
2111 // Accumulate the child nodes.
2112 for (DomTreeNode *ChildN : *N) {
2113 assert(Visited.insert(ChildN).second &&
2114 "Cannot visit a node twice when walking a tree!");
2115 DomWorklist.push_back(ChildN);
2116 }
2117 } while (!DomWorklist.empty());
2118}
2119
2120static void unswitchNontrivialInvariants(
2121 Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
2122 IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI,
2123 AssumptionCache &AC,
2124 function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2125 ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
2126 function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
2127 auto *ParentBB = TI.getParent();
2128 BranchInst *BI = dyn_cast<BranchInst>(&TI);
2129 SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2130
2131 // We can only unswitch switches, conditional branches with an invariant
2132 // condition, branches that combine invariant conditions with an
2133 // instruction, or partially invariant instructions.
2134 assert((SI || (BI && BI->isConditional())) &&
2135 "Can only unswitch switches and conditional branch!");
2136 bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
2137 bool FullUnswitch =
2138 SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] &&
2139 !PartiallyInvariant);
2140 if (FullUnswitch)
2141 assert(Invariants.size() == 1 &&
2142 "Cannot have other invariants with full unswitching!");
2143 else
2144 assert(isa<Instruction>(skipTrivialSelect(BI->getCondition())) &&
2145 "Partial unswitching requires an instruction as the condition!");
2146
2147 if (MSSAU && VerifyMemorySSA)
2148 MSSAU->getMemorySSA()->verifyMemorySSA();
2149
2150 // Constant and BBs tracking the cloned and continuing successor. When we are
2151 // unswitching the entire condition, this can just be trivially chosen to
2152 // unswitch towards `true`. However, when we are unswitching a set of
2153 // invariants combined with `and` or `or` or partially invariant instructions,
2154 // the combining operation determines the best direction to unswitch: we want
2155 // to unswitch the direction that will collapse the branch.
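  // For example (illustrative): given `br i1 (and i1 %inv, i1 %var)`, knowing
  // %inv is false collapses the whole condition to false, so we unswitch
  // toward the false successor; for an `or`, knowing %inv is true collapses it
  // to true, so the default direction of `true` is already the right choice.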
2156 bool Direction = true;
2157 int ClonedSucc = 0;
2158 if (!FullUnswitch) {
2159 Value *Cond = skipTrivialSelect(BI->getCondition());
2160 (void)Cond;
2161 assert((match(Cond, m_LogicalAnd()) || match(Cond, m_LogicalOr()) ||
2162 PartiallyInvariant) &&
2163 "Only `or`, `and`, `select`, or partially invariant instructions "
2164 "can combine invariants being unswitched.");
2165 if (!match(Cond, m_LogicalOr())) {
2166 if (match(Cond, m_LogicalAnd()) ||
2167 (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
2168 Direction = false;
2169 ClonedSucc = 1;
2170 }
2171 }
2172 }
2173
2174 BasicBlock *RetainedSuccBB =
2175 BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2176 SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2177 if (BI)
2178 UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2179 else
2180 for (auto Case : SI->cases())
2181 if (Case.getCaseSuccessor() != RetainedSuccBB)
2182 UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2183
2184 assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
2185 "Should not unswitch the same successor we are retaining!");
2186
2187 // The branch should be in this exact loop. Any inner loop's invariant branch
2188 // should be handled by unswitching that inner loop. The caller of this
2189 // routine should filter out any candidates that remain (but were skipped for
2190 // whatever reason).
2191 assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
2192
2193 // Compute the parent loop now before we start hacking on things.
2194 Loop *ParentL = L.getParentLoop();
2195 // Get blocks in RPO order for MSSA update, before changing the CFG.
2196 LoopBlocksRPO LBRPO(&L);
2197 if (MSSAU)
2198 LBRPO.perform(&LI);
2199
2200 // Compute the outer-most loop containing one of our exit blocks. This is the
2201 // furthest up our loopnest which can be mutated, which we will use below to
2202 // update things.
2203 Loop *OuterExitL = &L;
2204 SmallVector<BasicBlock *, 4> ExitBlocks;
2205 L.getUniqueExitBlocks(ExitBlocks);
2206 for (auto *ExitBB : ExitBlocks) {
2207 Loop *NewOuterExitL = LI.getLoopFor(ExitBB);
2208 if (!NewOuterExitL) {
2209 // We exited the entire nest with this block, so we're done.
2210 OuterExitL = nullptr;
2211 break;
2212 }
2213 if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2214 OuterExitL = NewOuterExitL;
2215 }
2216
2217 // At this point, we're definitely going to unswitch something so invalidate
2218 // any cached information in ScalarEvolution for the outer most loop
2219 // containing an exit block and all nested loops.
2220 if (SE) {
2221 if (OuterExitL)
2222 SE->forgetLoop(OuterExitL);
2223 else
2224 SE->forgetTopmostLoop(&L);
2225 SE->forgetBlockAndLoopDispositions();
2226 }
2227
2228 bool InsertFreeze = false;
2229 if (FreezeLoopUnswitchCond) {
2230 ICFLoopSafetyInfo SafetyInfo;
2231 SafetyInfo.computeLoopSafetyInfo(&L);
2232 InsertFreeze = !SafetyInfo.isGuaranteedToExecute(TI, &DT, &L);
2233 }
2234
2235 // Perform the isGuaranteedNotToBeUndefOrPoison() query before the transform,
2236 // otherwise the branch instruction will have been moved outside the loop
2237 // already, and may imply that a poison condition is always UB.
2238 Value *FullUnswitchCond = nullptr;
2239 if (FullUnswitch) {
2240 FullUnswitchCond =
2241 BI ? skipTrivialSelect(BI->getCondition()) : SI->getCondition();
2242 if (InsertFreeze)
2243 InsertFreeze = !isGuaranteedNotToBeUndefOrPoison(
2244 FullUnswitchCond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
2245 }
2246
2247 // If the edge from this terminator to a successor dominates that successor,
2248 // store a map from each block in its dominator subtree to it. This lets us
2249 // tell when cloning for a particular successor if a block is dominated by
2250 // some *other* successor with a single data structure. We use this to
2251 // significantly reduce cloning.
2252 SmallDenseMap<BasicBlock *, BasicBlock *, 16> DominatingSucc;
2253 for (auto *SuccBB : llvm::concat<BasicBlock *const>(ArrayRef(RetainedSuccBB),
2254 UnswitchedSuccBBs))
2255 if (SuccBB->getUniquePredecessor() ||
2256 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2257 return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2258 }))
2259 visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2260 DominatingSucc[BB] = SuccBB;
2261 return true;
2262 });
2263
2264 // Split the preheader, so that we know that there is a safe place to insert
2265 // the conditional branch. We will change the preheader to have a conditional
2266 // branch on LoopCond. The original preheader will become the split point
2267 // between the unswitched versions, and we will have a new preheader for the
2268 // original loop.
2269 BasicBlock *SplitBB = L.getLoopPreheader();
2270 BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2271
2272 // Keep track of the dominator tree updates needed.
2273 SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
2274
2275 // Clone the loop for each unswitched successor.
2276 SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
2277 VMaps.reserve(UnswitchedSuccBBs.size());
2278 SmallDenseMap<BasicBlock *, BasicBlock *, 4> ClonedPHs;
2279 for (auto *SuccBB : UnswitchedSuccBBs) {
2280 VMaps.emplace_back(new ValueToValueMapTy());
2281 ClonedPHs[SuccBB] = buildClonedLoopBlocks(
2282 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2283 DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU, SE);
2284 }
2285
2286 // Drop metadata if we may break its semantics by moving this instr into the
2287 // split block.
2288 if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2289 if (DropNonTrivialImplicitNullChecks)
2290 // Do not spend time trying to understand if we can keep it, just drop it
2291 // to save compile time.
2292 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2293 else {
2294 // It is only legal to preserve make.implicit metadata if we are
2295 // guaranteed not to reach an implicit null check after following this branch.
2296 ICFLoopSafetyInfo SafetyInfo;
2297 SafetyInfo.computeLoopSafetyInfo(&L);
2298 if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2299 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2300 }
2301 }
2302
2303 // The stitching of the branched code back together depends on whether we're
2304 // doing full unswitching or not with the exception that we always want to
2305 // nuke the initial terminator placed in the split block.
2306 SplitBB->getTerminator()->eraseFromParent();
2307 if (FullUnswitch) {
2308 // Splice the terminator from the original loop and rewrite its
2309 // successors.
2310 SplitBB->splice(SplitBB->end(), ParentBB, TI.getIterator());
2311
2312 // Keep a clone of the terminator for MSSA updates.
2313 Instruction *NewTI = TI.clone();
2314 NewTI->insertInto(ParentBB, ParentBB->end());
2315
2316 // First wire up the moved terminator to the preheaders.
2317 if (BI) {
2318 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2319 BI->setSuccessor(ClonedSucc, ClonedPH);
2320 BI->setSuccessor(1 - ClonedSucc, LoopPH);
2321 if (InsertFreeze)
2322 FullUnswitchCond = new FreezeInst(
2323 FullUnswitchCond, FullUnswitchCond->getName() + ".fr", BI);
2324 BI->setCondition(FullUnswitchCond);
2325 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2326 } else {
2327 assert(SI && "Must either be a branch or switch!");
2328
2329 // Walk the cases and directly update their successors.
2330 assert(SI->getDefaultDest() == RetainedSuccBB &&
2331 "Not retaining default successor!");
2332 SI->setDefaultDest(LoopPH);
2333 for (const auto &Case : SI->cases())
2334 if (Case.getCaseSuccessor() == RetainedSuccBB)
2335 Case.setSuccessor(LoopPH);
2336 else
2337 Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2338
2339 if (InsertFreeze)
2340 SI->setCondition(new FreezeInst(
2341 FullUnswitchCond, FullUnswitchCond->getName() + ".fr", SI));
2342
2343 // We need to use the set to populate domtree updates as even when there
2344 // are multiple cases pointing at the same successor we only want to
2345 // remove and insert one edge in the domtree.
2346 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2347 DTUpdates.push_back(
2348 {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2349 }
2350
2351 if (MSSAU) {
2352 DT.applyUpdates(DTUpdates);
2353 DTUpdates.clear();
2354
2355 // Remove all but one edge to the retained block and all unswitched
2356 // blocks. This is to avoid having duplicate entries in the cloned Phis,
2357 // when we know we only keep a single edge for each case.
2358 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2359 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2360 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2361
2362 for (auto &VMap : VMaps)
2363 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2364 /*IgnoreIncomingWithNoClones=*/true);
2365 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2366
2367 // Remove all edges to unswitched blocks.
2368 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2369 MSSAU->removeEdge(ParentBB, SuccBB);
2370 }
2371
2372 // Now unhook the successor relationship as we'll be replacing
2373 // the terminator with a direct branch. This is much simpler for branches
2374 // than switches so we handle those first.
2375 if (BI) {
2376 // Remove the parent as a predecessor of the unswitched successor.
2377 assert(UnswitchedSuccBBs.size() == 1 &&
2378 "Only one possible unswitched block for a branch!");
2379 BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2380 UnswitchedSuccBB->removePredecessor(ParentBB,
2381 /*KeepOneInputPHIs*/ true);
2382 DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2383 } else {
2384 // Note that we actually want to remove the parent block as a predecessor
2385 // of *every* case successor. The case successor is either unswitched,
2386 // completely eliminating an edge from the parent to that successor, or it
2387 // is a duplicate edge to the retained successor as the retained successor
2388 // is always the default successor and as we'll replace this with a direct
2389 // branch we no longer need the duplicate entries in the PHI nodes.
2390 SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2391 assert(NewSI->getDefaultDest() == RetainedSuccBB &&
2392 "Not retaining default successor!");
2393 for (const auto &Case : NewSI->cases())
2394 Case.getCaseSuccessor()->removePredecessor(
2395 ParentBB,
2396 /*KeepOneInputPHIs*/ true);
2397
2398 // We need to use the set to populate domtree updates as even when there
2399 // are multiple cases pointing at the same successor we only want to
2400 // remove and insert one edge in the domtree.
2401 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2402 DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2403 }
2404
2405 // After MSSAU update, remove the cloned terminator instruction NewTI.
2406 ParentBB->getTerminator()->eraseFromParent();
2407
2408 // Create a new unconditional branch to the continuing block (as opposed to
2409 // the one cloned).
2410 BranchInst::Create(RetainedSuccBB, ParentBB);
2411 } else {
2412 assert(BI && "Only branches have partial unswitching.");
2413 assert(UnswitchedSuccBBs.size() == 1 &&
2414 "Only one possible unswitched block for a branch!");
2415 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2416 // When doing a partial unswitch, we have to do a bit more work to build up
2417 // the branch in the split block.
2418 if (PartiallyInvariant)
2419 buildPartialInvariantUnswitchConditionalBranch(
2420 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
2421 else {
2422 buildPartialUnswitchConditionalBranch(
2423 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
2424 FreezeLoopUnswitchCond, BI, &AC, DT);
2425 }
2426 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2427
2428 if (MSSAU) {
2429 DT.applyUpdates(DTUpdates);
2430 DTUpdates.clear();
2431
2432 // Perform MSSA cloning updates.
2433 for (auto &VMap : VMaps)
2434 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2435 /*IgnoreIncomingWithNoClones=*/true);
2436 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2437 }
2438 }
2439
2440 // Apply the updates accumulated above to get an up-to-date dominator tree.
2441 DT.applyUpdates(DTUpdates);
2442
2443 // Now that we have an accurate dominator tree, first delete the dead cloned
2444 // blocks so that we can accurately build any cloned loops. It is important to
2445 // not delete the blocks from the original loop yet because we still want to
2446 // reference the original loop to understand the cloned loop's structure.
2447 deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2448
2449 // Build the cloned loop structure itself. This may be substantially
2450 // different from the original structure due to the simplified CFG. This also
2451 // handles inserting all the cloned blocks into the correct loops.
2452 SmallVector<Loop *, 4> NonChildClonedLoops;
2453 for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2454 buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2455
2456 // Now that our cloned loops have been built, we can update the original loop.
2457 // First we delete the dead blocks from it and then we rebuild the loop
2458 // structure taking these deletions into account.
2459 deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, SE, DestroyLoopCB);
2460
2461 if (MSSAU && VerifyMemorySSA)
2462 MSSAU->getMemorySSA()->verifyMemorySSA();
2463
2464 SmallVector<Loop *, 4> HoistedLoops;
2465 bool IsStillLoop =
2466 rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops, SE);
2467
2468 if (MSSAU && VerifyMemorySSA)
2469 MSSAU->getMemorySSA()->verifyMemorySSA();
2470
2471 // This transformation has a high risk of corrupting the dominator tree, and
2472 // the below steps to rebuild loop structures will result in hard to debug
2473 // errors in that case so verify that the dominator tree is sane first.
2474 // FIXME: Remove this when the bugs stop showing up and rely on existing
2475 // verification steps.
2476 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
2477
2478 if (BI && !PartiallyInvariant) {
2479 // If we unswitched a branch which collapses the condition to a known
2480 // constant we want to replace all the uses of the invariants within both
2481 // the original and cloned blocks. We do this here so that we can use the
2482 // now updated dominator tree to identify which side the users are on.
2483 assert(UnswitchedSuccBBs.size() == 1 &&
2484 "Only one possible unswitched block for a branch!");
2485 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2486
2487 // When considering multiple partially-unswitched invariants
2488 // we can't just replace them with constants in both branches.
2489 //
2490 // For 'AND' we infer that the true branch ("continue") means true
2491 // for each invariant operand.
2492 // For 'OR' we can infer that the false branch ("continue") means false
2493 // for each invariant operand.
2494 // So for the multiple-partial case we don't replace anything in
2495 // the unswitched branch.
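    // For example (illustrative): with `br i1 (and i1 %a, i1 %b)` and
    // invariants {%a, %b}, the continue edge implies both %a and %b are true,
    // so both may be replaced there; the unswitched edge only implies the
    // conjunction is false, not which operand is, so neither is replaced.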
2496 bool ReplaceUnswitched =
2497 FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
2498
2499 ConstantInt *UnswitchedReplacement =
2500 Direction ? ConstantInt::getTrue(BI->getContext())
2501 : ConstantInt::getFalse(BI->getContext());
2502 ConstantInt *ContinueReplacement =
2503 Direction ? ConstantInt::getFalse(BI->getContext())
2504 : ConstantInt::getTrue(BI->getContext());
2505 for (Value *Invariant : Invariants) {
2506 assert(!isa<Constant>(Invariant) &&
2507 "Should not be replacing constant values!");
2508 // Use make_early_inc_range here as set invalidates the iterator.
2509 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
2510 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2511 if (!UserI)
2512 continue;
2513
2514 // Replace it with the 'continue' side if in the main loop body, and the
2515 // unswitched if in the cloned blocks.
2516 if (DT.dominates(LoopPH, UserI->getParent()))
2517 U.set(ContinueReplacement);
2518 else if (ReplaceUnswitched &&
2519 DT.dominates(ClonedPH, UserI->getParent()))
2520 U.set(UnswitchedReplacement);
2521 }
2522 }
2523 }
2524
2525 // We can change which blocks are exit blocks of all the cloned sibling
2526 // loops, the current loop, and any parent loops which shared exit blocks
2527 // with the current loop. As a consequence, we need to re-form LCSSA for
2528 // them. But we shouldn't need to re-form LCSSA for any child loops.
2529 // FIXME: This could be made more efficient by tracking which exit blocks are
2530 // new, and focusing on them, but that isn't likely to be necessary.
2531 //
2532 // In order to reasonably rebuild LCSSA we need to walk inside-out across the
2533 // loop nest and update every loop that could have had its exits changed. We
2534 // also need to cover any intervening loops. We add all of these loops to
2535 // a list and sort them by loop depth to achieve this without updating
2536 // unnecessary loops.
2537 auto UpdateLoop = [&](Loop &UpdateL) {
2538#ifndef NDEBUG
2539 UpdateL.verifyLoop();
2540 for (Loop *ChildL : UpdateL) {
2541 ChildL->verifyLoop();
2542 assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2543 "Perturbed a child loop's LCSSA form!");
2544 }
2545#endif
2546 // First build LCSSA for this loop so that we can preserve it when
2547 // forming dedicated exits. We don't want to perturb some other loop's
2548 // LCSSA while doing that CFG edit.
2549 formLCSSA(UpdateL, DT, &LI, SE);
2550
2551 // For loops reached by this loop's original exit blocks we may have
2552 // introduced new, non-dedicated exits. At least try to re-form dedicated
2553 // exits for these loops. This may fail if they couldn't have dedicated
2554 // exits to start with.
2555 formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
2556 };
2557
2558 // For non-child cloned loops and hoisted loops, we just need to update LCSSA
2559 // and we can do it in any order as they don't nest relative to each other.
2560 //
2561 // Also check if any of the loops we have updated have become top-level loops
2562 // as that will necessitate widening the outer loop scope.
2563 for (Loop *UpdatedL :
2564 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2565 UpdateLoop(*UpdatedL);
2566 if (UpdatedL->isOutermost())
2567 OuterExitL = nullptr;
2568 }
2569 if (IsStillLoop) {
2570 UpdateLoop(L);
2571 if (L.isOutermost())
2572 OuterExitL = nullptr;
2573 }
2574
2575 // If the original loop had exit blocks, walk up through the outer most loop
2576 // of those exit blocks to update LCSSA and form updated dedicated exits.
2577 if (OuterExitL != &L)
2578 for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2579 OuterL = OuterL->getParentLoop())
2580 UpdateLoop(*OuterL);
2581
2582#ifndef NDEBUG
2583 // Verify the entire loop structure to catch any incorrect updates before we
2584 // progress in the pass pipeline.
2585 LI.verify(DT);
2586#endif
2587
2588 // Now that we've unswitched something, make callbacks to report the changes.
2589 // For that we need to merge together the updated loops and the cloned loops
2590 // and check whether the original loop survived.
2591 SmallVector<Loop *, 4> SibLoops;
2592 for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2593 if (UpdatedL->getParentLoop() == ParentL)
2594 SibLoops.push_back(UpdatedL);
2595 UnswitchCB(IsStillLoop, PartiallyInvariant, SibLoops);
2596
2597 if (MSSAU && VerifyMemorySSA)
2598 MSSAU->getMemorySSA()->verifyMemorySSA();
2599
2600 if (BI)
2601 ++NumBranches;
2602 else
2603 ++NumSwitches;
2604}
2605
2606/// Recursively compute the cost of a dominator subtree based on the per-block
2607/// cost map provided.
2608///
2609/// The recursive computation is memoized into the provided DT-indexed cost map
2610/// to allow querying it for most nodes in the domtree without it becoming
2611/// quadratic.
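///
/// Conceptually, for a node N this computes
///   Cost(N) = BBCostMap[N] + sum of Cost(C) over N's domtree children C,
/// where blocks absent from BBCostMap contribute nothing and are not
/// descended into, and each result is cached in DTCostMap.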
2612static InstructionCost computeDomSubtreeCost(
2613 DomTreeNode &N,
2614 const SmallDenseMap<BasicBlock *, InstructionCost, 4> &BBCostMap,
2615 SmallDenseMap<DomTreeNode *, InstructionCost, 4> &DTCostMap) {
2616 // Don't accumulate cost (or recurse through) blocks not in our block cost
2617 // map and thus not part of the duplication cost being considered.
2618 auto BBCostIt = BBCostMap.find(N.getBlock());
2619 if (BBCostIt == BBCostMap.end())
2620 return 0;
2621
2622 // Lookup this node to see if we already computed its cost.
2623 auto DTCostIt = DTCostMap.find(&N);
2624 if (DTCostIt != DTCostMap.end())
2625 return DTCostIt->second;
2626
2627 // If not, we have to compute it. We can't use insert above and update
2628 // because computing the cost may insert more things into the map.
2629 InstructionCost Cost = std::accumulate(
2630 N.begin(), N.end(), BBCostIt->second,
2631 [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
2632 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2633 });
2634 bool Inserted = DTCostMap.insert({&N, Cost}).second;
2635 (void)Inserted;
2636 assert(Inserted && "Should not insert a node while visiting children!");
2637 return Cost;
2638}
2639
2640/// Turns an llvm.experimental.guard intrinsic into an explicit branch,
2641/// making the following replacement:
2642///
2643/// --code before guard--
2644/// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2645/// --code after guard--
2646///
2647/// into
2648///
2649/// --code before guard--
2650/// br i1 %cond, label %guarded, label %deopt
2651///
2652/// guarded:
2653/// --code after guard--
2654///
2655/// deopt:
2656/// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2657/// unreachable
2658///
2659/// It also makes all relevant DT and LI updates, so that all structures are in
2660/// valid state after this transform.
2661static BranchInst *turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
2662 DominatorTree &DT, LoopInfo &LI,
2663 MemorySSAUpdater *MSSAU) {
2664 SmallVector<DominatorTree::UpdateType, 2> DTUpdates;
2665 LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
2666 BasicBlock *CheckBB = GI->getParent();
2667
2668 if (MSSAU && VerifyMemorySSA)
2669 MSSAU->getMemorySSA()->verifyMemorySSA();
2670
2671 // Remove all CheckBB's successors from DomTree. A block can be seen among
2672 // successors more than once, but for DomTree it should be added only once.
2673 SmallPtrSet<BasicBlock *, 4> Successors;
2674 for (auto *Succ : successors(CheckBB))
2675 if (Successors.insert(Succ).second)
2676 DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ});
2677
2678 Instruction *DeoptBlockTerm =
2679 SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true);
2680 BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2681 // SplitBlockAndInsertIfThen inserts control flow that branches to
2682 // DeoptBlockTerm if the condition is true. We want the opposite.
2683 CheckBI->swapSuccessors();
2684
2685 BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2686 GuardedBlock->setName("guarded");
2687 CheckBI->getSuccessor(1)->setName("deopt");
2688 BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2689
2690 if (MSSAU)
2691 MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2692
2693 GI->moveBefore(DeoptBlockTerm);
2694 GI->setArgOperand(0, ConstantInt::getTrue(GI->getContext()));
2695
2696 // Add new successors of CheckBB into DomTree.
2697 for (auto *Succ : successors(CheckBB))
2698 DTUpdates.push_back({DominatorTree::Insert, CheckBB, Succ});
2699
2700 // Now the blocks that used to be CheckBB's successors are GuardedBlock's
2701 // successors.
2702 for (auto *Succ : Successors)
2703 DTUpdates.push_back({DominatorTree::Insert, GuardedBlock, Succ});
2704
2705 // Make proper changes to DT.
2706 DT.applyUpdates(DTUpdates);
2707 // Inform LI of a new loop block.
2708 L.addBasicBlockToLoop(GuardedBlock, LI);
2709
2710 if (MSSAU) {
2711 MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
2712 MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2713 if (VerifyMemorySSA)
2714 MSSAU->getMemorySSA()->verifyMemorySSA();
2715 }
2716
2717 ++NumGuards;
2718 return CheckBI;
2719}
2720
2721/// Cost multiplier is a way to limit potentially exponential behavior
2722/// of loop-unswitch. Cost is multiplied in proportion to 2^(number of unswitch
2723/// candidates available). It also accounts for the number of "sibling" loops,
2724/// with the idea of charging for previous unswitches that already happened on
2725/// this cluster of loops. There was an attempt to keep this formula simple,
2726/// just enough to limit the worst case behavior. Even if it is not that simple
2727/// now, it is still not an attempt to provide a detailed heuristic size
2728/// prediction.
2729///
2730/// TODO: Make a proper accounting of "explosion" effect for all kinds of
2731/// unswitch candidates, making adequate predictions instead of wild guesses.
2732/// That requires knowing not just the number of "remaining" candidates but
2733/// also costs of unswitching for each of these candidates.
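///
/// As a rough worked example (using hypothetical option values: a threshold
/// of 50, 8 unscaled candidates, and a toplevel divisor of 2): a top-level
/// loop in a function with 4 top-level loops gets SiblingsMultiplier =
/// max(4 / 2, 1) = 2; if the candidates amount to 10 clones, ClonesPower =
/// max(10 - 8, 0) = 2, and the resulting multiplier is min(2 * 2^2, 50) = 8.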
2734static int CalculateUnswitchCostMultiplier(
2735 const Instruction &TI, const Loop &L, const LoopInfo &LI,
2736 const DominatorTree &DT,
2737 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates) {
2738
2739 // Guards and other exiting conditions do not contribute to exponential
2740 // explosion as long as they dominate the latch (otherwise there might be
2741 // another path to the latch remaining that does not allow eliminating the
2742 // loop copy on unswitch).
2743 const BasicBlock *Latch = L.getLoopLatch();
2744 const BasicBlock *CondBlock = TI.getParent();
2745 if (DT.dominates(CondBlock, Latch) &&
2746 (isGuard(&TI) ||
2747 llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
2748 return L.contains(SuccBB);
2749 }) <= 1)) {
2750 NumCostMultiplierSkipped++;
2751 return 1;
2752 }
2753
2754 auto *ParentL = L.getParentLoop();
2755 int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2756 : std::distance(LI.begin(), LI.end()));
2757 // Count the number of clones that all the candidates might cause during
2758 // unswitching. Branch/guard counts as 1, switch counts as log2 of its cases.
2759 int UnswitchedClones = 0;
2760 for (auto Candidate : UnswitchCandidates) {
2761 const Instruction *CI = Candidate.TI;
2762 const BasicBlock *CondBlock = CI->getParent();
2763 bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2764 if (isGuard(CI)) {
2765 if (!SkipExitingSuccessors)
2766 UnswitchedClones++;
2767 continue;
2768 }
2769 int NonExitingSuccessors =
2770 llvm::count_if(successors(CondBlock),
2771 [SkipExitingSuccessors, &L](const BasicBlock *SuccBB) {
2772 return !SkipExitingSuccessors || L.contains(SuccBB);
2773 });
2774 UnswitchedClones += Log2_32(NonExitingSuccessors);
2775 }
2776
2777 // Ignore up to the "unscaled candidates" number of unswitch candidates
2778 // when calculating the power-of-two scaling of the cost. The main idea
2779 // with this control is to allow a small number of unswitches to happen
2780 // and rely more on siblings multiplier (see below) when the number
2781 // of candidates is small.
2782 unsigned ClonesPower =
2783 std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2784
2785 // Allow top-level loops to spread a bit more than nested ones.
2786 int SiblingsMultiplier =
2787 std::max((ParentL ? SiblingsCount
2788 : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2789 1);
2790 // Compute the cost multiplier in a way that won't overflow by saturating
2791 // at an upper bound.
2792 int CostMultiplier;
2793 if (ClonesPower > Log2_32(UnswitchThreshold) ||
2794 SiblingsMultiplier > UnswitchThreshold)
2795 CostMultiplier = UnswitchThreshold;
2796 else
2797 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2798 (int)UnswitchThreshold);
2799
2800 LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier
2801 << " (siblings " << SiblingsMultiplier << " * clones "
2802 << (1 << ClonesPower) << ")"
2803 << " for unswitch candidate: " << TI << "\n");
2804 return CostMultiplier;
2805}
2806
2807static bool collectUnswitchCandidates(
2808 SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,
2809 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch,
2810 const Loop &L, const LoopInfo &LI, AAResults &AA,
2811 const MemorySSAUpdater *MSSAU) {
2812 assert(UnswitchCandidates.empty() && "Should be!");
2813 // Whether or not we should also collect guards in the loop.
2814 bool CollectGuards = false;
2815 if (UnswitchGuards) {
2816 auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
2817 Intrinsic::getName(Intrinsic::experimental_guard));
2818 if (GuardDecl && !GuardDecl->use_empty())
2819 CollectGuards = true;
2820 }
2821
2822 for (auto *BB : L.blocks()) {
2823 if (LI.getLoopFor(BB) != &L)
2824 continue;
2825
2826 if (CollectGuards)
2827 for (auto &I : *BB)
2828 if (isGuard(&I)) {
2829 auto *Cond =
2830 skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
2831 // TODO: Support AND, OR conditions and partial unswitching.
2832 if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2833 UnswitchCandidates.push_back({&I, {Cond}});
2834 }
2835
2836 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2837 // We can only consider fully loop-invariant switch conditions as we need
2838 // to completely eliminate the switch after unswitching.
2839 if (!isa<Constant>(SI->getCondition()) &&
2840 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
2841 UnswitchCandidates.push_back({SI, {SI->getCondition()}});
2842 continue;
2843 }
2844
2845 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2846 if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) ||
2847 BI->getSuccessor(0) == BI->getSuccessor(1))
2848 continue;
2849
2850 Value *Cond = skipTrivialSelect(BI->getCondition());
2851 if (isa<Constant>(Cond))
2852 continue;
2853
2854 if (L.isLoopInvariant(Cond)) {
2855 UnswitchCandidates.push_back({BI, {Cond}});
2856 continue;
2857 }
2858
2859 Instruction &CondI = *cast<Instruction>(Cond);
2860 if (match(&CondI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
2861 TinyPtrVector<Value *> Invariants =
2862 collectHomogenousInstGraphLoopInvariants(L, CondI, LI);
2863 if (Invariants.empty())
2864 continue;
2865
2866 UnswitchCandidates.push_back({BI, std::move(Invariants)});
2867 continue;
2868 }
2869 }
2870
2871 if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
2872 !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
2873 return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
2874 })) {
2875 MemorySSA *MSSA = MSSAU->getMemorySSA();
2876 if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
2877 LLVM_DEBUG(
2878 dbgs() << "simple-loop-unswitch: Found partially invariant condition "
2879 << *Info->InstToDuplicate[0] << "\n");
2880 PartialIVInfo = *Info;
2881 PartialIVCondBranch = L.getHeader()->getTerminator();
2882 TinyPtrVector<Value *> ValsToDuplicate;
2883 llvm::append_range(ValsToDuplicate, Info->InstToDuplicate);
2884 UnswitchCandidates.push_back(
2885 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
2886 }
2887 }
2888 return !UnswitchCandidates.empty();
2889}
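// As a rough sketch of what the collection above produces (hypothetical source,
// not taken from any real test case):
//
//   for (...) {
//     if (Inv) { ... }          // loop-invariant branch  -> candidate {BI, {Inv}}
//     switch (InvSel) { ... }   // loop-invariant switch  -> candidate {SI, {InvSel}}
//     if (Inv2 && Var) { ... }  // logical and            -> candidate {BI, {Inv2}}
//   }
//
// Branches on constants, branches whose two successors are the same block, and
// conditions with no loop-invariant inputs are all skipped.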
2890
2891/// Tries to canonicalize condition described by:
2892///
2893/// br (LHS pred RHS), label IfTrue, label IfFalse
2894///
2895/// into its equivalent where `Pred` is something that we support for injected
2896/// invariants (so far it is limited to ult), LHS in canonicalized form is
2897/// non-invariant and RHS is an invariant.
2898 static void canonicalizeForInvariantConditionInjection(
2899 ICmpInst::Predicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue,
2900 BasicBlock *&IfFalse, const Loop &L) {
2901 if (!L.contains(IfTrue)) {
2902 Pred = ICmpInst::getInversePredicate(Pred);
2903 std::swap(IfTrue, IfFalse);
2904 }
2905
2906 // Move loop-invariant argument to RHS position.
2907 if (L.isLoopInvariant(LHS)) {
2908 Pred = ICmpInst::getSwappedPredicate(Pred);
2909 std::swap(LHS, RHS);
2910 }
2911
2912 if (Pred == ICmpInst::ICMP_SGE && match(RHS, m_Zero())) {
2913 // Turn "x >=s 0" into "x <u UMIN_INT"
2914 Pred = ICmpInst::ICMP_ULT;
2916 RHS->getContext(),
2918 }
2919}
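// As a concrete instance of the last rewrite above (assuming a 32-bit value),
// a branch on
//
//   x >=s 0
//
// becomes a branch on
//
//   x <u 0x80000000
//
// which is equivalent because the non-negative signed i32 values are exactly
// the values that are unsigned-less-than the bit pattern of INT32_MIN.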
2920
2921/// Returns true, if predicate described by ( \p Pred, \p LHS, \p RHS )
2922/// succeeding into blocks ( \p IfTrue, \p IfFalse) can be optimized by
2923/// injecting a loop-invariant condition.
2924 static bool shouldTryInjectInvariantCondition(
2925 const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS,
2926 const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L) {
2927 if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
2928 return false;
2929 // TODO: Support other predicates.
2930 if (Pred != ICmpInst::ICMP_ULT)
2931 return false;
2932 // TODO: Support non-loop-exiting branches?
2933 if (!L.contains(IfTrue) || L.contains(IfFalse))
2934 return false;
2935 // FIXME: For some reason this causes problems with MSSA updates, need to
2936 // investigate why. So far, just don't unswitch latch.
2937 if (L.getHeader() == IfTrue)
2938 return false;
2939 return true;
2940}
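// For illustration (hypothetical IR, with %n loop-invariant), a branch such as
//
//   %c = icmp ult i32 %i, %n
//   br i1 %c, label %in_loop, label %exit
//
// passes the checks above: the predicate is ult, the invariant operand is on
// the RHS, the true successor stays in the loop and the false successor leaves
// it. A branch whose successors are both inside the loop, or whose in-loop
// successor is the loop header, is rejected.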
2941
2942/// Returns true, if metadata on \p BI allows us to optimize branching into \p
2943 /// TakenSucc via injection of invariant conditions. The branch should be hot
2944 /// enough and not previously unswitched; the information about this comes
2945 /// from the metadata.
2946 static bool shouldTryInjectBasingOnMetadata(const BranchInst *BI,
2947 const BasicBlock *TakenSucc) {
2948 // Skip branches that have already been unswitched this way. After successful
2949 // unswitching of the injected condition, we will still have a copy of this loop
2950 // which looks exactly the same as the original one. To prevent a 2nd attempt
2951 // at unswitching it in the same pass, mark this branch as "nothing to do
2952 // here".
2953 if (BI->hasMetadata("llvm.invariant.condition.injection.disabled"))
2954 return false;
2955 SmallVector<uint32_t> Weights;
2956 if (!extractBranchWeights(*BI, Weights))
2957 return false;
2958 unsigned T = InjectInvariantConditionHotnesThreshold;
2959 BranchProbability LikelyTaken(T - 1, T);
2960
2961 assert(Weights.size() == 2 && "Unexpected profile data!");
2962 size_t Idx = BI->getSuccessor(0) == TakenSucc ? 0 : 1;
2963 auto Num = Weights[Idx];
2964 auto Denom = Weights[0] + Weights[1];
2965 // Degenerate or overflowed metadata.
2966 if (Denom == 0 || Num > Denom)
2967 return false;
2968 BranchProbability ActualTaken(Num, Denom);
2969 if (LikelyTaken > ActualTaken)
2970 return false;
2971 return true;
2972}
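// A small worked example with the default hotness threshold of 16 (so
// LikelyTaken is 15/16): hypothetical branch weights {120, 8}, with the first
// successor being TakenSucc, give ActualTaken = 120/128, which is not below
// 15/16, so injection is attempted. Weights {100, 28} give 100/128, which is
// below 15/16, and the branch is considered not hot enough.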
2973
2974/// Materialize pending invariant condition of the given candidate into IR. The
2975/// injected loop-invariant condition implies the original loop-variant branch
2976/// condition, so the materialization turns
2977///
2978/// loop_block:
2979/// ...
2980/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
2981///
2982/// into
2983///
2984/// preheader:
2985/// %invariant_cond = LHS pred RHS
2986/// ...
2987/// loop_block:
2988/// br i1 %invariant_cond, label InLoopSucc, label OriginalCheck
2989/// OriginalCheck:
2990/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
2991/// ...
2992static NonTrivialUnswitchCandidate
2993injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
2994 DominatorTree &DT, LoopInfo &LI,
2995 AssumptionCache &AC, MemorySSAUpdater *MSSAU) {
2996 assert(Candidate.hasPendingInjection() && "Nothing to inject!");
2997 BasicBlock *Preheader = L.getLoopPreheader();
2998 assert(Preheader && "Loop is not in simplified form?");
2999 assert(LI.getLoopFor(Candidate.TI->getParent()) == &L &&
3000 "Unswitching branch of inner loop!");
3001
3002 auto Pred = Candidate.PendingInjection->Pred;
3003 auto *LHS = Candidate.PendingInjection->LHS;
3004 auto *RHS = Candidate.PendingInjection->RHS;
3005 auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;
3006 auto *TI = cast<BranchInst>(Candidate.TI);
3007 auto *BB = Candidate.TI->getParent();
3008 auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1)
3009 : TI->getSuccessor(0);
3010 // FIXME: Remove this once limitation on successors is lifted.
3011 assert(L.contains(InLoopSucc) && "Not supported yet!");
3012 assert(!L.contains(OutOfLoopSucc) && "Not supported yet!");
3013 auto &Ctx = BB->getContext();
3014
3015 IRBuilder<> Builder(Preheader->getTerminator());
3016 assert(ICmpInst::isUnsigned(Pred) && "Not supported yet!");
3017 if (LHS->getType() != RHS->getType()) {
3018 if (LHS->getType()->getIntegerBitWidth() <
3019 RHS->getType()->getIntegerBitWidth())
3020 LHS = Builder.CreateZExt(LHS, RHS->getType(), LHS->getName() + ".wide");
3021 else
3022 RHS = Builder.CreateZExt(RHS, LHS->getType(), RHS->getName() + ".wide");
3023 }
3024 // Do not use builder here: CreateICmp may simplify this into a constant and
3025 // unswitching will break. Better optimize it away later.
3026 auto *InjectedCond =
3027 ICmpInst::Create(Instruction::ICmp, Pred, LHS, RHS, "injected.cond",
3028 Preheader->getTerminator());
3029 auto *OldCond = TI->getCondition();
3030
3031 BasicBlock *CheckBlock = BasicBlock::Create(Ctx, BB->getName() + ".check",
3032 BB->getParent(), InLoopSucc);
3033 Builder.SetInsertPoint(TI);
3034 auto *InvariantBr =
3035 Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
3036
3037 Builder.SetInsertPoint(CheckBlock);
3038 auto *NewTerm = Builder.CreateCondBr(OldCond, InLoopSucc, OutOfLoopSucc);
3039
3040 TI->eraseFromParent();
3041 // Prevent infinite unswitching.
3042 NewTerm->setMetadata("llvm.invariant.condition.injection.disabled",
3043 MDNode::get(BB->getContext(), {}));
3044
3045 // Fixup phis.
3046 for (auto &I : *InLoopSucc) {
3047 auto *PN = dyn_cast<PHINode>(&I);
3048 if (!PN)
3049 break;
3050 auto *Inc = PN->getIncomingValueForBlock(BB);
3051 PN->addIncoming(Inc, CheckBlock);
3052 }
3053 OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);
3054
3055 SmallVector<DominatorTree::UpdateType, 4> DTUpdates = {
3056 { DominatorTree::Insert, BB, CheckBlock },
3057 { DominatorTree::Insert, CheckBlock, InLoopSucc },
3058 { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
3059 { DominatorTree::Delete, BB, OutOfLoopSucc }
3060 };
3061
3062 DT.applyUpdates(DTUpdates);
3063 if (MSSAU)
3064 MSSAU->applyUpdates(DTUpdates, DT);
3065 L.addBasicBlockToLoop(CheckBlock, LI);
3066
3067#ifndef NDEBUG
3068 DT.verify();
3069 LI.verify(DT);
3070 if (MSSAU && VerifyMemorySSA)
3071 MSSAU->getMemorySSA()->verifyMemorySSA();
3072#endif
3073
3074 // TODO: In fact, cost of unswitching a new invariant candidate is *slightly*
3075 // higher because we have just inserted a new block. Need to think how to
3076 // adjust the cost of injected candidates when it was first computed.
3077 LLVM_DEBUG(dbgs() << "Injected a new loop-invariant branch " << *InvariantBr
3078 << " and considering it for unswitching.");
3079 ++NumInvariantConditionsInjected;
3080 return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },
3081 Candidate.Cost);
3082}
3083
3084/// Given chain of loop branch conditions looking like:
3085/// br (Variant < Invariant1)
3086/// br (Variant < Invariant2)
3087/// br (Variant < Invariant3)
3088/// ...
3089/// collect set of invariant conditions on which we want to unswitch, which
3090/// look like:
3091/// Invariant1 <= Invariant2
3092/// Invariant2 <= Invariant3
3093/// ...
3094/// Though they might not immediately exist in the IR, we can still inject them.
3095 static bool insertCandidatesWithPendingInjections(
3096 SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, Loop &L,
3097 ICmpInst::Predicate Pred, ArrayRef<CompareDesc> Compares,
3098 const DominatorTree &DT) {
3099
3101 assert(ICmpInst::isStrictPredicate(Pred));
3102 if (Compares.size() < 2)
3103 return false;
3104 ICmpInst::Predicate NonStrictPred = ICmpInst::getNonStrictPredicate(Pred);
3105 for (auto Prev = Compares.begin(), Next = Compares.begin() + 1;
3106 Next != Compares.end(); ++Prev, ++Next) {
3107 Value *LHS = Next->Invariant;
3108 Value *RHS = Prev->Invariant;
3109 BasicBlock *InLoopSucc = Prev->InLoopSucc;
3110 InjectedInvariant ToInject(NonStrictPred, LHS, RHS, InLoopSucc);
3111 NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },
3112 std::nullopt, std::move(ToInject));
3113 UnswitchCandidates.push_back(std::move(Candidate));
3114 }
3115 return true;
3116}
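// For a dominating chain of exiting checks such as (hypothetical source)
//
//   if (!(x <u C1)) break;   // dominates the next check
//   if (!(x <u C2)) break;
//
// the loop above creates a candidate at the second branch whose pending
// injection is the non-strict condition "C1 <=u C2": at that branch "x <u C1"
// already holds, so in the loop copy where the injected invariant is known to
// be true, "x <u C2" follows and the loop-variant check becomes redundant.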
3117
3118/// Collect unswitch candidates by invariant conditions that are not immediately
3119/// present in the loop. However, they can be injected into the code if we
3120/// decide it's profitable.
3121 /// An example of such conditions is the following:
3122///
3123/// for (...) {
3124/// x = load ...
3125/// if (! x <u C1) break;
3126/// if (! x <u C2) break;
3127/// <do something>
3128/// }
3129///
3130/// We can unswitch by condition "C1 <=u C2". If that is true, then "x <u C1 <=
3131/// C2" automatically implies "x <u C2", so we can get rid of one of
3132/// loop-variant checks in unswitched loop version.
3133 static bool collectUnswitchCandidatesWithInjections(
3134 SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,
3135 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L,
3136 const DominatorTree &DT, const LoopInfo &LI, AAResults &AA,
3137 const MemorySSAUpdater *MSSAU) {
3138 if (!InjectInvariantConditions)
3139 return false;
3140
3141 if (!DT.isReachableFromEntry(L.getHeader()))
3142 return false;
3143 auto *Latch = L.getLoopLatch();
3144 // Need to have a single latch and a preheader.
3145 if (!Latch)
3146 return false;
3147 assert(L.getLoopPreheader() && "Must have a preheader!");
3148
3150 // Traverse the conditions that dominate latch (and therefore dominate each
3151 // other).
3152 for (auto *DTN = DT.getNode(Latch); L.contains(DTN->getBlock());
3153 DTN = DTN->getIDom()) {
3154 ICmpInst::Predicate Pred;
3155 Value *LHS = nullptr, *RHS = nullptr;
3156 BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
3157 auto *BB = DTN->getBlock();
3158 // Ignore inner loops.
3159 if (LI.getLoopFor(BB) != &L)
3160 continue;
3161 auto *Term = BB->getTerminator();
3162 if (!match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
3163 m_BasicBlock(IfTrue), m_BasicBlock(IfFalse))))
3164 continue;
3165 canonicalizeForInvariantConditionInjection(Pred, LHS, RHS, IfTrue, IfFalse,
3166 L);
3167 if (!shouldTryInjectInvariantCondition(Pred, LHS, RHS, IfTrue, IfFalse, L))
3168 continue;
3169 if (!shouldTryInjectBasingOnMetadata(cast<BranchInst>(Term), IfTrue))
3170 continue;
3171 // Strip ZEXT for unsigned predicate.
3172 // TODO: once signed predicates are supported, also strip SEXT.
3173 CompareDesc Desc(cast<BranchInst>(Term), RHS, IfTrue);
3174 while (auto *Zext = dyn_cast<ZExtInst>(LHS))
3175 LHS = Zext->getOperand(0);
3176 CandidatesULT[LHS].push_back(Desc);
3177 }
3178
3179 bool Found = false;
3180 for (auto &It : CandidatesULT)
3181 Found |= insertCandidatesWithPendingInjections(
3182 UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);
3183 return Found;
3184}
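// One detail of the ZExt stripping above is worth spelling out: checks that
// compare a widened copy of the same narrow value, e.g. (hypothetical IR)
//
//   %wx = zext i8 %x to i32
//   %c1 = icmp ult i32 %wx, %C1
//   ...
//   %c2 = icmp ult i32 %wx, %C2
//
// are keyed by the underlying %x, so they still form a single chain of
// comparisons and can be handled together. Sign extensions are not stripped
// yet, as the TODO above notes.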
3185
3186 static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI) {
3187 if (!L.isSafeToClone())
3188 return false;
3189 for (auto *BB : L.blocks())
3190 for (auto &I : *BB) {
3191 if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
3192 return false;
3193 if (auto *CB = dyn_cast<CallBase>(&I)) {
3194 assert(!CB->cannotDuplicate() && "Checked by L.isSafeToClone().");
3195 if (CB->isConvergent())
3196 return false;
3197 }
3198 }
3199
3200 // Check if there are irreducible CFG cycles in this loop. If so, we cannot
3201 // easily unswitch non-trivial edges out of the loop. Doing so might turn the
3202 // irreducible control flow into reducible control flow and introduce new
3203 // loops "out of thin air". If we ever discover important use cases for doing
3204 // this, we can add support to loop unswitch, but it is a lot of complexity
3205 // for what seems little or no real world benefit.
3206 LoopBlocksRPO RPOT(&L);
3207 RPOT.perform(&LI);
3208 if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
3209 return false;
3210
3211 SmallVector<BasicBlock *, 4> ExitBlocks;
3212 L.getUniqueExitBlocks(ExitBlocks);
3213 // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
3214 // instruction as we don't know how to split those exit blocks.
3215 // FIXME: We should teach SplitBlock to handle this and remove this
3216 // restriction.
3217 for (auto *ExitBB : ExitBlocks) {
3218 auto *I = ExitBB->getFirstNonPHI();
3219 if (isa<CleanupPadInst>(I) || isa<CatchSwitchInst>(I)) {
3220 LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "
3221 "in exit block\n");
3222 return false;
3223 }
3224 }
3225
3226 return true;
3227}
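// For illustration, a loop body like the following (hypothetical IR) is
// rejected by the checks above, because a convergent call must not be
// duplicated into two clones of the loop:
//
//   for.body:
//     call void @cross.lane.op() convergent
//     br i1 %cond, label %for.body, label %exit
//
// Similarly, loops whose exit blocks start with a cleanuppad or catchswitch
// are skipped, since such exit blocks cannot currently be split.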
3228
3229static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
3230 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates, const Loop &L,
3231 const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC,
3232 const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo) {
3233 // Unswitching these terminators will require duplicating parts of the loop,
3234 // so we need to be able to model that cost. Compute the ephemeral
3235 // values and set up a data structure to hold per-BB costs. We cache each
3236 // block's cost so that we don't recompute this when considering different
3237 // subsets of the loop for duplication during unswitching.
3238 SmallPtrSet<const Value *, 4> EphValues;
3239 CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
3240 SmallDenseMap<BasicBlock *, InstructionCost, 4> BBCostMap;
3241
3242 // Compute the cost of each block, as well as the total loop cost. Also, bail
3243 // out if we see instructions which are incompatible with loop unswitching
3244 // (convergent, noduplicate, or cross-basic-block tokens).
3245 // FIXME: We might be able to safely handle some of these in non-duplicated
3246 // regions.
3247 TargetTransformInfo::TargetCostKind CostKind =
3248 L.getHeader()->getParent()->hasMinSize()
3249 ? TargetTransformInfo::TCK_CodeSize
3250 : TargetTransformInfo::TCK_SizeAndLatency;
3251 InstructionCost LoopCost = 0;
3252 for (auto *BB : L.blocks()) {
3253 InstructionCost Cost = 0;
3254 for (auto &I : *BB) {
3255 if (EphValues.count(&I))
3256 continue;
3257 Cost += TTI.getInstructionCost(&I, CostKind);
3258 }
3259 assert(Cost >= 0 && "Must not have negative costs!");
3260 LoopCost += Cost;
3261 assert(LoopCost >= 0 && "Must not have negative loop costs!");
3262 BBCostMap[BB] = Cost;
3263 }
3264 LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");
3265
3266 // Now we find the best candidate by searching for the one with the following
3267 // properties in order:
3268 //
3269 // 1) An unswitching cost below the threshold
3270 // 2) The smallest number of duplicated unswitch candidates (to avoid
3271 // creating redundant subsequent unswitching)
3272 // 3) The smallest cost after unswitching.
3273 //
3274 // We prioritize reducing fanout of unswitch candidates provided the cost
3275 // remains below the threshold because this has a multiplicative effect.
3276 //
3277 // This requires memoizing each dominator subtree to avoid redundant work.
3278 //
3279 // FIXME: Need to actually do the number of candidates part above.
3280 SmallDenseMap<DomTreeNode *, InstructionCost, 4> DTCostMap;
3281 // Given a terminator which might be unswitched, computes the non-duplicated
3282 // cost for that terminator.
3283 auto ComputeUnswitchedCost = [&](Instruction &TI,
3284 bool FullUnswitch) -> InstructionCost {
3285 BasicBlock &BB = *TI.getParent();
3286 SmallPtrSet<BasicBlock *, 4> Visited;
3287 InstructionCost Cost = 0;
3289 for (BasicBlock *SuccBB : successors(&BB)) {
3290 // Don't count successors more than once.
3291 if (!Visited.insert(SuccBB).second)
3292 continue;
3293
3294 // If this is a partial unswitch candidate, then it must be a conditional
3295 // branch with a condition of either `or`, `and`, their corresponding
3296 // select forms or partially invariant instructions. In that case, one of
3297 // the successors is necessarily duplicated, so don't even try to remove
3298 // its cost.
3299 if (!FullUnswitch) {
3300 auto &BI = cast<BranchInst>(TI);
3301 Value *Cond = skipTrivialSelect(BI.getCondition());
3302 if (match(Cond, m_LogicalAnd())) {
3303 if (SuccBB == BI.getSuccessor(1))
3304 continue;
3305 } else if (match(Cond, m_LogicalOr())) {
3306 if (SuccBB == BI.getSuccessor(0))
3307 continue;
3308 } else if ((PartialIVInfo.KnownValue->isOneValue() &&
3309 SuccBB == BI.getSuccessor(0)) ||
3310 (!PartialIVInfo.KnownValue->isOneValue() &&
3311 SuccBB == BI.getSuccessor(1)))
3312 continue;
3313 }
3314
3315 // This successor's domtree will not need to be duplicated after
3316 // unswitching if the edge to the successor dominates it (and thus the
3317 // entire tree). This essentially means there is no other path into this
3318 // subtree and so it will end up live in only one clone of the loop.
3319 if (SuccBB->getUniquePredecessor() ||
3320 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
3321 return PredBB == &BB || DT.dominates(SuccBB, PredBB);
3322 })) {
3323 Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
3324 assert(Cost <= LoopCost &&
3325 "Non-duplicated cost should never exceed total loop cost!");
3326 }
3327 }
3328
3329 // Now scale the cost by the number of unique successors minus one. We
3330 // subtract one because there is already at least one copy of the entire
3331 // loop. This is computing the new cost of unswitching a condition.
3332 // Note that guards always have 2 unique successors that are implicit and
3333 // will be materialized if we decide to unswitch it.
3334 int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
3335 assert(SuccessorsCount > 1 &&
3336 "Cannot unswitch a condition without multiple distinct successors!");
3337 return (LoopCost - Cost) * (SuccessorsCount - 1);
3338 };
3339
3340 std::optional<NonTrivialUnswitchCandidate> Best;
3341 for (auto &Candidate : UnswitchCandidates) {
3342 Instruction &TI = *Candidate.TI;
3343 ArrayRef<Value *> Invariants = Candidate.Invariants;
3344 BranchInst *BI = dyn_cast<BranchInst>(&TI);
3345 bool FullUnswitch =
3346 !BI || Candidate.hasPendingInjection() ||
3347 (Invariants.size() == 1 &&
3348 Invariants[0] == skipTrivialSelect(BI->getCondition()));
3349 InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
3350 // Calculate cost multiplier which is a tool to limit potentially
3351 // exponential behavior of loop-unswitch.
3352 if (EnableUnswitchCostMultiplier) {
3353 int CostMultiplier =
3354 CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
3355 assert(
3356 (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
3357 "cost multiplier needs to be in the range of 1..UnswitchThreshold");
3358 CandidateCost *= CostMultiplier;
3359 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3360 << " (multiplier: " << CostMultiplier << ")"
3361 << " for unswitch candidate: " << TI << "\n");
3362 } else {
3363 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3364 << " for unswitch candidate: " << TI << "\n");
3365 }
3366
3367 if (!Best || CandidateCost < Best->Cost) {
3368 Best = Candidate;
3369 Best->Cost = CandidateCost;
3370 }
3371 }
3372 assert(Best && "Must be!");
3373 return *Best;
3374}
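// A rough worked example of the candidate cost above (hypothetical numbers):
// for a loop with LoopCost = 40, a conditional branch with two unique
// successors where the blocks reachable only through one of them add up to a
// cost of 15 gives (40 - 15) * (2 - 1) = 25. With the default
// -unswitch-threshold of 50 that candidate is viable as long as its cost
// multiplier stays at 1; a multiplier of 2 would push it to 50, which the
// caller rejects because the final cost must be strictly below the threshold.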
3375
3376 static bool unswitchBestCondition(
3377 Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
3378 AAResults &AA, TargetTransformInfo &TTI,
3379 function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
3380 ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
3381 function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
3382 // Collect all invariant conditions within this loop (as opposed to an inner
3383 // loop which would be handled when visiting that inner loop).
3384 SmallVector<NonTrivialUnswitchCandidate, 4> UnswitchCandidates;
3385 IVConditionInfo PartialIVInfo;
3386 Instruction *PartialIVCondBranch = nullptr;
3387 collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
3388 PartialIVCondBranch, L, LI, AA, MSSAU);
3389 collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo,
3390 PartialIVCondBranch, L, DT, LI, AA,
3391 MSSAU);
3392 // If we didn't find any candidates, we're done.
3393 if (UnswitchCandidates.empty())
3394 return false;
3395
3396 LLVM_DEBUG(
3397 dbgs() << "Considering " << UnswitchCandidates.size()
3398 << " non-trivial loop invariant conditions for unswitching.\n");
3399
3400 NonTrivialUnswitchCandidate Best = findBestNonTrivialUnswitchCandidate(
3401 UnswitchCandidates, L, DT, LI, AC, TTI, PartialIVInfo);
3402
3403 assert(Best.TI && "Failed to find loop unswitch candidate");
3404 assert(Best.Cost && "Failed to compute cost");
3405
3406 if (*Best.Cost >= UnswitchThreshold) {
3407 LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << *Best.Cost
3408 << "\n");
3409 return false;
3410 }
3411
3412 if (Best.hasPendingInjection())
3413 Best = injectPendingInvariantConditions(Best, L, DT, LI, AC, MSSAU);
3414 assert(!Best.hasPendingInjection() &&
3415 "All injections should have been done by now!");
3416
3417 if (Best.TI != PartialIVCondBranch)
3418 PartialIVInfo.InstToDuplicate.clear();
3419
3420 // If the best candidate is a guard, turn it into a branch.
3421 if (isGuard(Best.TI))
3422 Best.TI =
3423 turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
3424
3425 LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Cost
3426 << ") terminator: " << *Best.TI << "\n");
3427 unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT,
3428 LI, AC, UnswitchCB, SE, MSSAU, DestroyLoopCB);
3429 return true;
3430}
3431
3432/// Unswitch control flow predicated on loop invariant conditions.
3433///
3434/// This first hoists all branches or switches which are trivial (IE, do not
3435/// require duplicating any part of the loop) out of the loop body. It then
3436/// looks at other loop invariant control flows and tries to unswitch those as
3437/// well by cloning the loop if the result is small enough.
3438///
3439/// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
3440/// also updated based on the unswitch. The `MSSA` analysis is also updated if
3441/// valid (i.e. its use is enabled).
3442///
3443/// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
3444/// true, we will attempt to do non-trivial unswitching as well as trivial
3445/// unswitching.
3446///
3447/// The `UnswitchCB` callback provided will be run after unswitching is
3448/// complete, with the first parameter set to `true` if the provided loop
3449/// remains a loop, and a list of new sibling loops created.
3450///
3451/// If `SE` is non-null, we will update that analysis based on the unswitching
3452/// done.
3453static bool
3454 unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
3455 AAResults &AA, TargetTransformInfo &TTI, bool Trivial,
3456 bool NonTrivial,
3457 function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
3458 ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
3459 ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
3460 function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
3461 assert(L.isRecursivelyLCSSAForm(DT, LI) &&
3462 "Loops must be in LCSSA form before unswitching.");
3463
3464 // Must be in loop simplified form: we need a preheader and dedicated exits.
3465 if (!L.isLoopSimplifyForm())
3466 return false;
3467
3468 // Try trivial unswitching first, before looping over the other basic blocks in the loop.
3469 if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
3470 // If we unswitched successfully we will want to clean up the loop before
3471 // processing it further so just mark it as unswitched and return.
3472 UnswitchCB(/*CurrentLoopValid*/ true, false, {});
3473 return true;
3474 }
3475
3476 // Check whether we should continue with non-trivial conditions.
3477 // EnableNonTrivialUnswitch: Global variable that forces non-trivial
3478 // unswitching for testing and debugging.
3479 // NonTrivial: Parameter that enables non-trivial unswitching for this
3480 // invocation of the transform. But this should be allowed only
3481 // for targets without branch divergence.
3482 //
3483 // FIXME: If divergence analysis becomes available to a loop
3484 // transform, we should allow unswitching for non-trivial uniform
3485 // branches even on targets that have divergence.
3486 // https://bugs.llvm.org/show_bug.cgi?id=48819
3487 bool ContinueWithNonTrivial =
3488 EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence());
3489 if (!ContinueWithNonTrivial)
3490 return false;
3491
3492 // Skip non-trivial unswitching for optsize functions.
3493 if (L.getHeader()->getParent()->hasOptSize())
3494 return false;
3495
3496 // Returns true if Loop L's loop nest is cold, i.e. if the headers of L,
3497 // of the loops L is nested in, and of the loops nested in L are all cold.
3498 auto IsLoopNestCold = [&](const Loop *L) {
3499 // Check L and all of its parent loops.
3500 auto *Parent = L;
3501 while (Parent) {
3502 if (!PSI->isColdBlock(Parent->getHeader(), BFI))
3503 return false;
3504 Parent = Parent->getParentLoop();
3505 }
3506 // Next check all loops nested within L.
3507 SmallVector<const Loop *, 4> Worklist;
3508 Worklist.insert(Worklist.end(), L->getSubLoops().begin(),
3509 L->getSubLoops().end());
3510 while (!Worklist.empty()) {
3511 auto *CurLoop = Worklist.pop_back_val();
3512 if (!PSI->isColdBlock(CurLoop->getHeader(), BFI))
3513 return false;
3514 Worklist.insert(Worklist.end(), CurLoop->getSubLoops().begin(),
3515 CurLoop->getSubLoops().end());
3516 }
3517 return true;
3518 };
3519
3520 // Skip cold loops in cold loop nests, as unswitching them brings little
3521 // benefit but increases the code size
3522 if (PSI && PSI->hasProfileSummary() && BFI && IsLoopNestCold(&L)) {
3523 LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
3524 return false;
3525 }
3526
3527 // Perform legality checks.
3528 if (!isSafeForNoNTrivialUnswitching(L, LI))
3529 return false;
3530
3531 // For non-trivial unswitching, because it often creates new loops, we rely on
3532 // the pass manager to iterate on the loops rather than trying to immediately
3533 // reach a fixed point. There is no substantial advantage to iterating
3534 // internally, and if any of the new loops are simplified enough to contain
3535 // trivial unswitching we want to prefer those.
3536
3537 // Try to unswitch the best invariant condition. We prefer this full unswitch to
3538 // a partial unswitch when possible below the threshold.
3539 if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU,
3540 DestroyLoopCB))
3541 return true;
3542
3543 // No other opportunities to unswitch.
3544 return false;
3545}
3546
3547 PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
3548 LoopStandardAnalysisResults &AR,
3549 LPMUpdater &U) {
3550 Function &F = *L.getHeader()->getParent();
3551 (void)F;
3552 ProfileSummaryInfo *PSI = nullptr;
3553 if (auto OuterProxy =
3554 AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR)
3555 .getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))
3556 PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
3557 LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
3558 << "\n");
3559
3560 // Save the current loop name in a variable so that we can report it even
3561 // after it has been deleted.
3562 std::string LoopName = std::string(L.getName());
3563
3564 auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
3565 bool PartiallyInvariant,
3566 ArrayRef<Loop *> NewLoops) {
3567 // If we did a non-trivial unswitch, we have added new (cloned) loops.
3568 if (!NewLoops.empty())
3569 U.addSiblingLoops(NewLoops);
3570
3571 // If the current loop remains valid, we should revisit it to catch any
3572 // other unswitch opportunities. Otherwise, we need to mark it as deleted.
3573 if (CurrentLoopValid) {
3574 if (PartiallyInvariant) {
3575 // Mark the new loop as partially unswitched, to avoid unswitching on
3576 // the same condition again.
3577 auto &Context = L.getHeader()->getContext();
3578 MDNode *DisableUnswitchMD = MDNode::get(
3579 Context,
3580 MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
3581 MDNode *NewLoopID = makePostTransformationMetadata(
3582 Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
3583 {DisableUnswitchMD});
3584 L.setLoopID(NewLoopID);
3585 } else
3586 U.revisitCurrentLoop();
3587 } else
3588 U.markLoopAsDeleted(L, LoopName);
3589 };
3590
3591 auto DestroyLoopCB = [&U](Loop &L, StringRef Name) {
3592 U.markLoopAsDeleted(L, Name);
3593 };
3594
3595 std::optional<MemorySSAUpdater> MSSAU;
3596 if (AR.MSSA) {
3597 MSSAU = MemorySSAUpdater(AR.MSSA);
3598 if (VerifyMemorySSA)
3599 AR.MSSA->verifyMemorySSA();
3600 }
3601 if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
3602 UnswitchCB, &AR.SE, MSSAU ? &*MSSAU : nullptr, PSI, AR.BFI,
3603 DestroyLoopCB))
3604 return PreservedAnalyses::all();
3605
3606 if (AR.MSSA && VerifyMemorySSA)
3607 AR.MSSA->verifyMemorySSA();
3608
3609 // Historically this pass has had issues with the dominator tree so verify it
3610 // in asserts builds.
3611 assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
3612
3613 auto PA = getLoopPassPreservedAnalyses();
3614 if (AR.MSSA)
3615 PA.preserve<MemorySSAAnalysis>();
3616 return PA;
3617}
3618
3619 void SimpleLoopUnswitchPass::printPipeline(
3620 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
3621 static_cast<PassInfoMixin<SimpleLoopUnswitchPass> *>(this)->printPipeline(
3622 OS, MapClassName2PassName);
3623
3624 OS << '<';
3625 OS << (NonTrivial ? "" : "no-") << "nontrivial;";
3626 OS << (Trivial ? "" : "no-") << "trivial";
3627 OS << '>';
3628}
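// For reference, the printed form matches the new pass manager's textual
// pipeline syntax, so a typical invocation (usually under the loop-mssa
// adaptor) looks something like:
//
//   opt -passes='loop-mssa(simple-loop-unswitch<nontrivial;trivial>)' -S in.ll
//
// with "no-nontrivial" / "no-trivial" turning the corresponding modes off.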
3629
3630namespace {
3631
3632class SimpleLoopUnswitchLegacyPass : public LoopPass {
3633 bool NonTrivial;
3634
3635public:
3636 static char ID; // Pass ID, replacement for typeid
3637
3638 explicit SimpleLoopUnswitchLegacyPass(bool NonTrivial = false)
3639 : LoopPass(ID), NonTrivial(NonTrivial) {
3640 initializeSimpleLoopUnswitchLegacyPassPass(
3641 *PassRegistry::getPassRegistry());
3642 }
3643
3644 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
3645
3646 void getAnalysisUsage(AnalysisUsage &AU) const override {
3647 AU.addRequired<AssumptionCacheTracker>();
3648 AU.addRequired<TargetTransformInfoWrapperPass>();
3649 AU.addRequired<MemorySSAWrapperPass>();
3650 AU.addPreserved<MemorySSAWrapperPass>();
3651 getLoopAnalysisUsage(AU);
3652 }
3653};
3654
3655} // end anonymous namespace
3656
3657bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
3658 if (skipLoop(L))
3659 return false;
3660
3661 Function &F = *L->getHeader()->getParent();
3662
3663 LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L
3664 << "\n");
3665 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3666 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
3667 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
3668 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
3669 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
3670 MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
3671 MemorySSAUpdater MSSAU(MSSA);
3672
3673 auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
3674 auto *SE = SEWP ? &SEWP->getSE() : nullptr;
3675
3676 auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid, bool PartiallyInvariant,
3677 ArrayRef<Loop *> NewLoops) {
3678 // If we did a non-trivial unswitch, we have added new (cloned) loops.
3679 for (auto *NewL : NewLoops)
3680 LPM.addLoop(*NewL);
3681
3682 // If the current loop remains valid, re-add it to the queue. This is
3683 // a little wasteful as we'll finish processing the current loop as well,
3684 // but it is the best we can do in the old PM.
3685 if (CurrentLoopValid) {
3686 // If the current loop has been unswitched using a partially invariant
3687 // condition, we should not re-add the current loop to avoid unswitching
3688 // on the same condition again.
3689 if (!PartiallyInvariant)
3690 LPM.addLoop(*L);
3691 } else
3692 LPM.markLoopAsDeleted(*L);
3693 };
3694
3695 auto DestroyLoopCB = [&LPM](Loop &L, StringRef /* Name */) {
3696 LPM.markLoopAsDeleted(L);
3697 };
3698
3699 if (VerifyMemorySSA)
3700 MSSA->verifyMemorySSA();
3701 bool Changed =
3702 unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE,
3703 &MSSAU, nullptr, nullptr, DestroyLoopCB);
3704
3705 if (VerifyMemorySSA)
3706 MSSA->verifyMemorySSA();
3707
3708 // Historically this pass has had issues with the dominator tree so verify it
3709 // in asserts builds.
3710 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
3711
3712 return Changed;
3713}
3714
3715char SimpleLoopUnswitchLegacyPass::ID = 0;
3716INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
3717 "Simple unswitch loops", false, false)
3718 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
3719 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
3720 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
3721 INITIALIZE_PASS_DEPENDENCY(LoopPass)
3722 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
3723 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
3724 INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
3725 "Simple unswitch loops", false, false)
3726
3727 Pass *llvm::createSimpleLoopUnswitchLegacyPass(bool NonTrivial) {
3728 return new SimpleLoopUnswitchLegacyPass(NonTrivial);
3729}
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
static void canonicalizeForInvariantConditionInjection(ICmpInst::Predicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue, BasicBlock *&IfFalse, const Loop &L)
Tries to canonicalize condition described by:
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
static int CalculateUnswitchCostMultiplier(const Instruction &TI, const Loop &L, const LoopInfo &LI, const DominatorTree &DT, ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU)
Copy a set of loop invariant values, and conditionally branch on them.
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root, const LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy > > VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
static const Loop * getTopMostExitingLoop(const BasicBlock *ExitBB, const LoopInfo &LI)
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, function_ref< void(bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, const Instruction *I, AssumptionCache *AC, const DominatorTree &DT)
Copy a set of loop invariant values ToDuplicate and insert them at the end of BB and conditionally br...
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
static bool shouldTryInjectInvariantCondition(const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L)
Returns true, if predicate described by ( Pred, LHS, RHS ) succeeding into blocks ( IfTrue,...
static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates, const Loop &L, const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC, const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo)
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
static Value * skipTrivialSelect(Value *Cond)
static bool collectUnswitchCandidatesWithInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, const DominatorTree &DT, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
Collect unswitch candidates by invariant conditions that are not immediately present in the loop.
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
static void replaceLoopInvariantUses(const Loop &L, Value *Invariant, Constant &Replacement)
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
static bool collectUnswitchCandidates(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, const Loop &L, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
static cl::opt< bool > InjectInvariantConditions("simple-loop-unswitch-inject-invariant-conditions", cl::Hidden, cl::desc("Whether we should inject new invariants and unswitch them to " "eliminate some existing (non-invariant) conditions."), cl::init(true))
static cl::opt< bool > FreezeLoopUnswitchCond("freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation."))
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, function_ref< void(bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
static bool areLoopExitPHIsLoopInvariant(const Loop &L, const BasicBlock &ExitingBB, const BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
static bool insertCandidatesWithPendingInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, Loop &L, ICmpInst::Predicate Pred, ArrayRef< CompareDesc > Compares, const DominatorTree &DT)
Given chain of loop branch conditions looking like: br (Variant < Invariant1) br (Variant < Invariant...
static NonTrivialUnswitchCandidate injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, MemorySSAUpdater *MSSAU)
Materialize pending invariant condition of the given candidate into IR.
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
static cl::opt< unsigned > InjectInvariantConditionHotnesThreshold("simple-loop-unswitch-inject-invariant-condition-hotness-threshold", cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " "unswitch on them to eliminate branches that are " "not-taken 1/<this option> times or less."), cl::init(16))
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops, ScalarEvolution *SE)
Rebuild a loop after unswitching removes some subset of blocks and edges.
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Build the cloned blocks for an unswitched copy of the given loop.
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, function_ref< void(bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
Unswitch control flow predicated on loop invariant conditions.
bool shouldTryInjectBasingOnMetadata(const BranchInst *BI, const BasicBlock *TakenSucc)
Returns true, if metadata on BI allows us to optimize branching into TakenSucc via injection of invar...
static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
This defines the Use class.
Value * RHS
Value * LHS
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:199
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:152
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
iterator begin() const
Definition: ArrayRef.h:151
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator end()
Definition: BasicBlock.h:316
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:314
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:372
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.cpp:136
const Instruction & front() const
Definition: BasicBlock.h:326
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:105
size_t size() const
Definition: BasicBlock.h:324
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition: BasicBlock.h:468
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:341
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1353
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1358
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:718
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:833
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:840
This is an important base class in LLVM.
Definition: Constant.h:41
bool isOneValue() const
Returns true if the value is one.
Definition: Constants.cpp:110
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
iterator begin()
Definition: DenseMap.h:75
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:314
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This class represents a freeze function that returns random concrete value if an operand is either a ...
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
Definition: MustExecute.h:132
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:79
bool isRelational() const
Return true if the predicate is relational (not EQ or NE).
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2424
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1043
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1398
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1420
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2558
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
Definition: Instruction.h:257
const BasicBlock * getParent() const
Definition: Instruction.h:90
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:275
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1455
SymbolTableList< Instruction >::iterator insertInto(BasicBlock *ParentBB, SymbolTableList< Instruction >::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Definition: Instruction.cpp:98
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:82
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:110
void addLoop(Loop &L)
Definition: LoopPass.cpp:76
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:139
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:182
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:202
BlockT * getHeader() const
Definition: LoopInfo.h:105
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:258
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
Definition: LoopInfo.h:453
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:195
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition: LoopInfo.h:412
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:183
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:114
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Definition: LoopInfo.h:242
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Definition: LoopInfo.h:421
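A hedged example of the Loop/LoopInfo queries above, showing the kind of placement check an unswitching-style transform makes; isHoistableHere is a hypothetical helper, not part of the pass:

#include "llvm/Analysis/LoopInfo.h"
using namespace llvm;

// The block must live directly in L (not in a subloop), and L needs a
// preheader to receive the hoisted, loop-invariant branch.
static bool isHoistableHere(const Loop &L, const BasicBlock *BB,
                            const LoopInfo &LI) {
  return LI.getLoopFor(BB) == &L && L.getLoopPreheader() != nullptr;
}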
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:180
LoopT * AllocateLoop(ArgsTy &&... Args)
Definition: LoopInfo.h:956
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:699
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
Definition: LoopInfo.h:1049
iterator end() const
Definition: LoopInfo.h:968
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
Definition: LoopInfo.h:1019
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
Definition: LoopInfo.h:1030
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
Definition: LoopInfo.h:999
iterator begin() const
Definition: LoopInfo.h:967
LoopT * getLoopFor(const BlockT *BB) const
Return the innermost loop that BB lives in.
Definition: LoopInfo.h:992
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
Definition: LoopInfo.h:1096
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1293
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
StringRef getName() const
Definition: LoopInfo.h:891
Metadata node.
Definition: Metadata.h:943
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1399
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:497
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition: MemorySSA.h:372
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:936
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point)
Create a MemoryAccess in MemorySSA at a specified point in a block, with a specified clobbering defin...
void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAccesses in a set of BasicBlocks about to be deleted.
void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
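A minimal sketch of the shape of update the MemorySSAUpdater entries above are used for: keeping MemorySSA and the dominator tree consistent when an edge is deleted. It assumes a valid DominatorTree and MemorySSAUpdater for the function being rewritten; the helper name is hypothetical:

#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

static void deleteEdgeEverywhere(BasicBlock *From, BasicBlock *To,
                                 DominatorTree &DT, MemorySSAUpdater &MSSAU) {
  // Drop To's MemoryPhi incoming value for From, then mirror the same CFG
  // change in the dominator tree.
  MSSAU.removeEdge(From, To);
  DT.applyUpdates({{DominatorTree::Delete, From, To}});
}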
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:986
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:700
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1862
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:717
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDefs and MemoryPhis for a given basic block.
Definition: MemorySSA.h:765
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1058
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1750
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) const
Returns true if BasicBlock BB is considered cold.
The main scalar evolution driver.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may affect Scalar...
void forgetTopmostLoop(const Loop *L)
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may affect its v...
void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:208
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
size_type size() const
Definition: SmallPtrSet.h:93
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:379
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition: SmallPtrSet.h:450
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:301
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
void reserve(size_type N)
Definition: SmallVector.h:667
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:809
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
SymbolTableList< Instruction >::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
ConstantIntT * getCaseValue() const
Resolves case value for current case.
Multiway switch.
BasicBlock * getDefaultDest() const
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, Instruction *InsertBefore=nullptr)
void setDefaultDest(BasicBlock *DefaultCase)
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
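A small, hypothetical example of iterating cases() to answer the kind of question the switch-handling logic asks, namely which case values reach a given successor:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Collect the case values whose destination is Succ; the default
// destination is intentionally not considered here.
static SmallVector<ConstantInt *, 4> casesTargeting(SwitchInst &SI,
                                                    BasicBlock *Succ) {
  SmallVector<ConstantInt *, 4> Vals;
  for (auto Case : SI.cases())
    if (Case.getCaseSuccessor() == Succ)
      Vals.push_back(Case.getCaseValue());
  return Vals;
}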
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool hasBranchDivergence() const
Return true if branch divergence exists.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:29
void push_back(EltTy NewVal)
bool empty() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
unsigned getIntegerBitWidth() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: ValueMap.h:164
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: ValueMap.h:151
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:375
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:994
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
Definition: ilist_node.h:82
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:979
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:517
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
Definition: PatternMatch.h:168
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:537
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:218
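For the PatternMatch helpers above, a hedged sketch of recognizing one branch shape that matters for unswitching: a conditional branch on a logical and/or. The helper is illustrative, not the pass's code:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Match `br (X && Y), T, F` or `br (X || Y), T, F`; the bound values X and Y
// are where a real client would look for loop-invariant operands.
static bool isBranchOnLogicalOp(BasicBlock &BB) {
  Value *X, *Y;
  BasicBlock *T, *F;
  return match(BB.getTerminator(),
               m_Br(m_CombineOr(m_LogicalAnd(m_Value(X), m_Value(Y)),
                                m_LogicalOr(m_Value(X), m_Value(Y))),
                    T, F));
}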
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:413
void stable_sort(R &&Range)
Definition: STLExtras.h:2063
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1839
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1819
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:537
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:2129
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:748
MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
Definition: LoopInfo.cpp:1043
Pass * createSimpleLoopUnswitchLegacyPass(bool NonTrivial=false)
Create the legacy pass object for the simple loop unswitcher.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1826
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:382
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
Definition: GuardUtils.cpp:18
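A brief illustration of isGuard() in the role this pass gives it; the helper is hypothetical and simply collects guard intrinsic calls that could later be turned into explicit branches:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// Every call of llvm.experimental.guard in BB is a potential candidate.
static SmallVector<IntrinsicInst *, 4> collectGuards(BasicBlock &BB) {
  SmallVector<IntrinsicInst *, 4> Guards;
  for (Instruction &I : BB)
    if (isGuard(&I))
      Guards.push_back(cast<IntrinsicInst>(&I));
  return Guards;
}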
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:511
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&...args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
Definition: STLExtras.h:968
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1744
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:89
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:71
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:256
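CloneBasicBlock, the RF_* remap flags, and RemapInstruction above combine in the usual cloning recipe. A minimal sketch, assuming VMap already maps any values cloned earlier and using an illustrative ".us" name suffix:

#include "llvm/IR/BasicBlock.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;

// Clone BB into F, record the mapping, then rewrite the clone's operands to
// point at previously cloned values wherever the map has entries for them.
static BasicBlock *cloneAndRemap(BasicBlock *BB, ValueToValueMapTy &VMap,
                                 Function *F) {
  BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".us", F);
  VMap[BB] = NewBB;
  for (Instruction &I : *NewBB)
    RemapInstruction(&I, VMap,
                     RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
  return NewBB;
}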
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:141
void initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry &)
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:89
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:57
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
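extractBranchWeights() is how profile data is read back off a terminator. A hedged sketch (getWeights is a hypothetical wrapper) of pulling the weights from a conditional branch's !prof node:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/ProfDataUtils.h"
using namespace llvm;

// Returns true and fills W with one weight per successor when the branch
// carries branch_weights metadata; returns false otherwise.
static bool getWeights(const BranchInst &BI, SmallVectorImpl<uint32_t> &W) {
  if (MDNode *Prof = BI.getMetadata(LLVMContext::MD_prof))
    return extractBranchWeights(Prof, W);
  return false;
}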
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:2018
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
Definition: LoopInfo.cpp:1126
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2113
auto predecessors(const MachineBasicBlock *BB)
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
Instruction * SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, bool Unreachable, MDNode *BranchWeights, DominatorTree *DT, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
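SplitBlockAndInsertIfThen(), in the DominatorTree*/LoopInfo* overload listed above, is the usual way a guard-style check becomes explicit control flow. A sketch under that assumption; makeCheckBlock is illustrative only:

#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;

// Split before SplitBefore and branch on Cond; the 'then' block ends in
// unreachable here, so the caller decides what the failing path does.
static BasicBlock *makeCheckBlock(Value *Cond, Instruction *SplitBefore,
                                  DominatorTree *DT, LoopInfo *LI) {
  Instruction *ThenTerm =
      SplitBlockAndInsertIfThen(Cond, SplitBefore, /*Unreachable=*/true,
                                /*BranchWeights=*/nullptr, DT, LI);
  return ThenTerm->getParent();
}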
std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
Definition: LoopUtils.cpp:1720
bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition: LCSSA.cpp:341
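formDedicatedExitBlocks() and formLCSSA() above are the loop-form guarantees the transform depends on. A hedged sketch of re-establishing them after a structural change; recanonicalize is a hypothetical helper, and SE/MSSAU may be null when those analyses are not in use:

#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;

static void recanonicalize(Loop &L, DominatorTree &DT, LoopInfo &LI,
                           ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
  // Give every exit block only in-loop predecessors...
  formDedicatedExitBlocks(&L, &DT, &LI, MSSAU, /*PreserveLCSSA=*/true);
  // ...then make sure loop-live-out values flow through LCSSA phi nodes.
  formLCSSA(L, DT, &LI, SE);
}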
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
Struct to hold information about a partially invariant condition.
Definition: LoopUtils.h:519
SmallVector< Instruction * > InstToDuplicate
Instructions that need to be duplicated and checked for the unswitching condition.
Definition: LoopUtils.h:522
Constant * KnownValue
Constant to indicate for which value the condition is invariant.
Definition: LoopUtils.h:525
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Direction
An enum for the direction of the loop.
Definition: LoopInfo.h:723
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:371