LLVM  16.0.0git
SimpleLoopUnswitch.cpp
Go to the documentation of this file.
1 ///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "llvm/ADT/DenseMap.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/ADT/Sequence.h"
13 #include "llvm/ADT/SetVector.h"
14 #include "llvm/ADT/SmallPtrSet.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/ADT/Twine.h"
20 #include "llvm/Analysis/CFG.h"
24 #include "llvm/Analysis/LoopInfo.h"
26 #include "llvm/Analysis/LoopPass.h"
34 #include "llvm/IR/BasicBlock.h"
35 #include "llvm/IR/Constant.h"
36 #include "llvm/IR/Constants.h"
37 #include "llvm/IR/Dominators.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/IR/IRBuilder.h"
40 #include "llvm/IR/InstrTypes.h"
41 #include "llvm/IR/Instruction.h"
42 #include "llvm/IR/Instructions.h"
43 #include "llvm/IR/IntrinsicInst.h"
44 #include "llvm/IR/PatternMatch.h"
45 #include "llvm/IR/Use.h"
46 #include "llvm/IR/Value.h"
47 #include "llvm/InitializePasses.h"
48 #include "llvm/Pass.h"
49 #include "llvm/Support/Casting.h"
51 #include "llvm/Support/Debug.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <iterator>
65 #include <numeric>
66 #include <utility>
67 
68 #define DEBUG_TYPE "simple-loop-unswitch"
69 
70 using namespace llvm;
71 using namespace llvm::PatternMatch;
72 
73 STATISTIC(NumBranches, "Number of branches unswitched");
74 STATISTIC(NumSwitches, "Number of switches unswitched");
75 STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
76 STATISTIC(NumTrivial, "Number of unswitches that are trivial");
77 STATISTIC(
78  NumCostMultiplierSkipped,
79  "Number of unswitch candidates that had their cost multiplier skipped");
80 
82  "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
83  cl::desc("Forcibly enables non-trivial loop unswitching rather than "
84  "following the configuration passed into the pass."));
85 
86 static cl::opt<int>
87  UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
88  cl::desc("The cost threshold for unswitching a loop."));
89 
91  "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
92  cl::desc("Enable unswitch cost multiplier that prohibits exponential "
93  "explosion in nontrivial unswitch."));
95  "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
96  cl::desc("Toplevel siblings divisor for cost multiplier."));
98  "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
99  cl::desc("Number of unswitch candidates that are ignored when calculating "
100  "cost multiplier."));
102  "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
103  cl::desc("If enabled, simple loop unswitching will also consider "
104  "llvm.experimental.guard intrinsics as unswitch candidates."));
106  "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
107  cl::init(false), cl::Hidden,
108  cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
109  "null checks to save time analyzing if we can keep it."));
110 static cl::opt<unsigned>
111  MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
112  cl::desc("Max number of memory uses to explore during "
113  "partial unswitching analysis"),
114  cl::init(100), cl::Hidden);
116  "freeze-loop-unswitch-cond", cl::init(true), cl::Hidden,
117  cl::desc("If enabled, the freeze instruction will be added to condition "
118  "of loop unswitch to prevent miscompilation."));
119 
120 namespace {
121 struct NonTrivialUnswitchCandidate {
122  Instruction *TI = nullptr;
123  TinyPtrVector<Value *> Invariants;
125  NonTrivialUnswitchCandidate(Instruction *TI, ArrayRef<Value *> Invariants,
126  Optional<InstructionCost> Cost = None)
127  : TI(TI), Invariants(Invariants), Cost(Cost) {};
128 };
129 } // end anonymous namespace.
130 
131 // Helper to skip (select x, true, false), which matches both a logical AND and
132 // OR and can confuse code that tries to determine if \p Cond is either a
133 // logical AND or OR but not both.
135  Value *CondNext;
136  while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
137  Cond = CondNext;
138  return Cond;
139 }
140 
141 /// Collect all of the loop invariant input values transitively used by the
142 /// homogeneous instruction graph from a given root.
143 ///
144 /// This essentially walks from a root recursively through loop variant operands
145 /// which have perform the same logical operation (AND or OR) and finds all
146 /// inputs which are loop invariant. For some operations these can be
147 /// re-associated and unswitched out of the loop entirely.
150  const LoopInfo &LI) {
151  assert(!L.isLoopInvariant(&Root) &&
152  "Only need to walk the graph if root itself is not invariant.");
153  TinyPtrVector<Value *> Invariants;
154 
155  bool IsRootAnd = match(&Root, m_LogicalAnd());
156  bool IsRootOr = match(&Root, m_LogicalOr());
157 
158  // Build a worklist and recurse through operators collecting invariants.
161  Worklist.push_back(&Root);
162  Visited.insert(&Root);
163  do {
164  Instruction &I = *Worklist.pop_back_val();
165  for (Value *OpV : I.operand_values()) {
166  // Skip constants as unswitching isn't interesting for them.
167  if (isa<Constant>(OpV))
168  continue;
169 
170  // Add it to our result if loop invariant.
171  if (L.isLoopInvariant(OpV)) {
172  Invariants.push_back(OpV);
173  continue;
174  }
175 
176  // If not an instruction with the same opcode, nothing we can do.
177  Instruction *OpI = dyn_cast<Instruction>(skipTrivialSelect(OpV));
178 
179  if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
180  (IsRootOr && match(OpI, m_LogicalOr())))) {
181  // Visit this operand.
182  if (Visited.insert(OpI).second)
183  Worklist.push_back(OpI);
184  }
185  }
186  } while (!Worklist.empty());
187 
188  return Invariants;
189 }
190 
191 static void replaceLoopInvariantUses(const Loop &L, Value *Invariant,
192  Constant &Replacement) {
193  assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");
194 
195  // Replace uses of LIC in the loop with the given constant.
196  // We use make_early_inc_range as set invalidates the iterator.
197  for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
198  Instruction *UserI = dyn_cast<Instruction>(U.getUser());
199 
200  // Replace this use within the loop body.
201  if (UserI && L.contains(UserI))
202  U.set(&Replacement);
203  }
204 }
205 
206 /// Check that all the LCSSA PHI nodes in the loop exit block have trivial
207 /// incoming values along this edge.
208 static bool areLoopExitPHIsLoopInvariant(const Loop &L,
209  const BasicBlock &ExitingBB,
210  const BasicBlock &ExitBB) {
211  for (const Instruction &I : ExitBB) {
212  auto *PN = dyn_cast<PHINode>(&I);
213  if (!PN)
214  // No more PHIs to check.
215  return true;
216 
217  // If the incoming value for this edge isn't loop invariant the unswitch
218  // won't be trivial.
219  if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
220  return false;
221  }
222  llvm_unreachable("Basic blocks should never be empty!");
223 }
224 
225 /// Copy a set of loop invariant values \p ToDuplicate and insert them at the
226 /// end of \p BB and conditionally branch on the copied condition. We only
227 /// branch on a single value.
229  BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
230  BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
231  const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) {
232  IRBuilder<> IRB(&BB);
233 
234  SmallVector<Value *> FrozenInvariants;
235  for (Value *Inv : Invariants) {
236  if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT))
237  Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr");
238  FrozenInvariants.push_back(Inv);
239  }
240 
241  Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
242  : IRB.CreateAnd(FrozenInvariants);
243  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
244  Direction ? &NormalSucc : &UnswitchedSucc);
245 }
246 
247 /// Copy a set of loop invariant values, and conditionally branch on them.
249  BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
250  BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
251  MemorySSAUpdater *MSSAU) {
252  ValueToValueMapTy VMap;
253  for (auto *Val : reverse(ToDuplicate)) {
254  Instruction *Inst = cast<Instruction>(Val);
255  Instruction *NewInst = Inst->clone();
256  BB.getInstList().insert(BB.end(), NewInst);
257  RemapInstruction(NewInst, VMap,
259  VMap[Val] = NewInst;
260 
261  if (!MSSAU)
262  continue;
263 
264  MemorySSA *MSSA = MSSAU->getMemorySSA();
265  if (auto *MemUse =
266  dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) {
267  auto *DefiningAccess = MemUse->getDefiningAccess();
268  // Get the first defining access before the loop.
269  while (L.contains(DefiningAccess->getBlock())) {
270  // If the defining access is a MemoryPhi, get the incoming
271  // value for the pre-header as defining access.
272  if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
273  DefiningAccess =
274  MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
275  else
276  DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
277  }
278  MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
279  NewInst->getParent(),
281  }
282  }
283 
284  IRBuilder<> IRB(&BB);
285  Value *Cond = VMap[ToDuplicate[0]];
286  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
287  Direction ? &NormalSucc : &UnswitchedSucc);
288 }
289 
290 /// Rewrite the PHI nodes in an unswitched loop exit basic block.
291 ///
292 /// Requires that the loop exit and unswitched basic block are the same, and
293 /// that the exiting block was a unique predecessor of that block. Rewrites the
294 /// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
295 /// PHI nodes from the old preheader that now contains the unswitched
296 /// terminator.
298  BasicBlock &OldExitingBB,
299  BasicBlock &OldPH) {
300  for (PHINode &PN : UnswitchedBB.phis()) {
301  // When the loop exit is directly unswitched we just need to update the
302  // incoming basic block. We loop to handle weird cases with repeated
303  // incoming blocks, but expect to typically only have one operand here.
304  for (auto i : seq<int>(0, PN.getNumOperands())) {
305  assert(PN.getIncomingBlock(i) == &OldExitingBB &&
306  "Found incoming block different from unique predecessor!");
307  PN.setIncomingBlock(i, &OldPH);
308  }
309  }
310 }
311 
312 /// Rewrite the PHI nodes in the loop exit basic block and the split off
313 /// unswitched block.
314 ///
315 /// Because the exit block remains an exit from the loop, this rewrites the
316 /// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
317 /// nodes into the unswitched basic block to select between the value in the
318 /// old preheader and the loop exit.
320  BasicBlock &UnswitchedBB,
321  BasicBlock &OldExitingBB,
322  BasicBlock &OldPH,
323  bool FullUnswitch) {
324  assert(&ExitBB != &UnswitchedBB &&
325  "Must have different loop exit and unswitched blocks!");
326  Instruction *InsertPt = &*UnswitchedBB.begin();
327  for (PHINode &PN : ExitBB.phis()) {
328  auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
329  PN.getName() + ".split", InsertPt);
330 
331  // Walk backwards over the old PHI node's inputs to minimize the cost of
332  // removing each one. We have to do this weird loop manually so that we
333  // create the same number of new incoming edges in the new PHI as we expect
334  // each case-based edge to be included in the unswitched switch in some
335  // cases.
336  // FIXME: This is really, really gross. It would be much cleaner if LLVM
337  // allowed us to create a single entry for a predecessor block without
338  // having separate entries for each "edge" even though these edges are
339  // required to produce identical results.
340  for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
341  if (PN.getIncomingBlock(i) != &OldExitingBB)
342  continue;
343 
344  Value *Incoming = PN.getIncomingValue(i);
345  if (FullUnswitch)
346  // No more edge from the old exiting block to the exit block.
347  PN.removeIncomingValue(i);
348 
349  NewPN->addIncoming(Incoming, &OldPH);
350  }
351 
352  // Now replace the old PHI with the new one and wire the old one in as an
353  // input to the new one.
354  PN.replaceAllUsesWith(NewPN);
355  NewPN->addIncoming(&PN, &ExitBB);
356  }
357 }
358 
359 /// Hoist the current loop up to the innermost loop containing a remaining exit.
360 ///
361 /// Because we've removed an exit from the loop, we may have changed the set of
362 /// loops reachable and need to move the current loop up the loop nest or even
363 /// to an entirely separate nest.
364 static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
365  DominatorTree &DT, LoopInfo &LI,
366  MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
367  // If the loop is already at the top level, we can't hoist it anywhere.
368  Loop *OldParentL = L.getParentLoop();
369  if (!OldParentL)
370  return;
371 
373  L.getExitBlocks(Exits);
374  Loop *NewParentL = nullptr;
375  for (auto *ExitBB : Exits)
376  if (Loop *ExitL = LI.getLoopFor(ExitBB))
377  if (!NewParentL || NewParentL->contains(ExitL))
378  NewParentL = ExitL;
379 
380  if (NewParentL == OldParentL)
381  return;
382 
383  // The new parent loop (if different) should always contain the old one.
384  if (NewParentL)
385  assert(NewParentL->contains(OldParentL) &&
386  "Can only hoist this loop up the nest!");
387 
388  // The preheader will need to move with the body of this loop. However,
389  // because it isn't in this loop we also need to update the primary loop map.
390  assert(OldParentL == LI.getLoopFor(&Preheader) &&
391  "Parent loop of this loop should contain this loop's preheader!");
392  LI.changeLoopFor(&Preheader, NewParentL);
393 
394  // Remove this loop from its old parent.
395  OldParentL->removeChildLoop(&L);
396 
397  // Add the loop either to the new parent or as a top-level loop.
398  if (NewParentL)
399  NewParentL->addChildLoop(&L);
400  else
401  LI.addTopLevelLoop(&L);
402 
403  // Remove this loops blocks from the old parent and every other loop up the
404  // nest until reaching the new parent. Also update all of these
405  // no-longer-containing loops to reflect the nesting change.
406  for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
407  OldContainingL = OldContainingL->getParentLoop()) {
408  llvm::erase_if(OldContainingL->getBlocksVector(),
409  [&](const BasicBlock *BB) {
410  return BB == &Preheader || L.contains(BB);
411  });
412 
413  OldContainingL->getBlocksSet().erase(&Preheader);
414  for (BasicBlock *BB : L.blocks())
415  OldContainingL->getBlocksSet().erase(BB);
416 
417  // Because we just hoisted a loop out of this one, we have essentially
418  // created new exit paths from it. That means we need to form LCSSA PHI
419  // nodes for values used in the no-longer-nested loop.
420  formLCSSA(*OldContainingL, DT, &LI, SE);
421 
422  // We shouldn't need to form dedicated exits because the exit introduced
423  // here is the (just split by unswitching) preheader. However, after trivial
424  // unswitching it is possible to get new non-dedicated exits out of parent
425  // loop so let's conservatively form dedicated exit blocks and figure out
426  // if we can optimize later.
427  formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
428  /*PreserveLCSSA*/ true);
429  }
430 }
431 
432 // Return the top-most loop containing ExitBB and having ExitBB as exiting block
433 // or the loop containing ExitBB, if there is no parent loop containing ExitBB
434 // as exiting block.
435 static const Loop *getTopMostExitingLoop(const BasicBlock *ExitBB,
436  const LoopInfo &LI) {
437  const Loop *TopMost = LI.getLoopFor(ExitBB);
438  const Loop *Current = TopMost;
439  while (Current) {
440  if (Current->isLoopExiting(ExitBB))
441  TopMost = Current;
442  Current = Current->getParentLoop();
443  }
444  return TopMost;
445 }
446 
447 /// Unswitch a trivial branch if the condition is loop invariant.
448 ///
449 /// This routine should only be called when loop code leading to the branch has
450 /// been validated as trivial (no side effects). This routine checks if the
451 /// condition is invariant and one of the successors is a loop exit. This
452 /// allows us to unswitch without duplicating the loop, making it trivial.
453 ///
454 /// If this routine fails to unswitch the branch it returns false.
455 ///
456 /// If the branch can be unswitched, this routine splits the preheader and
457 /// hoists the branch above that split. Preserves loop simplified form
458 /// (splitting the exit block as necessary). It simplifies the branch within
459 /// the loop to an unconditional branch but doesn't remove it entirely. Further
460 /// cleanup can be done with some simplifycfg like pass.
461 ///
462 /// If `SE` is not null, it will be updated based on the potential loop SCEVs
463 /// invalidated by this.
465  LoopInfo &LI, ScalarEvolution *SE,
466  MemorySSAUpdater *MSSAU) {
467  assert(BI.isConditional() && "Can only unswitch a conditional branch!");
468  LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
469 
470  // The loop invariant values that we want to unswitch.
471  TinyPtrVector<Value *> Invariants;
472 
473  // When true, we're fully unswitching the branch rather than just unswitching
474  // some input conditions to the branch.
475  bool FullUnswitch = false;
476 
478  if (L.isLoopInvariant(Cond)) {
479  Invariants.push_back(Cond);
480  FullUnswitch = true;
481  } else {
482  if (auto *CondInst = dyn_cast<Instruction>(Cond))
483  Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
484  if (Invariants.empty()) {
485  LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n");
486  return false;
487  }
488  }
489 
490  // Check that one of the branch's successors exits, and which one.
491  bool ExitDirection = true;
492  int LoopExitSuccIdx = 0;
493  auto *LoopExitBB = BI.getSuccessor(0);
494  if (L.contains(LoopExitBB)) {
495  ExitDirection = false;
496  LoopExitSuccIdx = 1;
497  LoopExitBB = BI.getSuccessor(1);
498  if (L.contains(LoopExitBB)) {
499  LLVM_DEBUG(dbgs() << " Branch doesn't exit the loop!\n");
500  return false;
501  }
502  }
503  auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
504  auto *ParentBB = BI.getParent();
505  if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {
506  LLVM_DEBUG(dbgs() << " Loop exit PHI's aren't loop-invariant!\n");
507  return false;
508  }
509 
510  // When unswitching only part of the branch's condition, we need the exit
511  // block to be reached directly from the partially unswitched input. This can
512  // be done when the exit block is along the true edge and the branch condition
513  // is a graph of `or` operations, or the exit block is along the false edge
514  // and the condition is a graph of `and` operations.
515  if (!FullUnswitch) {
516  if (ExitDirection ? !match(Cond, m_LogicalOr())
517  : !match(Cond, m_LogicalAnd())) {
518  LLVM_DEBUG(dbgs() << " Branch condition is in improper form for "
519  "non-full unswitch!\n");
520  return false;
521  }
522  }
523 
524  LLVM_DEBUG({
525  dbgs() << " unswitching trivial invariant conditions for: " << BI
526  << "\n";
527  for (Value *Invariant : Invariants) {
528  dbgs() << " " << *Invariant << " == true";
529  if (Invariant != Invariants.back())
530  dbgs() << " ||";
531  dbgs() << "\n";
532  }
533  });
534 
535  // If we have scalar evolutions, we need to invalidate them including this
536  // loop, the loop containing the exit block and the topmost parent loop
537  // exiting via LoopExitBB.
538  if (SE) {
539  if (const Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
540  SE->forgetLoop(ExitL);
541  else
542  // Forget the entire nest as this exits the entire nest.
543  SE->forgetTopmostLoop(&L);
545  }
546 
547  if (MSSAU && VerifyMemorySSA)
548  MSSAU->getMemorySSA()->verifyMemorySSA();
549 
550  // Split the preheader, so that we know that there is a safe place to insert
551  // the conditional branch. We will change the preheader to have a conditional
552  // branch on LoopCond.
553  BasicBlock *OldPH = L.getLoopPreheader();
554  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
555 
556  // Now that we have a place to insert the conditional branch, create a place
557  // to branch to: this is the exit block out of the loop that we are
558  // unswitching. We need to split this if there are other loop predecessors.
559  // Because the loop is in simplified form, *any* other predecessor is enough.
560  BasicBlock *UnswitchedBB;
561  if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
562  assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
563  "A branch's parent isn't a predecessor!");
564  UnswitchedBB = LoopExitBB;
565  } else {
566  UnswitchedBB =
567  SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI, MSSAU);
568  }
569 
570  if (MSSAU && VerifyMemorySSA)
571  MSSAU->getMemorySSA()->verifyMemorySSA();
572 
573  // Actually move the invariant uses into the unswitched position. If possible,
574  // we do this by moving the instructions, but when doing partial unswitching
575  // we do it by building a new merge of the values in the unswitched position.
576  OldPH->getTerminator()->eraseFromParent();
577  if (FullUnswitch) {
578  // If fully unswitching, we can use the existing branch instruction.
579  // Splice it into the old PH to gate reaching the new preheader and re-point
580  // its successors.
581  OldPH->getInstList().splice(OldPH->end(), BI.getParent()->getInstList(),
582  BI);
583  BI.setCondition(Cond);
584  if (MSSAU) {
585  // Temporarily clone the terminator, to make MSSA update cheaper by
586  // separating "insert edge" updates from "remove edge" ones.
587  ParentBB->getInstList().push_back(BI.clone());
588  } else {
589  // Create a new unconditional branch that will continue the loop as a new
590  // terminator.
591  BranchInst::Create(ContinueBB, ParentBB);
592  }
593  BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
594  BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
595  } else {
596  // Only unswitching a subset of inputs to the condition, so we will need to
597  // build a new branch that merges the invariant inputs.
598  if (ExitDirection)
600  "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
601  "condition!");
602  else
604  "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
605  " condition!");
607  *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
608  FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT);
609  }
610 
611  // Update the dominator tree with the added edge.
612  DT.insertEdge(OldPH, UnswitchedBB);
613 
614  // After the dominator tree was updated with the added edge, update MemorySSA
615  // if available.
616  if (MSSAU) {
618  Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
619  MSSAU->applyInsertUpdates(Updates, DT);
620  }
621 
622  // Finish updating dominator tree and memory ssa for full unswitch.
623  if (FullUnswitch) {
624  if (MSSAU) {
625  // Remove the cloned branch instruction.
626  ParentBB->getTerminator()->eraseFromParent();
627  // Create unconditional branch now.
628  BranchInst::Create(ContinueBB, ParentBB);
629  MSSAU->removeEdge(ParentBB, LoopExitBB);
630  }
631  DT.deleteEdge(ParentBB, LoopExitBB);
632  }
633 
634  if (MSSAU && VerifyMemorySSA)
635  MSSAU->getMemorySSA()->verifyMemorySSA();
636 
637  // Rewrite the relevant PHI nodes.
638  if (UnswitchedBB == LoopExitBB)
639  rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
640  else
641  rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
642  *ParentBB, *OldPH, FullUnswitch);
643 
644  // The constant we can replace all of our invariants with inside the loop
645  // body. If any of the invariants have a value other than this the loop won't
646  // be entered.
647  ConstantInt *Replacement = ExitDirection
650 
651  // Since this is an i1 condition we can also trivially replace uses of it
652  // within the loop with a constant.
653  for (Value *Invariant : Invariants)
654  replaceLoopInvariantUses(L, Invariant, *Replacement);
655 
656  // If this was full unswitching, we may have changed the nesting relationship
657  // for this loop so hoist it to its correct parent if needed.
658  if (FullUnswitch)
659  hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
660 
661  if (MSSAU && VerifyMemorySSA)
662  MSSAU->getMemorySSA()->verifyMemorySSA();
663 
664  LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
665  ++NumTrivial;
666  ++NumBranches;
667  return true;
668 }
669 
670 /// Unswitch a trivial switch if the condition is loop invariant.
671 ///
672 /// This routine should only be called when loop code leading to the switch has
673 /// been validated as trivial (no side effects). This routine checks if the
674 /// condition is invariant and that at least one of the successors is a loop
675 /// exit. This allows us to unswitch without duplicating the loop, making it
676 /// trivial.
677 ///
678 /// If this routine fails to unswitch the switch it returns false.
679 ///
680 /// If the switch can be unswitched, this routine splits the preheader and
681 /// copies the switch above that split. If the default case is one of the
682 /// exiting cases, it copies the non-exiting cases and points them at the new
683 /// preheader. If the default case is not exiting, it copies the exiting cases
684 /// and points the default at the preheader. It preserves loop simplified form
685 /// (splitting the exit blocks as necessary). It simplifies the switch within
686 /// the loop by removing now-dead cases. If the default case is one of those
687 /// unswitched, it replaces its destination with a new basic block containing
688 /// only unreachable. Such basic blocks, while technically loop exits, are not
689 /// considered for unswitching so this is a stable transform and the same
690 /// switch will not be revisited. If after unswitching there is only a single
691 /// in-loop successor, the switch is further simplified to an unconditional
692 /// branch. Still more cleanup can be done with some simplifycfg like pass.
693 ///
694 /// If `SE` is not null, it will be updated based on the potential loop SCEVs
695 /// invalidated by this.
697  LoopInfo &LI, ScalarEvolution *SE,
698  MemorySSAUpdater *MSSAU) {
699  LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
700  Value *LoopCond = SI.getCondition();
701 
702  // If this isn't switching on an invariant condition, we can't unswitch it.
703  if (!L.isLoopInvariant(LoopCond))
704  return false;
705 
706  auto *ParentBB = SI.getParent();
707 
708  // The same check must be used both for the default and the exit cases. We
709  // should never leave edges from the switch instruction to a basic block that
710  // we are unswitching, hence the condition used to determine the default case
711  // needs to also be used to populate ExitCaseIndices, which is then used to
712  // remove cases from the switch.
713  auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
714  // BBToCheck is not an exit block if it is inside loop L.
715  if (L.contains(&BBToCheck))
716  return false;
717  // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
718  if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
719  return false;
720  // We do not unswitch a block that only has an unreachable statement, as
721  // it's possible this is a previously unswitched block. Only unswitch if
722  // either the terminator is not unreachable, or, if it is, it's not the only
723  // instruction in the block.
724  auto *TI = BBToCheck.getTerminator();
725  bool isUnreachable = isa<UnreachableInst>(TI);
726  return !isUnreachable ||
727  (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI));
728  };
729 
730  SmallVector<int, 4> ExitCaseIndices;
731  for (auto Case : SI.cases())
732  if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
733  ExitCaseIndices.push_back(Case.getCaseIndex());
734  BasicBlock *DefaultExitBB = nullptr;
737  if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
738  DefaultExitBB = SI.getDefaultDest();
739  } else if (ExitCaseIndices.empty())
740  return false;
741 
742  LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
743 
744  if (MSSAU && VerifyMemorySSA)
745  MSSAU->getMemorySSA()->verifyMemorySSA();
746 
747  // We may need to invalidate SCEVs for the outermost loop reached by any of
748  // the exits.
749  Loop *OuterL = &L;
750 
751  if (DefaultExitBB) {
752  // Clear out the default destination temporarily to allow accurate
753  // predecessor lists to be examined below.
754  SI.setDefaultDest(nullptr);
755  // Check the loop containing this exit.
756  Loop *ExitL = LI.getLoopFor(DefaultExitBB);
757  if (!ExitL || ExitL->contains(OuterL))
758  OuterL = ExitL;
759  }
760 
761  // Store the exit cases into a separate data structure and remove them from
762  // the switch.
763  SmallVector<std::tuple<ConstantInt *, BasicBlock *,
765  4> ExitCases;
766  ExitCases.reserve(ExitCaseIndices.size());
768  // We walk the case indices backwards so that we remove the last case first
769  // and don't disrupt the earlier indices.
770  for (unsigned Index : reverse(ExitCaseIndices)) {
771  auto CaseI = SI.case_begin() + Index;
772  // Compute the outer loop from this exit.
773  Loop *ExitL = LI.getLoopFor(CaseI->getCaseSuccessor());
774  if (!ExitL || ExitL->contains(OuterL))
775  OuterL = ExitL;
776  // Save the value of this case.
777  auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
778  ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
779  // Delete the unswitched cases.
780  SIW.removeCase(CaseI);
781  }
782 
783  if (SE) {
784  if (OuterL)
785  SE->forgetLoop(OuterL);
786  else
787  SE->forgetTopmostLoop(&L);
788  }
789 
790  // Check if after this all of the remaining cases point at the same
791  // successor.
792  BasicBlock *CommonSuccBB = nullptr;
793  if (SI.getNumCases() > 0 &&
794  all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
795  return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
796  }))
797  CommonSuccBB = SI.case_begin()->getCaseSuccessor();
798  if (!DefaultExitBB) {
799  // If we're not unswitching the default, we need it to match any cases to
800  // have a common successor or if we have no cases it is the common
801  // successor.
802  if (SI.getNumCases() == 0)
803  CommonSuccBB = SI.getDefaultDest();
804  else if (SI.getDefaultDest() != CommonSuccBB)
805  CommonSuccBB = nullptr;
806  }
807 
808  // Split the preheader, so that we know that there is a safe place to insert
809  // the switch.
810  BasicBlock *OldPH = L.getLoopPreheader();
811  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
812  OldPH->getTerminator()->eraseFromParent();
813 
814  // Now add the unswitched switch.
815  auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
816  SwitchInstProfUpdateWrapper NewSIW(*NewSI);
817 
818  // Rewrite the IR for the unswitched basic blocks. This requires two steps.
819  // First, we split any exit blocks with remaining in-loop predecessors. Then
820  // we update the PHIs in one of two ways depending on if there was a split.
821  // We walk in reverse so that we split in the same order as the cases
822  // appeared. This is purely for convenience of reading the resulting IR, but
823  // it doesn't cost anything really.
824  SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
826  // Handle the default exit if necessary.
827  // FIXME: It'd be great if we could merge this with the loop below but LLVM's
828  // ranges aren't quite powerful enough yet.
829  if (DefaultExitBB) {
830  if (pred_empty(DefaultExitBB)) {
831  UnswitchedExitBBs.insert(DefaultExitBB);
832  rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
833  } else {
834  auto *SplitBB =
835  SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI, MSSAU);
836  rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
837  *ParentBB, *OldPH,
838  /*FullUnswitch*/ true);
839  DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
840  }
841  }
842  // Note that we must use a reference in the for loop so that we update the
843  // container.
844  for (auto &ExitCase : reverse(ExitCases)) {
845  // Grab a reference to the exit block in the pair so that we can update it.
846  BasicBlock *ExitBB = std::get<1>(ExitCase);
847 
848  // If this case is the last edge into the exit block, we can simply reuse it
849  // as it will no longer be a loop exit. No mapping necessary.
850  if (pred_empty(ExitBB)) {
851  // Only rewrite once.
852  if (UnswitchedExitBBs.insert(ExitBB).second)
853  rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
854  continue;
855  }
856 
857  // Otherwise we need to split the exit block so that we retain an exit
858  // block from the loop and a target for the unswitched condition.
859  BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
860  if (!SplitExitBB) {
861  // If this is the first time we see this, do the split and remember it.
862  SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
863  rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
864  *ParentBB, *OldPH,
865  /*FullUnswitch*/ true);
866  }
867  // Update the case pair to point to the split block.
868  std::get<1>(ExitCase) = SplitExitBB;
869  }
870 
871  // Now add the unswitched cases. We do this in reverse order as we built them
872  // in reverse order.
873  for (auto &ExitCase : reverse(ExitCases)) {
874  ConstantInt *CaseVal = std::get<0>(ExitCase);
875  BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
876 
877  NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
878  }
879 
880  // If the default was unswitched, re-point it and add explicit cases for
881  // entering the loop.
882  if (DefaultExitBB) {
883  NewSIW->setDefaultDest(DefaultExitBB);
884  NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
885 
886  // We removed all the exit cases, so we just copy the cases to the
887  // unswitched switch.
888  for (const auto &Case : SI.cases())
889  NewSIW.addCase(Case.getCaseValue(), NewPH,
891  } else if (DefaultCaseWeight) {
892  // We have to set branch weight of the default case.
893  uint64_t SW = *DefaultCaseWeight;
894  for (const auto &Case : SI.cases()) {
895  auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
896  assert(W &&
897  "case weight must be defined as default case weight is defined");
898  SW += *W;
899  }
900  NewSIW.setSuccessorWeight(0, SW);
901  }
902 
903  // If we ended up with a common successor for every path through the switch
904  // after unswitching, rewrite it to an unconditional branch to make it easy
905  // to recognize. Otherwise we potentially have to recognize the default case
906  // pointing at unreachable and other complexity.
907  if (CommonSuccBB) {
908  BasicBlock *BB = SI.getParent();
909  // We may have had multiple edges to this common successor block, so remove
910  // them as predecessors. We skip the first one, either the default or the
911  // actual first case.
912  bool SkippedFirst = DefaultExitBB == nullptr;
913  for (auto Case : SI.cases()) {
914  assert(Case.getCaseSuccessor() == CommonSuccBB &&
915  "Non-common successor!");
916  (void)Case;
917  if (!SkippedFirst) {
918  SkippedFirst = true;
919  continue;
920  }
921  CommonSuccBB->removePredecessor(BB,
922  /*KeepOneInputPHIs*/ true);
923  }
924  // Now nuke the switch and replace it with a direct branch.
925  SIW.eraseFromParent();
926  BranchInst::Create(CommonSuccBB, BB);
927  } else if (DefaultExitBB) {
928  assert(SI.getNumCases() > 0 &&
929  "If we had no cases we'd have a common successor!");
930  // Move the last case to the default successor. This is valid as if the
931  // default got unswitched it cannot be reached. This has the advantage of
932  // being simple and keeping the number of edges from this switch to
933  // successors the same, and avoiding any PHI update complexity.
934  auto LastCaseI = std::prev(SI.case_end());
935 
936  SI.setDefaultDest(LastCaseI->getCaseSuccessor());
937  SIW.setSuccessorWeight(
938  0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
939  SIW.removeCase(LastCaseI);
940  }
941 
942  // Walk the unswitched exit blocks and the unswitched split blocks and update
943  // the dominator tree based on the CFG edits. While we are walking unordered
944  // containers here, the API for applyUpdates takes an unordered list of
945  // updates and requires them to not contain duplicates.
947  for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
948  DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
949  DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
950  }
951  for (auto SplitUnswitchedPair : SplitExitBBMap) {
952  DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
953  DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
954  }
955 
956  if (MSSAU) {
957  MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
958  if (VerifyMemorySSA)
959  MSSAU->getMemorySSA()->verifyMemorySSA();
960  } else {
961  DT.applyUpdates(DTUpdates);
962  }
963 
965 
966  // We may have changed the nesting relationship for this loop so hoist it to
967  // its correct parent if needed.
968  hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
969 
970  if (MSSAU && VerifyMemorySSA)
971  MSSAU->getMemorySSA()->verifyMemorySSA();
972 
973  ++NumTrivial;
974  ++NumSwitches;
975  LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
976  return true;
977 }
978 
979 /// This routine scans the loop to find a branch or switch which occurs before
980 /// any side effects occur. These can potentially be unswitched without
981 /// duplicating the loop. If a branch or switch is successfully unswitched the
982 /// scanning continues to see if subsequent branches or switches have become
983 /// trivial. Once all trivial candidates have been unswitched, this routine
984 /// returns.
985 ///
986 /// The return value indicates whether anything was unswitched (and therefore
987 /// changed).
988 ///
989 /// If `SE` is not null, it will be updated based on the potential loop SCEVs
990 /// invalidated by this.
992  LoopInfo &LI, ScalarEvolution *SE,
993  MemorySSAUpdater *MSSAU) {
994  bool Changed = false;
995 
996  // If loop header has only one reachable successor we should keep looking for
997  // trivial condition candidates in the successor as well. An alternative is
998  // to constant fold conditions and merge successors into loop header (then we
999  // only need to check header's terminator). The reason for not doing this in
1000  // LoopUnswitch pass is that it could potentially break LoopPassManager's
1001  // invariants. Folding dead branches could either eliminate the current loop
1002  // or make other loops unreachable. LCSSA form might also not be preserved
1003  // after deleting branches. The following code keeps traversing loop header's
1004  // successors until it finds the trivial condition candidate (condition that
1005  // is not a constant). Since unswitching generates branches with constant
1006  // conditions, this scenario could be very common in practice.
1007  BasicBlock *CurrentBB = L.getHeader();
1009  Visited.insert(CurrentBB);
1010  do {
1011  // Check if there are any side-effecting instructions (e.g. stores, calls,
1012  // volatile loads) in the part of the loop that the code *would* execute
1013  // without unswitching.
1014  if (MSSAU) // Possible early exit with MSSA
1015  if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
1016  if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
1017  return Changed;
1018  if (llvm::any_of(*CurrentBB,
1019  [](Instruction &I) { return I.mayHaveSideEffects(); }))
1020  return Changed;
1021 
1022  Instruction *CurrentTerm = CurrentBB->getTerminator();
1023 
1024  if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1025  // Don't bother trying to unswitch past a switch with a constant
1026  // condition. This should be removed prior to running this pass by
1027  // simplifycfg.
1028  if (isa<Constant>(SI->getCondition()))
1029  return Changed;
1030 
1031  if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
1032  // Couldn't unswitch this one so we're done.
1033  return Changed;
1034 
1035  // Mark that we managed to unswitch something.
1036  Changed = true;
1037 
1038  // If unswitching turned the terminator into an unconditional branch then
1039  // we can continue. The unswitching logic specifically works to fold any
1040  // cases it can into an unconditional branch to make it easier to
1041  // recognize here.
1042  auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
1043  if (!BI || BI->isConditional())
1044  return Changed;
1045 
1046  CurrentBB = BI->getSuccessor(0);
1047  continue;
1048  }
1049 
1050  auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1051  if (!BI)
1052  // We do not understand other terminator instructions.
1053  return Changed;
1054 
1055  // Don't bother trying to unswitch past an unconditional branch or a branch
1056  // with a constant value. These should be removed by simplifycfg prior to
1057  // running this pass.
1058  if (!BI->isConditional() ||
1059  isa<Constant>(skipTrivialSelect(BI->getCondition())))
1060  return Changed;
1061 
1062  // Found a trivial condition candidate: non-foldable conditional branch. If
1063  // we fail to unswitch this, we can't do anything else that is trivial.
1064  if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
1065  return Changed;
1066 
1067  // Mark that we managed to unswitch something.
1068  Changed = true;
1069 
1070  // If we only unswitched some of the conditions feeding the branch, we won't
1071  // have collapsed it to a single successor.
1072  BI = cast<BranchInst>(CurrentBB->getTerminator());
1073  if (BI->isConditional())
1074  return Changed;
1075 
1076  // Follow the newly unconditional branch into its successor.
1077  CurrentBB = BI->getSuccessor(0);
1078 
1079  // When continuing, if we exit the loop or reach a previous visited block,
1080  // then we can not reach any trivial condition candidates (unfoldable
1081  // branch instructions or switch instructions) and no unswitch can happen.
1082  } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
1083 
1084  return Changed;
1085 }
1086 
1087 /// Build the cloned blocks for an unswitched copy of the given loop.
1088 ///
1089 /// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
1090 /// after the split block (`SplitBB`) that will be used to select between the
1091 /// cloned and original loop.
1092 ///
1093 /// This routine handles cloning all of the necessary loop blocks and exit
1094 /// blocks including rewriting their instructions and the relevant PHI nodes.
1095 /// Any loop blocks or exit blocks which are dominated by a different successor
1096 /// than the one for this clone of the loop blocks can be trivially skipped. We
1097 /// use the `DominatingSucc` map to determine whether a block satisfies that
1098 /// property with a simple map lookup.
1099 ///
1100 /// It also correctly creates the unconditional branch in the cloned
1101 /// unswitched parent block to only point at the unswitched successor.
1102 ///
1103 /// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1104 /// block splitting is correctly reflected in `LoopInfo`, essentially all of
1105 /// the cloned blocks (and their loops) are left without full `LoopInfo`
1106 /// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1107 /// blocks to them but doesn't create the cloned `DominatorTree` structure and
1108 /// instead the caller must recompute an accurate DT. It *does* correctly
1109 /// update the `AssumptionCache` provided in `AC`.
1111  Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
1112  ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
1113  BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
1114  const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
1115  ValueToValueMapTy &VMap,
1117  DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU,
1118  ScalarEvolution *SE) {
1119  SmallVector<BasicBlock *, 4> NewBlocks;
1120  NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
1121 
1122  // We will need to clone a bunch of blocks, wrap up the clone operation in
1123  // a helper.
1124  auto CloneBlock = [&](BasicBlock *OldBB) {
1125  // Clone the basic block and insert it before the new preheader.
1126  BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
1127  NewBB->moveBefore(LoopPH);
1128 
1129  // Record this block and the mapping.
1130  NewBlocks.push_back(NewBB);
1131  VMap[OldBB] = NewBB;
1132 
1133  return NewBB;
1134  };
1135 
1136  // We skip cloning blocks when they have a dominating succ that is not the
1137  // succ we are cloning for.
1138  auto SkipBlock = [&](BasicBlock *BB) {
1139  auto It = DominatingSucc.find(BB);
1140  return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
1141  };
1142 
1143  // First, clone the preheader.
1144  auto *ClonedPH = CloneBlock(LoopPH);
1145 
1146  // Then clone all the loop blocks, skipping the ones that aren't necessary.
1147  for (auto *LoopBB : L.blocks())
1148  if (!SkipBlock(LoopBB))
1149  CloneBlock(LoopBB);
1150 
1151  // Split all the loop exit edges so that when we clone the exit blocks, if
1152  // any of the exit blocks are *also* a preheader for some other loop, we
1153  // don't create multiple predecessors entering the loop header.
1154  for (auto *ExitBB : ExitBlocks) {
1155  if (SkipBlock(ExitBB))
1156  continue;
1157 
1158  // When we are going to clone an exit, we don't need to clone all the
1159  // instructions in the exit block and we want to ensure we have an easy
1160  // place to merge the CFG, so split the exit first. This is always safe to
1161  // do because there cannot be any non-loop predecessors of a loop exit in
1162  // loop simplified form.
1163  auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
1164 
1165  // Rearrange the names to make it easier to write test cases by having the
1166  // exit block carry the suffix rather than the merge block carrying the
1167  // suffix.
1168  MergeBB->takeName(ExitBB);
1169  ExitBB->setName(Twine(MergeBB->getName()) + ".split");
1170 
1171  // Now clone the original exit block.
1172  auto *ClonedExitBB = CloneBlock(ExitBB);
1173  assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1174  "Exit block should have been split to have one successor!");
1175  assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1176  "Cloned exit block has the wrong successor!");
1177 
1178  // Remap any cloned instructions and create a merge phi node for them.
1179  for (auto ZippedInsts : llvm::zip_first(
1180  llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
1181  llvm::make_range(ClonedExitBB->begin(),
1182  std::prev(ClonedExitBB->end())))) {
1183  Instruction &I = std::get<0>(ZippedInsts);
1184  Instruction &ClonedI = std::get<1>(ZippedInsts);
1185 
1186  // The only instructions in the exit block should be PHI nodes and
1187  // potentially a landing pad.
1188  assert(
1189  (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&
1190  "Bad instruction in exit block!");
1191  // We should have a value map between the instruction and its clone.
1192  assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
1193 
1194  // Forget SCEVs based on exit phis in case SCEV looked through the phi.
1195  if (SE && isa<PHINode>(I))
1196  SE->forgetValue(&I);
1197 
1198  auto *MergePN =
1199  PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi",
1200  &*MergeBB->getFirstInsertionPt());
1201  I.replaceAllUsesWith(MergePN);
1202  MergePN->addIncoming(&I, ExitBB);
1203  MergePN->addIncoming(&ClonedI, ClonedExitBB);
1204  }
1205  }
1206 
1207  // Rewrite the instructions in the cloned blocks to refer to the instructions
1208  // in the cloned blocks. We have to do this as a second pass so that we have
1209  // everything available. Also, we have inserted new instructions which may
1210  // include assume intrinsics, so we update the assumption cache while
1211  // processing this.
1212  for (auto *ClonedBB : NewBlocks)
1213  for (Instruction &I : *ClonedBB) {
1214  RemapInstruction(&I, VMap,
1216  if (auto *II = dyn_cast<AssumeInst>(&I))
1217  AC.registerAssumption(II);
1218  }
1219 
1220  // Update any PHI nodes in the cloned successors of the skipped blocks to not
1221  // have spurious incoming values.
1222  for (auto *LoopBB : L.blocks())
1223  if (SkipBlock(LoopBB))
1224  for (auto *SuccBB : successors(LoopBB))
1225  if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
1226  for (PHINode &PN : ClonedSuccBB->phis())
1227  PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
1228 
1229  // Remove the cloned parent as a predecessor of any successor we ended up
1230  // cloning other than the unswitched one.
1231  auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
1232  for (auto *SuccBB : successors(ParentBB)) {
1233  if (SuccBB == UnswitchedSuccBB)
1234  continue;
1235 
1236  auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
1237  if (!ClonedSuccBB)
1238  continue;
1239 
1240  ClonedSuccBB->removePredecessor(ClonedParentBB,
1241  /*KeepOneInputPHIs*/ true);
1242  }
1243 
1244  // Replace the cloned branch with an unconditional branch to the cloned
1245  // unswitched successor.
1246  auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
1247  Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1248  // Trivial Simplification. If Terminator is a conditional branch and
1249  // condition becomes dead - erase it.
1250  Value *ClonedConditionToErase = nullptr;
1251  if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1252  ClonedConditionToErase = BI->getCondition();
1253  else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1254  ClonedConditionToErase = SI->getCondition();
1255 
1256  ClonedTerminator->eraseFromParent();
1257  BranchInst::Create(ClonedSuccBB, ClonedParentBB);
1258 
1259  if (ClonedConditionToErase)
1260  RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
1261  MSSAU);
1262 
1263  // If there are duplicate entries in the PHI nodes because of multiple edges
1264  // to the unswitched successor, we need to nuke all but one as we replaced it
1265  // with a direct branch.
1266  for (PHINode &PN : ClonedSuccBB->phis()) {
1267  bool Found = false;
1268  // Loop over the incoming operands backwards so we can easily delete as we
1269  // go without invalidating the index.
1270  for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
1271  if (PN.getIncomingBlock(i) != ClonedParentBB)
1272  continue;
1273  if (!Found) {
1274  Found = true;
1275  continue;
1276  }
1277  PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
1278  }
1279  }
1280 
1281  // Record the domtree updates for the new blocks.
1283  for (auto *ClonedBB : NewBlocks) {
1284  for (auto *SuccBB : successors(ClonedBB))
1285  if (SuccSet.insert(SuccBB).second)
1286  DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1287  SuccSet.clear();
1288  }
1289 
1290  return ClonedPH;
1291 }
1292 
1293 /// Recursively clone the specified loop and all of its children.
1294 ///
1295 /// The target parent loop for the clone should be provided, or can be null if
1296 /// the clone is a top-level loop. While cloning, all the blocks are mapped
1297 /// with the provided value map. The entire original loop must be present in
1298 /// the value map. The cloned loop is returned.
1299 static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
1300  const ValueToValueMapTy &VMap, LoopInfo &LI) {
1301  auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
1302  assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
1303  ClonedL.reserveBlocks(OrigL.getNumBlocks());
1304  for (auto *BB : OrigL.blocks()) {
1305  auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
1306  ClonedL.addBlockEntry(ClonedBB);
1307  if (LI.getLoopFor(BB) == &OrigL)
1308  LI.changeLoopFor(ClonedBB, &ClonedL);
1309  }
1310  };
1311 
1312  // We specially handle the first loop because it may get cloned into
1313  // a different parent and because we most commonly are cloning leaf loops.
1314  Loop *ClonedRootL = LI.AllocateLoop();
1315  if (RootParentL)
1316  RootParentL->addChildLoop(ClonedRootL);
1317  else
1318  LI.addTopLevelLoop(ClonedRootL);
1319  AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1320 
1321  if (OrigRootL.isInnermost())
1322  return ClonedRootL;
1323 
1324  // If we have a nest, we can quickly clone the entire loop nest using an
1325  // iterative approach because it is a tree. We keep the cloned parent in the
1326  // data structure to avoid repeatedly querying through a map to find it.
1327  SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
1328  // Build up the loops to clone in reverse order as we'll clone them from the
1329  // back.
1330  for (Loop *ChildL : llvm::reverse(OrigRootL))
1331  LoopsToClone.push_back({ClonedRootL, ChildL});
1332  do {
1333  Loop *ClonedParentL, *L;
1334  std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
1335  Loop *ClonedL = LI.AllocateLoop();
1336  ClonedParentL->addChildLoop(ClonedL);
1337  AddClonedBlocksToLoop(*L, *ClonedL);
1338  for (Loop *ChildL : llvm::reverse(*L))
1339  LoopsToClone.push_back({ClonedL, ChildL});
1340  } while (!LoopsToClone.empty());
1341 
1342  return ClonedRootL;
1343 }
1344 
1345 /// Build the cloned loops of an original loop from unswitching.
1346 ///
1347 /// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1348 /// operation. We need to re-verify that there even is a loop (as the backedge
1349 /// may not have been cloned), and even if there are remaining backedges the
1350 /// backedge set may be different. However, we know that each child loop is
1351 /// undisturbed, we only need to find where to place each child loop within
1352 /// either any parent loop or within a cloned version of the original loop.
1353 ///
1354 /// Because child loops may end up cloned outside of any cloned version of the
1355 /// original loop, multiple cloned sibling loops may be created. All of them
1356 /// are returned so that the newly introduced loop nest roots can be
1357 /// identified.
1358 static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1359  const ValueToValueMapTy &VMap, LoopInfo &LI,
1360  SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1361  Loop *ClonedL = nullptr;
1362 
1363  auto *OrigPH = OrigL.getLoopPreheader();
1364  auto *OrigHeader = OrigL.getHeader();
1365 
1366  auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1367  auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1368 
1369  // We need to know the loops of the cloned exit blocks to even compute the
1370  // accurate parent loop. If we only clone exits to some parent of the
1371  // original parent, we want to clone into that outer loop. We also keep track
1372  // of the loops that our cloned exit blocks participate in.
1373  Loop *ParentL = nullptr;
1374  SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1376  ClonedExitsInLoops.reserve(ExitBlocks.size());
1377  for (auto *ExitBB : ExitBlocks)
1378  if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1379  if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1380  ExitLoopMap[ClonedExitBB] = ExitL;
1381  ClonedExitsInLoops.push_back(ClonedExitBB);
1382  if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1383  ParentL = ExitL;
1384  }
1385  assert((!ParentL || ParentL == OrigL.getParentLoop() ||
1386  ParentL->contains(OrigL.getParentLoop())) &&
1387  "The computed parent loop should always contain (or be) the parent of "
1388  "the original loop.");
1389 
1390  // We build the set of blocks dominated by the cloned header from the set of
1391  // cloned blocks out of the original loop. While not all of these will
1392  // necessarily be in the cloned loop, it is enough to establish that they
1393  // aren't in unreachable cycles, etc.
1394  SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1395  for (auto *BB : OrigL.blocks())
1396  if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1397  ClonedLoopBlocks.insert(ClonedBB);
1398 
1399  // Rebuild the set of blocks that will end up in the cloned loop. We may have
1400  // skipped cloning some region of this loop which can in turn skip some of
1401  // the backedges so we have to rebuild the blocks in the loop based on the
1402  // backedges that remain after cloning.
1404  SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1405  for (auto *Pred : predecessors(ClonedHeader)) {
1406  // The only possible non-loop header predecessor is the preheader because
1407  // we know we cloned the loop in simplified form.
1408  if (Pred == ClonedPH)
1409  continue;
1410 
1411  // Because the loop was in simplified form, the only non-loop predecessor
1412  // should be the preheader.
1413  assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
1414  "header other than the preheader "
1415  "that is not part of the loop!");
1416 
1417  // Insert this block into the loop set and on the first visit (and if it
1418  // isn't the header we're currently walking) put it into the worklist to
1419  // recurse through.
1420  if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1421  Worklist.push_back(Pred);
1422  }
1423 
1424  // If we had any backedges then there *is* a cloned loop. Put the header into
1425  // the loop set and then walk the worklist backwards to find all the blocks
1426  // that remain within the loop after cloning.
1427  if (!BlocksInClonedLoop.empty()) {
1428  BlocksInClonedLoop.insert(ClonedHeader);
1429 
1430  while (!Worklist.empty()) {
1431  BasicBlock *BB = Worklist.pop_back_val();
1432  assert(BlocksInClonedLoop.count(BB) &&
1433  "Didn't put block into the loop set!");
1434 
1435  // Insert any predecessors that are in the possible set into the cloned
1436  // set, and if the insert is successful, add them to the worklist. Note
1437  // that we filter on the blocks that are definitely reachable via the
1438  // backedge to the loop header so we may prune out dead code within the
1439  // cloned loop.
1440  for (auto *Pred : predecessors(BB))
1441  if (ClonedLoopBlocks.count(Pred) &&
1442  BlocksInClonedLoop.insert(Pred).second)
1443  Worklist.push_back(Pred);
1444  }
1445 
1446  ClonedL = LI.AllocateLoop();
1447  if (ParentL) {
1448  ParentL->addBasicBlockToLoop(ClonedPH, LI);
1449  ParentL->addChildLoop(ClonedL);
1450  } else {
1451  LI.addTopLevelLoop(ClonedL);
1452  }
1453  NonChildClonedLoops.push_back(ClonedL);
1454 
1455  ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1456  // We don't want to just add the cloned loop blocks based on how we
1457  // discovered them. The original order of blocks was carefully built in
1458  // a way that doesn't rely on predecessor ordering. Rather than re-invent
1459  // that logic, we just re-walk the original blocks (and those of the child
1460  // loops) and filter them as we add them into the cloned loop.
1461  for (auto *BB : OrigL.blocks()) {
1462  auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1463  if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1464  continue;
1465 
1466  // Directly add the blocks that are only in this loop.
1467  if (LI.getLoopFor(BB) == &OrigL) {
1468  ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1469  continue;
1470  }
1471 
1472  // We want to manually add it to this loop and parents.
1473  // Registering it with LoopInfo will happen when we clone the top
1474  // loop for this block.
1475  for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1476  PL->addBlockEntry(ClonedBB);
1477  }
1478 
1479  // Now add each child loop whose header remains within the cloned loop. All
1480  // of the blocks within the loop must satisfy the same constraints as the
1481  // header so once we pass the header checks we can just clone the entire
1482  // child loop nest.
1483  for (Loop *ChildL : OrigL) {
1484  auto *ClonedChildHeader =
1485  cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1486  if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1487  continue;
1488 
1489 #ifndef NDEBUG
1490  // We should never have a cloned child loop header but fail to have
1491  // all of the blocks for that child loop.
1492  for (auto *ChildLoopBB : ChildL->blocks())
1493  assert(BlocksInClonedLoop.count(
1494  cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
1495  "Child cloned loop has a header within the cloned outer "
1496  "loop but not all of its blocks!");
1497 #endif
1498 
1499  cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1500  }
1501  }
1502 
1503  // Now that we've handled all the components of the original loop that were
1504  // cloned into a new loop, we still need to handle anything from the original
1505  // loop that wasn't in a cloned loop.
1506 
1507  // Figure out what blocks are left to place within any loop nest containing
1508  // the unswitched loop. If we never formed a loop, the cloned PH is one of
1509  // them.
1510  SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1511  if (BlocksInClonedLoop.empty())
1512  UnloopedBlockSet.insert(ClonedPH);
1513  for (auto *ClonedBB : ClonedLoopBlocks)
1514  if (!BlocksInClonedLoop.count(ClonedBB))
1515  UnloopedBlockSet.insert(ClonedBB);
1516 
1517  // Copy the cloned exits and sort them in ascending loop depth, we'll work
1518  // backwards across these to process them inside out. The order shouldn't
1519  // matter as we're just trying to build up the map from inside-out; we use
1520  // the map in a more stably ordered way below.
1521  auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1522  llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1523  return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1524  ExitLoopMap.lookup(RHS)->getLoopDepth();
1525  });
1526 
1527  // Populate the existing ExitLoopMap with everything reachable from each
1528  // exit, starting from the inner most exit.
1529  while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1530  assert(Worklist.empty() && "Didn't clear worklist!");
1531 
1532  BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1533  Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1534 
1535  // Walk the CFG back until we hit the cloned PH adding everything reachable
1536  // and in the unlooped set to this exit block's loop.
1537  Worklist.push_back(ExitBB);
1538  do {
1539  BasicBlock *BB = Worklist.pop_back_val();
1540  // We can stop recursing at the cloned preheader (if we get there).
1541  if (BB == ClonedPH)
1542  continue;
1543 
1544  for (BasicBlock *PredBB : predecessors(BB)) {
1545  // If this pred has already been moved to our set or is part of some
1546  // (inner) loop, no update needed.
1547  if (!UnloopedBlockSet.erase(PredBB)) {
1548  assert(
1549  (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
1550  "Predecessor not mapped to a loop!");
1551  continue;
1552  }
1553 
1554  // We just insert into the loop set here. We'll add these blocks to the
1555  // exit loop after we build up the set in an order that doesn't rely on
1556  // predecessor order (which in turn relies on use list order).
1557  bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1558  (void)Inserted;
1559  assert(Inserted && "Should only visit an unlooped block once!");
1560 
1561  // And recurse through to its predecessors.
1562  Worklist.push_back(PredBB);
1563  }
1564  } while (!Worklist.empty());
1565  }
1566 
1567  // Now that the ExitLoopMap gives as mapping for all the non-looping cloned
1568  // blocks to their outer loops, walk the cloned blocks and the cloned exits
1569  // in their original order adding them to the correct loop.
1570 
1571  // We need a stable insertion order. We use the order of the original loop
1572  // order and map into the correct parent loop.
1573  for (auto *BB : llvm::concat<BasicBlock *const>(
1574  makeArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1575  if (Loop *OuterL = ExitLoopMap.lookup(BB))
1576  OuterL->addBasicBlockToLoop(BB, LI);
1577 
1578 #ifndef NDEBUG
1579  for (auto &BBAndL : ExitLoopMap) {
1580  auto *BB = BBAndL.first;
1581  auto *OuterL = BBAndL.second;
1582  assert(LI.getLoopFor(BB) == OuterL &&
1583  "Failed to put all blocks into outer loops!");
1584  }
1585 #endif
1586 
1587  // Now that all the blocks are placed into the correct containing loop in the
1588  // absence of child loops, find all the potentially cloned child loops and
1589  // clone them into whatever outer loop we placed their header into.
1590  for (Loop *ChildL : OrigL) {
1591  auto *ClonedChildHeader =
1592  cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1593  if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1594  continue;
1595 
1596 #ifndef NDEBUG
1597  for (auto *ChildLoopBB : ChildL->blocks())
1598  assert(VMap.count(ChildLoopBB) &&
1599  "Cloned a child loop header but not all of that loops blocks!");
1600 #endif
1601 
1602  NonChildClonedLoops.push_back(cloneLoopNest(
1603  *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1604  }
1605 }
1606 
1607 static void
1609  ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1610  DominatorTree &DT, MemorySSAUpdater *MSSAU) {
1611  // Find all the dead clones, and remove them from their successors.
1612  SmallVector<BasicBlock *, 16> DeadBlocks;
1613  for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1614  for (const auto &VMap : VMaps)
1615  if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1616  if (!DT.isReachableFromEntry(ClonedBB)) {
1617  for (BasicBlock *SuccBB : successors(ClonedBB))
1618  SuccBB->removePredecessor(ClonedBB);
1619  DeadBlocks.push_back(ClonedBB);
1620  }
1621 
1622  // Remove all MemorySSA in the dead blocks
1623  if (MSSAU) {
1624  SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1625  DeadBlocks.end());
1626  MSSAU->removeBlocks(DeadBlockSet);
1627  }
1628 
1629  // Drop any remaining references to break cycles.
1630  for (BasicBlock *BB : DeadBlocks)
1631  BB->dropAllReferences();
1632  // Erase them from the IR.
1633  for (BasicBlock *BB : DeadBlocks)
1634  BB->eraseFromParent();
1635 }
1636 
1637 static void
1639  SmallVectorImpl<BasicBlock *> &ExitBlocks,
1640  DominatorTree &DT, LoopInfo &LI,
1641  MemorySSAUpdater *MSSAU,
1642  ScalarEvolution *SE,
1643  function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
1644  // Find all the dead blocks tied to this loop, and remove them from their
1645  // successors.
1646  SmallSetVector<BasicBlock *, 8> DeadBlockSet;
1647 
1648  // Start with loop/exit blocks and get a transitive closure of reachable dead
1649  // blocks.
1650  SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1651  ExitBlocks.end());
1652  DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1653  while (!DeathCandidates.empty()) {
1654  auto *BB = DeathCandidates.pop_back_val();
1655  if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1656  for (BasicBlock *SuccBB : successors(BB)) {
1657  SuccBB->removePredecessor(BB);
1658  DeathCandidates.push_back(SuccBB);
1659  }
1660  DeadBlockSet.insert(BB);
1661  }
1662  }
1663 
1664  // Remove all MemorySSA in the dead blocks
1665  if (MSSAU)
1666  MSSAU->removeBlocks(DeadBlockSet);
1667 
1668  // Filter out the dead blocks from the exit blocks list so that it can be
1669  // used in the caller.
1670  llvm::erase_if(ExitBlocks,
1671  [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1672 
1673  // Walk from this loop up through its parents removing all of the dead blocks.
1674  for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1675  for (auto *BB : DeadBlockSet)
1676  ParentL->getBlocksSet().erase(BB);
1677  llvm::erase_if(ParentL->getBlocksVector(),
1678  [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1679  }
1680 
1681  // Now delete the dead child loops. This raw delete will clear them
1682  // recursively.
1683  llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1684  if (!DeadBlockSet.count(ChildL->getHeader()))
1685  return false;
1686 
1687  assert(llvm::all_of(ChildL->blocks(),
1688  [&](BasicBlock *ChildBB) {
1689  return DeadBlockSet.count(ChildBB);
1690  }) &&
1691  "If the child loop header is dead all blocks in the child loop must "
1692  "be dead as well!");
1693  DestroyLoopCB(*ChildL, ChildL->getName());
1694  if (SE)
1696  LI.destroy(ChildL);
1697  return true;
1698  });
1699 
1700  // Remove the loop mappings for the dead blocks and drop all the references
1701  // from these blocks to others to handle cyclic references as we start
1702  // deleting the blocks themselves.
1703  for (auto *BB : DeadBlockSet) {
1704  // Check that the dominator tree has already been updated.
1705  assert(!DT.getNode(BB) && "Should already have cleared domtree!");
1706  LI.changeLoopFor(BB, nullptr);
1707  // Drop all uses of the instructions to make sure we won't have dangling
1708  // uses in other blocks.
1709  for (auto &I : *BB)
1710  if (!I.use_empty())
1711  I.replaceAllUsesWith(PoisonValue::get(I.getType()));
1712  BB->dropAllReferences();
1713  }
1714 
1715  // Actually delete the blocks now that they've been fully unhooked from the
1716  // IR.
1717  for (auto *BB : DeadBlockSet)
1718  BB->eraseFromParent();
1719 }
1720 
1721 /// Recompute the set of blocks in a loop after unswitching.
1722 ///
1723 /// This walks from the original headers predecessors to rebuild the loop. We
1724 /// take advantage of the fact that new blocks can't have been added, and so we
1725 /// filter by the original loop's blocks. This also handles potentially
1726 /// unreachable code that we don't want to explore but might be found examining
1727 /// the predecessors of the header.
1728 ///
1729 /// If the original loop is no longer a loop, this will return an empty set. If
1730 /// it remains a loop, all the blocks within it will be added to the set
1731 /// (including those blocks in inner loops).
1733  LoopInfo &LI) {
1735 
1736  auto *PH = L.getLoopPreheader();
1737  auto *Header = L.getHeader();
1738 
1739  // A worklist to use while walking backwards from the header.
1741 
1742  // First walk the predecessors of the header to find the backedges. This will
1743  // form the basis of our walk.
1744  for (auto *Pred : predecessors(Header)) {
1745  // Skip the preheader.
1746  if (Pred == PH)
1747  continue;
1748 
1749  // Because the loop was in simplified form, the only non-loop predecessor
1750  // is the preheader.
1751  assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1752  "than the preheader that is not part of the "
1753  "loop!");
1754 
1755  // Insert this block into the loop set and on the first visit and, if it
1756  // isn't the header we're currently walking, put it into the worklist to
1757  // recurse through.
1758  if (LoopBlockSet.insert(Pred).second && Pred != Header)
1759  Worklist.push_back(Pred);
1760  }
1761 
1762  // If no backedges were found, we're done.
1763  if (LoopBlockSet.empty())
1764  return LoopBlockSet;
1765 
1766  // We found backedges, recurse through them to identify the loop blocks.
1767  while (!Worklist.empty()) {
1768  BasicBlock *BB = Worklist.pop_back_val();
1769  assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1770 
1771  // No need to walk past the header.
1772  if (BB == Header)
1773  continue;
1774 
1775  // Because we know the inner loop structure remains valid we can use the
1776  // loop structure to jump immediately across the entire nested loop.
1777  // Further, because it is in loop simplified form, we can directly jump
1778  // to its preheader afterward.
1779  if (Loop *InnerL = LI.getLoopFor(BB))
1780  if (InnerL != &L) {
1781  assert(L.contains(InnerL) &&
1782  "Should not reach a loop *outside* this loop!");
1783  // The preheader is the only possible predecessor of the loop so
1784  // insert it into the set and check whether it was already handled.
1785  auto *InnerPH = InnerL->getLoopPreheader();
1786  assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1787  "but not contain the inner loop "
1788  "preheader!");
1789  if (!LoopBlockSet.insert(InnerPH).second)
1790  // The only way to reach the preheader is through the loop body
1791  // itself so if it has been visited the loop is already handled.
1792  continue;
1793 
1794  // Insert all of the blocks (other than those already present) into
1795  // the loop set. We expect at least the block that led us to find the
1796  // inner loop to be in the block set, but we may also have other loop
1797  // blocks if they were already enqueued as predecessors of some other
1798  // outer loop block.
1799  for (auto *InnerBB : InnerL->blocks()) {
1800  if (InnerBB == BB) {
1801  assert(LoopBlockSet.count(InnerBB) &&
1802  "Block should already be in the set!");
1803  continue;
1804  }
1805 
1806  LoopBlockSet.insert(InnerBB);
1807  }
1808 
1809  // Add the preheader to the worklist so we will continue past the
1810  // loop body.
1811  Worklist.push_back(InnerPH);
1812  continue;
1813  }
1814 
1815  // Insert any predecessors that were in the original loop into the new
1816  // set, and if the insert is successful, add them to the worklist.
1817  for (auto *Pred : predecessors(BB))
1818  if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1819  Worklist.push_back(Pred);
1820  }
1821 
1822  assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
1823 
1824  // We've found all the blocks participating in the loop, return our completed
1825  // set.
1826  return LoopBlockSet;
1827 }
1828 
1829 /// Rebuild a loop after unswitching removes some subset of blocks and edges.
1830 ///
1831 /// The removal may have removed some child loops entirely but cannot have
1832 /// disturbed any remaining child loops. However, they may need to be hoisted
1833 /// to the parent loop (or to be top-level loops). The original loop may be
1834 /// completely removed.
1835 ///
1836 /// The sibling loops resulting from this update are returned. If the original
1837 /// loop remains a valid loop, it will be the first entry in this list with all
1838 /// of the newly sibling loops following it.
1839 ///
1840 /// Returns true if the loop remains a loop after unswitching, and false if it
1841 /// is no longer a loop after unswitching (and should not continue to be
1842 /// referenced).
1844  LoopInfo &LI,
1845  SmallVectorImpl<Loop *> &HoistedLoops,
1846  ScalarEvolution *SE) {
1847  auto *PH = L.getLoopPreheader();
1848 
1849  // Compute the actual parent loop from the exit blocks. Because we may have
1850  // pruned some exits the loop may be different from the original parent.
1851  Loop *ParentL = nullptr;
1852  SmallVector<Loop *, 4> ExitLoops;
1853  SmallVector<BasicBlock *, 4> ExitsInLoops;
1854  ExitsInLoops.reserve(ExitBlocks.size());
1855  for (auto *ExitBB : ExitBlocks)
1856  if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1857  ExitLoops.push_back(ExitL);
1858  ExitsInLoops.push_back(ExitBB);
1859  if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1860  ParentL = ExitL;
1861  }
1862 
1863  // Recompute the blocks participating in this loop. This may be empty if it
1864  // is no longer a loop.
1865  auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1866 
1867  // If we still have a loop, we need to re-set the loop's parent as the exit
1868  // block set changing may have moved it within the loop nest. Note that this
1869  // can only happen when this loop has a parent as it can only hoist the loop
1870  // *up* the nest.
1871  if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1872  // Remove this loop's (original) blocks from all of the intervening loops.
1873  for (Loop *IL = L.getParentLoop(); IL != ParentL;
1874  IL = IL->getParentLoop()) {
1875  IL->getBlocksSet().erase(PH);
1876  for (auto *BB : L.blocks())
1877  IL->getBlocksSet().erase(BB);
1878  llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1879  return BB == PH || L.contains(BB);
1880  });
1881  }
1882 
1883  LI.changeLoopFor(PH, ParentL);
1884  L.getParentLoop()->removeChildLoop(&L);
1885  if (ParentL)
1886  ParentL->addChildLoop(&L);
1887  else
1888  LI.addTopLevelLoop(&L);
1889  }
1890 
1891  // Now we update all the blocks which are no longer within the loop.
1892  auto &Blocks = L.getBlocksVector();
1893  auto BlocksSplitI =
1894  LoopBlockSet.empty()
1895  ? Blocks.begin()
1896  : std::stable_partition(
1897  Blocks.begin(), Blocks.end(),
1898  [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
1899 
1900  // Before we erase the list of unlooped blocks, build a set of them.
1901  SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
1902  if (LoopBlockSet.empty())
1903  UnloopedBlocks.insert(PH);
1904 
1905  // Now erase these blocks from the loop.
1906  for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
1907  L.getBlocksSet().erase(BB);
1908  Blocks.erase(BlocksSplitI, Blocks.end());
1909 
1910  // Sort the exits in ascending loop depth, we'll work backwards across these
1911  // to process them inside out.
1912  llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1913  return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
1914  });
1915 
1916  // We'll build up a set for each exit loop.
1917  SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
1918  Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
1919 
1920  auto RemoveUnloopedBlocksFromLoop =
1921  [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
1922  for (auto *BB : UnloopedBlocks)
1923  L.getBlocksSet().erase(BB);
1925  return UnloopedBlocks.count(BB);
1926  });
1927  };
1928 
1930  while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
1931  assert(Worklist.empty() && "Didn't clear worklist!");
1932  assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
1933 
1934  // Grab the next exit block, in decreasing loop depth order.
1935  BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
1936  Loop &ExitL = *LI.getLoopFor(ExitBB);
1937  assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
1938 
1939  // Erase all of the unlooped blocks from the loops between the previous
1940  // exit loop and this exit loop. This works because the ExitInLoops list is
1941  // sorted in increasing order of loop depth and thus we visit loops in
1942  // decreasing order of loop depth.
1943  for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
1944  RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1945 
1946  // Walk the CFG back until we hit the cloned PH adding everything reachable
1947  // and in the unlooped set to this exit block's loop.
1948  Worklist.push_back(ExitBB);
1949  do {
1950  BasicBlock *BB = Worklist.pop_back_val();
1951  // We can stop recursing at the cloned preheader (if we get there).
1952  if (BB == PH)
1953  continue;
1954 
1955  for (BasicBlock *PredBB : predecessors(BB)) {
1956  // If this pred has already been moved to our set or is part of some
1957  // (inner) loop, no update needed.
1958  if (!UnloopedBlocks.erase(PredBB)) {
1959  assert((NewExitLoopBlocks.count(PredBB) ||
1960  ExitL.contains(LI.getLoopFor(PredBB))) &&
1961  "Predecessor not in a nested loop (or already visited)!");
1962  continue;
1963  }
1964 
1965  // We just insert into the loop set here. We'll add these blocks to the
1966  // exit loop after we build up the set in a deterministic order rather
1967  // than the predecessor-influenced visit order.
1968  bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
1969  (void)Inserted;
1970  assert(Inserted && "Should only visit an unlooped block once!");
1971 
1972  // And recurse through to its predecessors.
1973  Worklist.push_back(PredBB);
1974  }
1975  } while (!Worklist.empty());
1976 
1977  // If blocks in this exit loop were directly part of the original loop (as
1978  // opposed to a child loop) update the map to point to this exit loop. This
1979  // just updates a map and so the fact that the order is unstable is fine.
1980  for (auto *BB : NewExitLoopBlocks)
1981  if (Loop *BBL = LI.getLoopFor(BB))
1982  if (BBL == &L || !L.contains(BBL))
1983  LI.changeLoopFor(BB, &ExitL);
1984 
1985  // We will remove the remaining unlooped blocks from this loop in the next
1986  // iteration or below.
1987  NewExitLoopBlocks.clear();
1988  }
1989 
1990  // Any remaining unlooped blocks are no longer part of any loop unless they
1991  // are part of some child loop.
1992  for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
1993  RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1994  for (auto *BB : UnloopedBlocks)
1995  if (Loop *BBL = LI.getLoopFor(BB))
1996  if (BBL == &L || !L.contains(BBL))
1997  LI.changeLoopFor(BB, nullptr);
1998 
1999  // Sink all the child loops whose headers are no longer in the loop set to
2000  // the parent (or to be top level loops). We reach into the loop and directly
2001  // update its subloop vector to make this batch update efficient.
2002  auto &SubLoops = L.getSubLoopsVector();
2003  auto SubLoopsSplitI =
2004  LoopBlockSet.empty()
2005  ? SubLoops.begin()
2006  : std::stable_partition(
2007  SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
2008  return LoopBlockSet.count(SubL->getHeader());
2009  });
2010  for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
2011  HoistedLoops.push_back(HoistedL);
2012  HoistedL->setParentLoop(nullptr);
2013 
2014  // To compute the new parent of this hoisted loop we look at where we
2015  // placed the preheader above. We can't lookup the header itself because we
2016  // retained the mapping from the header to the hoisted loop. But the
2017  // preheader and header should have the exact same new parent computed
2018  // based on the set of exit blocks from the original loop as the preheader
2019  // is a predecessor of the header and so reached in the reverse walk. And
2020  // because the loops were all in simplified form the preheader of the
2021  // hoisted loop can't be part of some *other* loop.
2022  if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
2023  NewParentL->addChildLoop(HoistedL);
2024  else
2025  LI.addTopLevelLoop(HoistedL);
2026  }
2027  SubLoops.erase(SubLoopsSplitI, SubLoops.end());
2028 
2029  // Actually delete the loop if nothing remained within it.
2030  if (Blocks.empty()) {
2031  assert(SubLoops.empty() &&
2032  "Failed to remove all subloops from the original loop!");
2033  if (Loop *ParentL = L.getParentLoop())
2034  ParentL->removeChildLoop(llvm::find(*ParentL, &L));
2035  else
2036  LI.removeLoop(llvm::find(LI, &L));
2037  // markLoopAsDeleted for L should be triggered by the caller (it is typically
2038  // done by using the UnswitchCB callback).
2039  if (SE)
2041  LI.destroy(&L);
2042  return false;
2043  }
2044 
2045  return true;
2046 }
2047 
2048 /// Helper to visit a dominator subtree, invoking a callable on each node.
2049 ///
2050 /// Returning false at any point will stop walking past that node of the tree.
2051 template <typename CallableT>
2052 void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
2053  SmallVector<DomTreeNode *, 4> DomWorklist;
2054  DomWorklist.push_back(DT[BB]);
2055 #ifndef NDEBUG
2057  Visited.insert(DT[BB]);
2058 #endif
2059  do {
2060  DomTreeNode *N = DomWorklist.pop_back_val();
2061 
2062  // Visit this node.
2063  if (!Callable(N->getBlock()))
2064  continue;
2065 
2066  // Accumulate the child nodes.
2067  for (DomTreeNode *ChildN : *N) {
2068  assert(Visited.insert(ChildN).second &&
2069  "Cannot visit a node twice when walking a tree!");
2070  DomWorklist.push_back(ChildN);
2071  }
2072  } while (!DomWorklist.empty());
2073 }
2074 
2076  Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
2077  IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI,
2078  AssumptionCache &AC,
2079  function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2080  ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
2081  function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
2082  auto *ParentBB = TI.getParent();
2083  BranchInst *BI = dyn_cast<BranchInst>(&TI);
2084  SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2085 
2086  // We can only unswitch switches, conditional branches with an invariant
2087  // condition, or combining invariant conditions with an instruction or
2088  // partially invariant instructions.
2089  assert((SI || (BI && BI->isConditional())) &&
2090  "Can only unswitch switches and conditional branch!");
2091  bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
2092  bool FullUnswitch =
2093  SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] &&
2094  !PartiallyInvariant);
2095  if (FullUnswitch)
2096  assert(Invariants.size() == 1 &&
2097  "Cannot have other invariants with full unswitching!");
2098  else
2099  assert(isa<Instruction>(skipTrivialSelect(BI->getCondition())) &&
2100  "Partial unswitching requires an instruction as the condition!");
2101 
2102  if (MSSAU && VerifyMemorySSA)
2103  MSSAU->getMemorySSA()->verifyMemorySSA();
2104 
2105  // Constant and BBs tracking the cloned and continuing successor. When we are
2106  // unswitching the entire condition, this can just be trivially chosen to
2107  // unswitch towards `true`. However, when we are unswitching a set of
2108  // invariants combined with `and` or `or` or partially invariant instructions,
2109  // the combining operation determines the best direction to unswitch: we want
2110  // to unswitch the direction that will collapse the branch.
2111  bool Direction = true;
2112  int ClonedSucc = 0;
2113  if (!FullUnswitch) {
2115  (void)Cond;
2117  PartiallyInvariant) &&
2118  "Only `or`, `and`, an `select`, partially invariant instructions "
2119  "can combine invariants being unswitched.");
2120  if (!match(Cond, m_LogicalOr())) {
2121  if (match(Cond, m_LogicalAnd()) ||
2122  (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
2123  Direction = false;
2124  ClonedSucc = 1;
2125  }
2126  }
2127  }
2128 
2129  BasicBlock *RetainedSuccBB =
2130  BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2131  SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2132  if (BI)
2133  UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2134  else
2135  for (auto Case : SI->cases())
2136  if (Case.getCaseSuccessor() != RetainedSuccBB)
2137  UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2138 
2139  assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
2140  "Should not unswitch the same successor we are retaining!");
2141 
2142  // The branch should be in this exact loop. Any inner loop's invariant branch
2143  // should be handled by unswitching that inner loop. The caller of this
2144  // routine should filter out any candidates that remain (but were skipped for
2145  // whatever reason).
2146  assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
2147 
2148  // Compute the parent loop now before we start hacking on things.
2149  Loop *ParentL = L.getParentLoop();
2150  // Get blocks in RPO order for MSSA update, before changing the CFG.
2151  LoopBlocksRPO LBRPO(&L);
2152  if (MSSAU)
2153  LBRPO.perform(&LI);
2154 
2155  // Compute the outer-most loop containing one of our exit blocks. This is the
2156  // furthest up our loopnest which can be mutated, which we will use below to
2157  // update things.
2158  Loop *OuterExitL = &L;
2159  SmallVector<BasicBlock *, 4> ExitBlocks;
2160  L.getUniqueExitBlocks(ExitBlocks);
2161  for (auto *ExitBB : ExitBlocks) {
2162  Loop *NewOuterExitL = LI.getLoopFor(ExitBB);
2163  if (!NewOuterExitL) {
2164  // We exited the entire nest with this block, so we're done.
2165  OuterExitL = nullptr;
2166  break;
2167  }
2168  if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2169  OuterExitL = NewOuterExitL;
2170  }
2171 
2172  // At this point, we're definitely going to unswitch something so invalidate
2173  // any cached information in ScalarEvolution for the outer most loop
2174  // containing an exit block and all nested loops.
2175  if (SE) {
2176  if (OuterExitL)
2177  SE->forgetLoop(OuterExitL);
2178  else
2179  SE->forgetTopmostLoop(&L);
2181  }
2182 
2183  bool InsertFreeze = false;
2184  if (FreezeLoopUnswitchCond) {
2185  ICFLoopSafetyInfo SafetyInfo;
2186  SafetyInfo.computeLoopSafetyInfo(&L);
2187  InsertFreeze = !SafetyInfo.isGuaranteedToExecute(TI, &DT, &L);
2188  }
2189 
2190  // If the edge from this terminator to a successor dominates that successor,
2191  // store a map from each block in its dominator subtree to it. This lets us
2192  // tell when cloning for a particular successor if a block is dominated by
2193  // some *other* successor with a single data structure. We use this to
2194  // significantly reduce cloning.
2196  for (auto *SuccBB : llvm::concat<BasicBlock *const>(
2197  makeArrayRef(RetainedSuccBB), UnswitchedSuccBBs))
2198  if (SuccBB->getUniquePredecessor() ||
2199  llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2200  return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2201  }))
2202  visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2203  DominatingSucc[BB] = SuccBB;
2204  return true;
2205  });
2206 
2207  // Split the preheader, so that we know that there is a safe place to insert
2208  // the conditional branch. We will change the preheader to have a conditional
2209  // branch on LoopCond. The original preheader will become the split point
2210  // between the unswitched versions, and we will have a new preheader for the
2211  // original loop.
2212  BasicBlock *SplitBB = L.getLoopPreheader();
2213  BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2214 
2215  // Keep track of the dominator tree updates needed.
2217 
2218  // Clone the loop for each unswitched successor.
2220  VMaps.reserve(UnswitchedSuccBBs.size());
2222  for (auto *SuccBB : UnswitchedSuccBBs) {
2223  VMaps.emplace_back(new ValueToValueMapTy());
2224  ClonedPHs[SuccBB] = buildClonedLoopBlocks(
2225  L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2226  DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU, SE);
2227  }
2228 
2229  // Drop metadata if we may break its semantics by moving this instr into the
2230  // split block.
2231  if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2233  // Do not spend time trying to understand if we can keep it, just drop it
2234  // to save compile time.
2235  TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2236  else {
2237  // It is only legal to preserve make.implicit metadata if we are
2238  // guaranteed no reach implicit null check after following this branch.
2239  ICFLoopSafetyInfo SafetyInfo;
2240  SafetyInfo.computeLoopSafetyInfo(&L);
2241  if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2242  TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2243  }
2244  }
2245 
2246  // The stitching of the branched code back together depends on whether we're
2247  // doing full unswitching or not with the exception that we always want to
2248  // nuke the initial terminator placed in the split block.
2249  SplitBB->getTerminator()->eraseFromParent();
2250  if (FullUnswitch) {
2251  // Splice the terminator from the original loop and rewrite its
2252  // successors.
2253  SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI);
2254 
2255  // Keep a clone of the terminator for MSSA updates.
2256  Instruction *NewTI = TI.clone();
2257  ParentBB->getInstList().push_back(NewTI);
2258 
2259  // First wire up the moved terminator to the preheaders.
2260  if (BI) {
2261  BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2262  BI->setSuccessor(ClonedSucc, ClonedPH);
2263  BI->setSuccessor(1 - ClonedSucc, LoopPH);
2265  if (InsertFreeze) {
2266  if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, BI, &DT))
2267  Cond = new FreezeInst(Cond, Cond->getName() + ".fr", BI);
2268  }
2269  BI->setCondition(Cond);
2270  DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2271  } else {
2272  assert(SI && "Must either be a branch or switch!");
2273 
2274  // Walk the cases and directly update their successors.
2275  assert(SI->getDefaultDest() == RetainedSuccBB &&
2276  "Not retaining default successor!");
2277  SI->setDefaultDest(LoopPH);
2278  for (const auto &Case : SI->cases())
2279  if (Case.getCaseSuccessor() == RetainedSuccBB)
2280  Case.setSuccessor(LoopPH);
2281  else
2282  Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2283 
2284  if (InsertFreeze) {
2285  auto Cond = SI->getCondition();
2286  if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, SI, &DT))
2287  SI->setCondition(new FreezeInst(Cond, Cond->getName() + ".fr", SI));
2288  }
2289  // We need to use the set to populate domtree updates as even when there
2290  // are multiple cases pointing at the same successor we only want to
2291  // remove and insert one edge in the domtree.
2292  for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2293  DTUpdates.push_back(
2294  {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2295  }
2296 
2297  if (MSSAU) {
2298  DT.applyUpdates(DTUpdates);
2299  DTUpdates.clear();
2300 
2301  // Remove all but one edge to the retained block and all unswitched
2302  // blocks. This is to avoid having duplicate entries in the cloned Phis,
2303  // when we know we only keep a single edge for each case.
2304  MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2305  for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2306  MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2307 
2308  for (auto &VMap : VMaps)
2309  MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2310  /*IgnoreIncomingWithNoClones=*/true);
2311  MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2312 
2313  // Remove all edges to unswitched blocks.
2314  for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2315  MSSAU->removeEdge(ParentBB, SuccBB);
2316  }
2317 
2318  // Now unhook the successor relationship as we'll be replacing
2319  // the terminator with a direct branch. This is much simpler for branches
2320  // than switches so we handle those first.
2321  if (BI) {
2322  // Remove the parent as a predecessor of the unswitched successor.
2323  assert(UnswitchedSuccBBs.size() == 1 &&
2324  "Only one possible unswitched block for a branch!");
2325  BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2326  UnswitchedSuccBB->removePredecessor(ParentBB,
2327  /*KeepOneInputPHIs*/ true);
2328  DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2329  } else {
2330  // Note that we actually want to remove the parent block as a predecessor
2331  // of *every* case successor. The case successor is either unswitched,
2332  // completely eliminating an edge from the parent to that successor, or it
2333  // is a duplicate edge to the retained successor as the retained successor
2334  // is always the default successor and as we'll replace this with a direct
2335  // branch we no longer need the duplicate entries in the PHI nodes.
2336  SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2337  assert(NewSI->getDefaultDest() == RetainedSuccBB &&
2338  "Not retaining default successor!");
2339  for (const auto &Case : NewSI->cases())
2340  Case.getCaseSuccessor()->removePredecessor(
2341  ParentBB,
2342  /*KeepOneInputPHIs*/ true);
2343 
2344  // We need to use the set to populate domtree updates as even when there
2345  // are multiple cases pointing at the same successor we only want to
2346  // remove and insert one edge in the domtree.
2347  for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2348  DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2349  }
2350 
2351  // After MSSAU update, remove the cloned terminator instruction NewTI.
2352  ParentBB->getTerminator()->eraseFromParent();
2353 
2354  // Create a new unconditional branch to the continuing block (as opposed to
2355  // the one cloned).
2356  BranchInst::Create(RetainedSuccBB, ParentBB);
2357  } else {
2358  assert(BI && "Only branches have partial unswitching.");
2359  assert(UnswitchedSuccBBs.size() == 1 &&
2360  "Only one possible unswitched block for a branch!");
2361  BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2362  // When doing a partial unswitch, we have to do a bit more work to build up
2363  // the branch in the split block.
2364  if (PartiallyInvariant)
2366  *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
2367  else {
2369  *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
2370  FreezeLoopUnswitchCond, BI, &AC, DT);
2371  }
2372  DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2373 
2374  if (MSSAU) {
2375  DT.applyUpdates(DTUpdates);
2376  DTUpdates.clear();
2377 
2378  // Perform MSSA cloning updates.
2379  for (auto &VMap : VMaps)
2380  MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2381  /*IgnoreIncomingWithNoClones=*/true);
2382  MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2383  }
2384  }
2385 
2386  // Apply the updates accumulated above to get an up-to-date dominator tree.
2387  DT.applyUpdates(DTUpdates);
2388 
2389  // Now that we have an accurate dominator tree, first delete the dead cloned
2390  // blocks so that we can accurately build any cloned loops. It is important to
2391  // not delete the blocks from the original loop yet because we still want to
2392  // reference the original loop to understand the cloned loop's structure.
2393  deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2394 
2395  // Build the cloned loop structure itself. This may be substantially
2396  // different from the original structure due to the simplified CFG. This also
2397  // handles inserting all the cloned blocks into the correct loops.
2398  SmallVector<Loop *, 4> NonChildClonedLoops;
2399  for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2400  buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2401 
2402  // Now that our cloned loops have been built, we can update the original loop.
2403  // First we delete the dead blocks from it and then we rebuild the loop
2404  // structure taking these deletions into account.
2405  deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, SE,DestroyLoopCB);
2406 
2407  if (MSSAU && VerifyMemorySSA)
2408  MSSAU->getMemorySSA()->verifyMemorySSA();
2409 
2410  SmallVector<Loop *, 4> HoistedLoops;
2411  bool IsStillLoop =
2412  rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops, SE);
2413 
2414  if (MSSAU && VerifyMemorySSA)
2415  MSSAU->getMemorySSA()->verifyMemorySSA();
2416 
2417  // This transformation has a high risk of corrupting the dominator tree, and
2418  // the below steps to rebuild loop structures will result in hard to debug
2419  // errors in that case so verify that the dominator tree is sane first.
2420  // FIXME: Remove this when the bugs stop showing up and rely on existing
2421  // verification steps.
2423 
2424  if (BI && !PartiallyInvariant) {
2425  // If we unswitched a branch which collapses the condition to a known
2426  // constant we want to replace all the uses of the invariants within both
2427  // the original and cloned blocks. We do this here so that we can use the
2428  // now updated dominator tree to identify which side the users are on.
2429  assert(UnswitchedSuccBBs.size() == 1 &&
2430  "Only one possible unswitched block for a branch!");
2431  BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2432 
2433  // When considering multiple partially-unswitched invariants
2434  // we cant just go replace them with constants in both branches.
2435  //
2436  // For 'AND' we infer that true branch ("continue") means true
2437  // for each invariant operand.
2438  // For 'OR' we can infer that false branch ("continue") means false
2439  // for each invariant operand.
2440  // So it happens that for multiple-partial case we dont replace
2441  // in the unswitched branch.
2442  bool ReplaceUnswitched =
2443  FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
2444 
2445  ConstantInt *UnswitchedReplacement =
2448  ConstantInt *ContinueReplacement =
2451  for (Value *Invariant : Invariants) {
2452  assert(!isa<Constant>(Invariant) &&
2453  "Should not be replacing constant values!");
2454  // Use make_early_inc_range here as set invalidates the iterator.
2455  for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
2456  Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2457  if (!UserI)
2458  continue;
2459 
2460  // Replace it with the 'continue' side if in the main loop body, and the
2461  // unswitched if in the cloned blocks.
2462  if (DT.dominates(LoopPH, UserI->getParent()))
2463  U.set(ContinueReplacement);
2464  else if (ReplaceUnswitched &&
2465  DT.dominates(ClonedPH, UserI->getParent()))
2466  U.set(UnswitchedReplacement);
2467  }
2468  }
2469  }
2470 
2471  // We can change which blocks are exit blocks of all the cloned sibling
2472  // loops, the current loop, and any parent loops which shared exit blocks
2473  // with the current loop. As a consequence, we need to re-form LCSSA for
2474  // them. But we shouldn't need to re-form LCSSA for any child loops.
2475  // FIXME: This could be made more efficient by tracking which exit blocks are
2476  // new, and focusing on them, but that isn't likely to be necessary.
2477  //
2478  // In order to reasonably rebuild LCSSA we need to walk inside-out across the
2479  // loop nest and update every loop that could have had its exits changed. We
2480  // also need to cover any intervening loops. We add all of these loops to
2481  // a list and sort them by loop depth to achieve this without updating
2482  // unnecessary loops.
2483  auto UpdateLoop = [&](Loop &UpdateL) {
2484 #ifndef NDEBUG
2485  UpdateL.verifyLoop();
2486  for (Loop *ChildL : UpdateL) {
2487  ChildL->verifyLoop();
2488  assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2489  "Perturbed a child loop's LCSSA form!");
2490  }
2491 #endif
2492  // First build LCSSA for this loop so that we can preserve it when
2493  // forming dedicated exits. We don't want to perturb some other loop's
2494  // LCSSA while doing that CFG edit.
2495  formLCSSA(UpdateL, DT, &LI, SE);
2496 
2497  // For loops reached by this loop's original exit blocks we may
2498  // introduced new, non-dedicated exits. At least try to re-form dedicated
2499  // exits for these loops. This may fail if they couldn't have dedicated
2500  // exits to start with.
2501  formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
2502  };
2503 
2504  // For non-child cloned loops and hoisted loops, we just need to update LCSSA
2505  // and we can do it in any order as they don't nest relative to each other.
2506  //
2507  // Also check if any of the loops we have updated have become top-level loops
2508  // as that will necessitate widening the outer loop scope.
2509  for (Loop *UpdatedL :
2510  llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2511  UpdateLoop(*UpdatedL);
2512  if (UpdatedL->isOutermost())
2513  OuterExitL = nullptr;
2514  }
2515  if (IsStillLoop) {
2516  UpdateLoop(L);
2517  if (L.isOutermost())
2518  OuterExitL = nullptr;
2519  }
2520 
2521  // If the original loop had exit blocks, walk up through the outer most loop
2522  // of those exit blocks to update LCSSA and form updated dedicated exits.
2523  if (OuterExitL != &L)
2524  for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2525  OuterL = OuterL->getParentLoop())
2526  UpdateLoop(*OuterL);
2527 
2528 #ifndef NDEBUG
2529  // Verify the entire loop structure to catch any incorrect updates before we
2530  // progress in the pass pipeline.
2531  LI.verify(DT);
2532 #endif
2533 
2534  // Now that we've unswitched something, make callbacks to report the changes.
2535  // For that we need to merge together the updated loops and the cloned loops
2536  // and check whether the original loop survived.
2537  SmallVector<Loop *, 4> SibLoops;
2538  for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2539  if (UpdatedL->getParentLoop() == ParentL)
2540  SibLoops.push_back(UpdatedL);
2541  UnswitchCB(IsStillLoop, PartiallyInvariant, SibLoops);
2542 
2543  if (MSSAU && VerifyMemorySSA)
2544  MSSAU->getMemorySSA()->verifyMemorySSA();
2545 
2546  if (BI)
2547  ++NumBranches;
2548  else
2549  ++NumSwitches;
2550 }
2551 
2552 /// Recursively compute the cost of a dominator subtree based on the per-block
2553 /// cost map provided.
2554 ///
2555 /// The recursive computation is memozied into the provided DT-indexed cost map
2556 /// to allow querying it for most nodes in the domtree without it becoming
2557 /// quadratic.
2559  DomTreeNode &N,
2562  // Don't accumulate cost (or recurse through) blocks not in our block cost
2563  // map and thus not part of the duplication cost being considered.
2564  auto BBCostIt = BBCostMap.find(N.getBlock());
2565  if (BBCostIt == BBCostMap.end())
2566  return 0;
2567 
2568  // Lookup this node to see if we already computed its cost.
2569  auto DTCostIt = DTCostMap.find(&N);
2570  if (DTCostIt != DTCostMap.end())
2571  return DTCostIt->second;
2572 
2573  // If not, we have to compute it. We can't use insert above and update
2574  // because computing the cost may insert more things into the map.
2575  InstructionCost Cost = std::accumulate(
2576  N.begin(), N.end(), BBCostIt->second,
2577  [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
2578  return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2579  });
2580  bool Inserted = DTCostMap.insert({&N, Cost}).second;
2581  (void)Inserted;
2582  assert(Inserted && "Should not insert a node while visiting children!");
2583  return Cost;
2584 }
2585 
2586 /// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
2587 /// making the following replacement:
2588 ///
2589 /// --code before guard--
2590 /// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2591 /// --code after guard--
2592 ///
2593 /// into
2594 ///
2595 /// --code before guard--
2596 /// br i1 %cond, label %guarded, label %deopt
2597 ///
2598 /// guarded:
2599 /// --code after guard--
2600 ///
2601 /// deopt:
2602 /// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2603 /// unreachable
2604 ///
2605 /// It also makes all relevant DT and LI updates, so that all structures are in
2606 /// valid state after this transform.
2608  DominatorTree &DT, LoopInfo &LI,
2609  MemorySSAUpdater *MSSAU) {
2611  LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
2612  BasicBlock *CheckBB = GI->getParent();
2613 
2614  if (MSSAU && VerifyMemorySSA)
2615  MSSAU->getMemorySSA()->verifyMemorySSA();
2616 
2617  // Remove all CheckBB's successors from DomTree. A block can be seen among
2618  // successors more than once, but for DomTree it should be added only once.
2619  SmallPtrSet<BasicBlock *, 4> Successors;
2620  for (auto *Succ : successors(CheckBB))
2621  if (Successors.insert(Succ).second)
2622  DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ});
2623 
2624  Instruction *DeoptBlockTerm =
2625  SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true);
2626  BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2627  // SplitBlockAndInsertIfThen inserts control flow that branches to
2628  // DeoptBlockTerm if the condition is true. We want the opposite.
2629  CheckBI->swapSuccessors();
2630 
2631  BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2632  GuardedBlock->setName("guarded");
2633  CheckBI->getSuccessor(1)->setName("deopt");
2634  BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2635 
2636  if (MSSAU)
2637  MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2638 
2639  GI->moveBefore(DeoptBlockTerm);
2641 
2642  // Add new successors of CheckBB into DomTree.
2643  for (auto *Succ : successors(CheckBB))
2644  DTUpdates.push_back({DominatorTree::Insert, CheckBB, Succ});
2645 
2646  // Now the blocks that used to be CheckBB's successors are GuardedBlock's
2647  // successors.
2648  for (auto *Succ : Successors)
2649  DTUpdates.push_back({DominatorTree::Insert, GuardedBlock, Succ});
2650 
2651  // Make proper changes to DT.
2652  DT.applyUpdates(DTUpdates);
2653  // Inform LI of a new loop block.
2654  L.addBasicBlockToLoop(GuardedBlock, LI);
2655 
2656  if (MSSAU) {
2657  MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
2658  MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2659  if (VerifyMemorySSA)
2660  MSSAU->getMemorySSA()->verifyMemorySSA();
2661  }
2662 
2663  ++NumGuards;
2664  return CheckBI;
2665 }
2666 
2667 /// Cost multiplier is a way to limit potentially exponential behavior
2668 /// of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch
2669 /// candidates available. Also accounting for the number of "sibling" loops with
2670 /// the idea to account for previous unswitches that already happened on this
2671 /// cluster of loops. There was an attempt to keep this formula simple,
2672 /// just enough to limit the worst case behavior. Even if it is not that simple
2673 /// now it is still not an attempt to provide a detailed heuristic size
2674 /// prediction.
2675 ///
2676 /// TODO: Make a proper accounting of "explosion" effect for all kinds of
2677 /// unswitch candidates, making adequate predictions instead of wild guesses.
2678 /// That requires knowing not just the number of "remaining" candidates but
2679 /// also costs of unswitching for each of these candidates.
2681  const Instruction &TI, const Loop &L, const LoopInfo &LI,
2682  const DominatorTree &DT,
2683  ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates) {
2684 
2685  // Guards and other exiting conditions do not contribute to exponential
2686  // explosion as soon as they dominate the latch (otherwise there might be
2687  // another path to the latch remaining that does not allow to eliminate the
2688  // loop copy on unswitch).
2689  const BasicBlock *Latch = L.getLoopLatch();
2690  const BasicBlock *CondBlock = TI.getParent();
2691  if (DT.dominates(CondBlock, Latch) &&
2692  (isGuard(&TI) ||
2693  llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
2694  return L.contains(SuccBB);
2695  }) <= 1)) {
2696  NumCostMultiplierSkipped++;
2697  return 1;
2698  }
2699 
2700  auto *ParentL = L.getParentLoop();
2701  int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2702  : std::distance(LI.begin(), LI.end()));
2703  // Count amount of clones that all the candidates might cause during
2704  // unswitching. Branch/guard counts as 1, switch counts as log2 of its cases.
2705  int UnswitchedClones = 0;
2706  for (auto Candidate : UnswitchCandidates) {
2707  const Instruction *CI = Candidate.TI;
2708  const BasicBlock *CondBlock = CI->getParent();
2709  bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2710  if (isGuard(CI)) {
2711  if (!SkipExitingSuccessors)
2712  UnswitchedClones++;
2713  continue;
2714  }
2715  int NonExitingSuccessors =
2716  llvm::count_if(successors(CondBlock),
2717  [SkipExitingSuccessors, &L](const BasicBlock *SuccBB) {
2718  return !SkipExitingSuccessors || L.contains(SuccBB);
2719  });
2720  UnswitchedClones += Log2_32(NonExitingSuccessors);
2721  }
2722 
2723  // Ignore up to the "unscaled candidates" number of unswitch candidates
2724  // when calculating the power-of-two scaling of the cost. The main idea
2725  // with this control is to allow a small number of unswitches to happen
2726  // and rely more on siblings multiplier (see below) when the number
2727  // of candidates is small.
2728  unsigned ClonesPower =
2729  std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2730 
2731  // Allowing top-level loops to spread a bit more than nested ones.
2732  int SiblingsMultiplier =
2733  std::max((ParentL ? SiblingsCount
2734  : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2735  1);
2736  // Compute the cost multiplier in a way that won't overflow by saturating
2737  // at an upper bound.
2738  int CostMultiplier;
2739  if (ClonesPower > Log2_32(UnswitchThreshold) ||
2740  SiblingsMultiplier > UnswitchThreshold)
2741  CostMultiplier = UnswitchThreshold;
2742  else
2743  CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2744  (int)UnswitchThreshold);
2745 
2746  LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier
2747  << " (siblings " << SiblingsMultiplier << " * clones "
2748  << (1 << ClonesPower) << ")"
2749  << " for unswitch candidate: " << TI << "\n");
2750  return CostMultiplier;
2751 }
2752 
2755  IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch,
2756  const Loop &L, const LoopInfo &LI, AAResults &AA,
2757  const MemorySSAUpdater *MSSAU) {
2758  assert(UnswitchCandidates.empty() && "Should be!");
2759  // Whether or not we should also collect guards in the loop.
2760  bool CollectGuards = false;
2761  if (UnswitchGuards) {
2762  auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
2763  Intrinsic::getName(Intrinsic::experimental_guard));
2764  if (GuardDecl && !GuardDecl->use_empty())
2765  CollectGuards = true;
2766  }
2767 
2768  for (auto *BB : L.blocks()) {
2769  if (LI.getLoopFor(BB) != &L)
2770  continue;
2771 
2772  if (CollectGuards)
2773  for (auto &I : *BB)
2774  if (isGuard(&I)) {
2775  auto *Cond =
2776  skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
2777  // TODO: Support AND, OR conditions and partial unswitching.
2778  if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2779  UnswitchCandidates.push_back({&I, {Cond}});
2780  }
2781 
2782  if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2783  // We can only consider fully loop-invariant switch conditions as we need
2784  // to completely eliminate the switch after unswitching.
2785  if (!isa<Constant>(SI->getCondition()) &&
2786  L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
2787  UnswitchCandidates.push_back({SI, {SI->getCondition()}});
2788  continue;
2789  }
2790 
2791  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2792  if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) ||
2793  BI->getSuccessor(0) == BI->getSuccessor(1))
2794  continue;
2795 
2796  Value *Cond = skipTrivialSelect(BI->getCondition());
2797  if (isa<Constant>(Cond))
2798  continue;
2799 
2800  if (L.isLoopInvariant(Cond)) {
2801  UnswitchCandidates.push_back({BI, {Cond}});
2802  continue;
2803  }
2804 
2805  Instruction &CondI = *cast<Instruction>(Cond);
2806  if (match(&CondI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
2807  TinyPtrVector<Value *> Invariants =
2809  if (Invariants.empty())
2810  continue;
2811 
2812  UnswitchCandidates.push_back({BI, std::move(Invariants)});
2813  continue;
2814  }
2815  }
2816 
2817  if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
2818  !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
2819  return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
2820  })) {
2821  MemorySSA *MSSA = MSSAU->getMemorySSA();
2822  if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
2823  LLVM_DEBUG(
2824  dbgs() << "simple-loop-unswitch: Found partially invariant condition "
2825  << *Info->InstToDuplicate[0] << "\n");
2826  PartialIVInfo = *Info;
2827  PartialIVCondBranch = L.getHeader()->getTerminator();
2828  TinyPtrVector<Value *> ValsToDuplicate;
2829  llvm::append_range(ValsToDuplicate, Info->InstToDuplicate);
2830  UnswitchCandidates.push_back(
2831  {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
2832  }
2833  }
2834  return !UnswitchCandidates.empty();
2835 }
2836 
2838  if (!L.isSafeToClone())
2839  return false;
2840  for (auto *BB : L.blocks())
2841  for (auto &I : *BB) {
2842  if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
2843  return false;
2844  if (auto *CB = dyn_cast<CallBase>(&I)) {
2845  assert(!CB->cannotDuplicate() && "Checked by L.isSafeToClone().");
2846  if (CB->isConvergent())
2847  return false;
2848  }
2849  }
2850 
2851  // Check if there are irreducible CFG cycles in this loop. If so, we cannot
2852  // easily unswitch non-trivial edges out of the loop. Doing so might turn the
2853  // irreducible control flow into reducible control flow and introduce new
2854  // loops "out of thin air". If we ever discover important use cases for doing
2855  // this, we can add support to loop unswitch, but it is a lot of complexity
2856  // for what seems little or no real world benefit.
2857  LoopBlocksRPO RPOT(&L);
2858  RPOT.perform(&LI);
2859  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
2860  return false;
2861 
2862  SmallVector<BasicBlock *, 4> ExitBlocks;
2863  L.getUniqueExitBlocks(ExitBlocks);
2864  // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
2865  // instruction as we don't know how to split those exit blocks.
2866  // FIXME: We should teach SplitBlock to handle this and remove this
2867  // restriction.
2868  for (auto *ExitBB : ExitBlocks) {
2869  auto *I = ExitBB->getFirstNonPHI();
2870  if (isa<CleanupPadInst>(I) || isa<CatchSwitchInst>(I)) {
2871  LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "
2872  "in exit block\n");
2873  return false;
2874  }
2875  }
2876 
2877  return true;
2878 }
2879 
2880 static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
2881  ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates, const Loop &L,
2882  const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC,
2883  const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo) {
2884  // Given that unswitching these terminators will require duplicating parts of
2885  // the loop, so we need to be able to model that cost. Compute the ephemeral
2886  // values and set up a data structure to hold per-BB costs. We cache each
2887  // block's cost so that we don't recompute this when considering different
2888  // subsets of the loop for duplication during unswitching.
2890  CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
2892 
2893  // Compute the cost of each block, as well as the total loop cost. Also, bail
2894  // out if we see instructions which are incompatible with loop unswitching
2895  // (convergent, noduplicate, or cross-basic-block tokens).
2896  // FIXME: We might be able to safely handle some of these in non-duplicated
2897  // regions.
2899  L.getHeader()->getParent()->hasMinSize()
2902  InstructionCost LoopCost = 0;
2903  for (auto *BB : L.blocks()) {
2904  InstructionCost Cost = 0;
2905  for (auto &I : *BB) {
2906  if (EphValues.count(&I))
2907  continue;
2909  }
2910  assert(Cost >= 0 && "Must not have negative costs!");
2911  LoopCost += Cost;
2912  assert(LoopCost >= 0 && "Must not have negative loop costs!");
2913  BBCostMap[BB] = Cost;
2914  }
2915  LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");
2916 
2917  // Now we find the best candidate by searching for the one with the following
2918  // properties in order:
2919  //
2920  // 1) An unswitching cost below the threshold
2921  // 2) The smallest number of duplicated unswitch candidates (to avoid
2922  // creating redundant subsequent unswitching)
2923  // 3) The smallest cost after unswitching.
2924  //
2925  // We prioritize reducing fanout of unswitch candidates provided the cost
2926  // remains below the threshold because this has a multiplicative effect.
2927  //
2928  // This requires memoizing each dominator subtree to avoid redundant work.
2929  //
2930  // FIXME: Need to actually do the number of candidates part above.
2932  // Given a terminator which might be unswitched, computes the non-duplicated
2933  // cost for that terminator.
2934  auto ComputeUnswitchedCost = [&](Instruction &TI,
2935  bool FullUnswitch) -> InstructionCost {
2936  BasicBlock &BB = *TI.getParent();
2938 
2939  InstructionCost Cost = 0;
2940  for (BasicBlock *SuccBB : successors(&BB)) {
2941  // Don't count successors more than once.
2942  if (!Visited.insert(SuccBB).second)
2943  continue;
2944 
2945  // If this is a partial unswitch candidate, then it must be a conditional
2946  // branch with a condition of either `or`, `and`, their corresponding
2947  // select forms or partially invariant instructions. In that case, one of
2948  // the successors is necessarily duplicated, so don't even try to remove
2949  // its cost.
2950  if (!FullUnswitch) {
2951  auto &BI = cast<BranchInst>(TI);
2952  Value *Cond = skipTrivialSelect(BI.getCondition());
2953  if (match(Cond, m_LogicalAnd())) {
2954  if (SuccBB == BI.getSuccessor(1))
2955  continue;
2956  } else if (match(Cond, m_LogicalOr())) {
2957  if (SuccBB == BI.getSuccessor(0))
2958  continue;
2959  } else if ((PartialIVInfo.KnownValue->isOneValue() &&
2960  SuccBB == BI.getSuccessor(0)) ||
2961  (!PartialIVInfo.KnownValue->isOneValue() &&
2962  SuccBB == BI.getSuccessor(1)))
2963  continue;
2964  }
2965 
2966  // This successor's domtree will not need to be duplicated after
2967  // unswitching if the edge to the successor dominates it (and thus the
2968  // entire tree). This essentially means there is no other path into this
2969  // subtree and so it will end up live in only one clone of the loop.
2970  if (SuccBB->getUniquePredecessor() ||
2971  llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2972  return PredBB == &BB || DT.dominates(SuccBB, PredBB);
2973  })) {
2974  Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
2975  assert(Cost <= LoopCost &&
2976  "Non-duplicated cost should never exceed total loop cost!");
2977  }
2978  }
2979 
2980  // Now scale the cost by the number of unique successors minus one. We
2981  // subtract one because there is already at least one copy of the entire
2982  // loop. This is computing the new cost of unswitching a condition.
2983  // Note that guards always have 2 unique successors that are implicit and
2984  // will be materialized if we decide to unswitch it.
2985  int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
2986  assert(SuccessorsCount > 1 &&
2987  "Cannot unswitch a condition without multiple distinct successors!");
2988  return (LoopCost - Cost) * (SuccessorsCount - 1);
2989  };
2990 
2992  for (auto &Candidate : UnswitchCandidates) {
2993  Instruction &TI = *Candidate.TI;
2994  ArrayRef<Value *> Invariants = Candidate.Invariants;
2995  BranchInst *BI = dyn_cast<BranchInst>(&TI);
2996  InstructionCost CandidateCost = ComputeUnswitchedCost(
2997  TI, /*FullUnswitch*/ !BI ||
2998  (Invariants.size() == 1 &&
2999  Invariants[0] == skipTrivialSelect(BI->getCondition())));
3000  // Calculate cost multiplier which is a tool to limit potentially
3001  // exponential behavior of loop-unswitch.
3003  int CostMultiplier =
3004  CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
3005  assert(
3006  (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
3007  "cost multiplier needs to be in the range of 1..UnswitchThreshold");
3008  CandidateCost *= CostMultiplier;
3009  LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3010  << " (multiplier: " << CostMultiplier << ")"
3011  << " for unswitch candidate: " << TI << "\n");
3012  } else {
3013  LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3014  << " for unswitch candidate: " << TI << "\n");
3015  }
3016 
3017  if (!Best || CandidateCost < Best->Cost) {
3018  Best = Candidate;
3019  Best->Cost = CandidateCost;
3020  }
3021  }
3022  assert(Best && "Must be!");
3023  return *Best;
3024 }
3025 
3027  Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
3029  function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
3030  ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
3031  function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
3032  // Collect all invariant conditions within this loop (as opposed to an inner
3033  // loop which would be handled when visiting that inner loop).
3035  IVConditionInfo PartialIVInfo;
3036  Instruction *PartialIVCondBranch = nullptr;
3037  // If we didn't find any candidates, we're done.
3038  if (!collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
3039  PartialIVCondBranch, L, LI, AA, MSSAU))
3040  return false;
3041 
3042  LLVM_DEBUG(
3043  dbgs() << "Considering " << UnswitchCandidates.size()
3044  << " non-trivial loop invariant conditions for unswitching.\n");
3045 
3046  NonTrivialUnswitchCandidate Best = findBestNonTrivialUnswitchCandidate(
3047  UnswitchCandidates, L, DT, LI, AC, TTI, PartialIVInfo);
3048 
3049  assert(Best.TI && "Failed to find loop unswitch candidate");
3050  assert(Best.Cost && "Failed to compute cost");
3051 
3052  if (*Best.Cost >= UnswitchThreshold) {
3053  LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << *Best.Cost
3054  << "\n");
3055  return false;
3056  }
3057 
3058  if (Best.TI != PartialIVCondBranch)
3059  PartialIVInfo.InstToDuplicate.clear();
3060 
3061  // If the best candidate is a guard, turn it into a branch.
3062  if (isGuard(Best.TI))
3063  Best.TI =
3064  turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
3065 
3066  LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Cost
3067  << ") terminator: " << *Best.TI << "\n");
3068  unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT,
3069  LI, AC, UnswitchCB, SE, MSSAU, DestroyLoopCB);
3070  return true;
3071 }
3072 
3073 /// Unswitch control flow predicated on loop invariant conditions.
3074 ///
3075 /// This first hoists all branches or switches which are trivial (IE, do not
3076 /// require duplicating any part of the loop) out of the loop body. It then
3077 /// looks at other loop invariant control flows and tries to unswitch those as
3078 /// well by cloning the loop if the result is small enough.
3079 ///
3080 /// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
3081 /// also updated based on the unswitch. The `MSSA` analysis is also updated if
3082 /// valid (i.e. its use is enabled).
3083 ///
3084 /// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
3085 /// true, we will attempt to do non-trivial unswitching as well as trivial
3086 /// unswitching.
3087 ///
3088 /// The `UnswitchCB` callback provided will be run after unswitching is
3089 /// complete, with the first parameter set to `true` if the provided loop
3090 /// remains a loop, and a list of new sibling loops created.
3091 ///
3092 /// If `SE` is non-null, we will update that analysis based on the unswitching
3093 /// done.
3094 static bool
3096  AAResults &AA, TargetTransformInfo &TTI, bool Trivial,
3097  bool NonTrivial,
3098  function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
3099  ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
3101  function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
3102  assert(L.isRecursivelyLCSSAForm(DT, LI) &&
3103  "Loops must be in LCSSA form before unswitching.");
3104 
3105  // Must be in loop simplified form: we need a preheader and dedicated exits.
3106  if (!L.isLoopSimplifyForm())
3107  return false;
3108 
3109  // Try trivial unswitch first before loop over other basic blocks in the loop.
3110  if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
3111  // If we unswitched successfully we will want to clean up the loop before
3112  // processing it further so just mark it as unswitched and return.
3113  UnswitchCB(/*CurrentLoopValid*/ true, false, {});
3114  return true;
3115  }
3116 
3117  // Check whether we should continue with non-trivial conditions.
3118  // EnableNonTrivialUnswitch: Global variable that forces non-trivial
3119  // unswitching for testing and debugging.
3120  // NonTrivial: Parameter that enables non-trivial unswitching for this
3121  // invocation of the transform. But this should be allowed only
3122  // for targets without branch divergence.
3123  //
3124  // FIXME: If divergence analysis becomes available to a loop
3125  // transform, we should allow unswitching for non-trivial uniform
3126  // branches even on targets that have divergence.
3127  // https://bugs.llvm.org/show_bug.cgi?id=48819
3128  bool ContinueWithNonTrivial =
3129  EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence());
3130  if (!ContinueWithNonTrivial)
3131  return false;
3132 
3133  // Skip non-trivial unswitching for optsize functions.
3134  if (L.getHeader()->getParent()->hasOptSize())
3135  return false;
3136 
3137  // Skip cold loops, as unswitching them brings little benefit
3138  // but increases the code size
3139  if (PSI && PSI->hasProfileSummary() && BFI &&
3141  LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
3142  return false;
3143  }
3144 
3145  // Perform legality checks.
3146  if (!isSafeForNoNTrivialUnswitching(L, LI))
3147  return false;
3148 
3149  // For non-trivial unswitching, because it often creates new loops, we rely on
3150  // the pass manager to iterate on the loops rather than trying to immediately
3151  // reach a fixed point. There is no substantial advantage to iterating
3152  // internally, and if any of the new loops are simplified enough to contain
3153  // trivial unswitching we want to prefer those.
3154 
3155  // Try to unswitch the best invariant condition. We prefer this full unswitch to
3156  // a partial unswitch when possible below the threshold.
3157  if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU,
3158  DestroyLoopCB))
3159  return true;
3160 
3161  // No other opportunities to unswitch.
3162  return false;
3163 }
3164 
3167  LPMUpdater &U) {
3168  Function &F = *L.getHeader()->getParent();
3169  (void)F;
3170  ProfileSummaryInfo *PSI = nullptr;
3171  if (auto OuterProxy =
3173  .getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))
3174  PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
3175  LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
3176  << "\n");
3177 
3178  // Save the current loop name in a variable so that we can report it even
3179  // after it has been deleted.
3180  std::string LoopName = std::string(L.getName());
3181 
3182  auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
3183  bool PartiallyInvariant,
3184  ArrayRef<Loop *> NewLoops) {
3185  // If we did a non-trivial unswitch, we have added new (cloned) loops.
3186  if (!NewLoops.empty())
3187  U.addSiblingLoops(NewLoops);
3188 
3189  // If the current loop remains valid, we should revisit it to catch any
3190  // other unswitch opportunities. Otherwise, we need to mark it as deleted.
3191  if (CurrentLoopValid) {
3192  if (PartiallyInvariant) {
3193  // Mark the new loop as partially unswitched, to avoid unswitching on
3194  // the same condition again.
3195  auto &Context = L.getHeader()->getContext();
3196  MDNode *DisableUnswitchMD = MDNode::get(
3197  Context,
3198  MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
3200  Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
3201  {DisableUnswitchMD});
3202  L.setLoopID(NewLoopID);
3203  } else
3204  U.revisitCurrentLoop();
3205  } else
3206  U.markLoopAsDeleted(L, LoopName);
3207  };
3208 
3209  auto DestroyLoopCB = [&U](Loop &L, StringRef Name) {
3210  U.markLoopAsDeleted(L, Name);
3211  };
3212 
3214  if (AR.MSSA) {
3215  MSSAU = MemorySSAUpdater(AR.MSSA);
3216  if (VerifyMemorySSA)
3217  AR.MSSA->verifyMemorySSA();
3218  }
3219  if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
3220  UnswitchCB, &AR.SE, MSSAU ? &*MSSAU : nullptr, PSI, AR.BFI,
3221  DestroyLoopCB))
3222  return PreservedAnalyses::all();
3223 
3224  if (AR.MSSA && VerifyMemorySSA)
3225  AR.MSSA->verifyMemorySSA();
3226 
3227  // Historically this pass has had issues with the dominator tree so verify it
3228  // in asserts builds.
3230 
3231  auto PA = getLoopPassPreservedAnalyses();
3232  if (AR.MSSA)
3233  PA.preserve<MemorySSAAnalysis>();
3234  return PA;
3235 }
3236 
3238  raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
3240  OS, MapClassName2PassName);
3241 
3242  OS << "<";
3243  OS << (NonTrivial ? "" : "no-") << "nontrivial;";
3244  OS << (Trivial ? "" : "no-") << "trivial";
3245  OS << ">";
3246 }
3247 
3248 namespace {
3249 
3250 class SimpleLoopUnswitchLegacyPass : public LoopPass {
3251  bool NonTrivial;
3252 
3253 public:
3254  static char ID; // Pass ID, replacement for typeid
3255 
3256  explicit SimpleLoopUnswitchLegacyPass(bool NonTrivial = false)
3257  : LoopPass(ID), NonTrivial(NonTrivial) {
3260  }
3261 
3262  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
3263 
3264  void getAnalysisUsage(AnalysisUsage &AU) const override {
3270  }
3271 };
3272 
3273 } // end anonymous namespace
3274 
3275 bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
3276  if (skipLoop(L))
3277  return false;
3278 
3279  Function &F = *L->getHeader()->getParent();
3280 
3281  LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L
3282  << "\n");
3283  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3284  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
3285  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
3286  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
3287  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
3288  MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
3289  MemorySSAUpdater MSSAU(MSSA);
3290 
3291  auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
3292  auto *SE = SEWP ? &SEWP->getSE() : nullptr;
3293 
3294  auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid, bool PartiallyInvariant,
3295  ArrayRef<Loop *> NewLoops) {
3296  // If we did a non-trivial unswitch, we have added new (cloned) loops.
3297  for (auto *NewL : NewLoops)
3298  LPM.addLoop(*NewL);
3299 
3300  // If the current loop remains valid, re-add it to the queue. This is
3301  // a little wasteful as we'll finish processing the current loop as well,
3302  // but it is the best we can do in the old PM.
3303  if (CurrentLoopValid) {
3304  // If the current loop has been unswitched using a partially invariant
3305  // condition, we should not re-add the current loop to avoid unswitching
3306  // on the same condition again.
3307  if (!PartiallyInvariant)
3308  LPM.addLoop(*L);
3309  } else
3310  LPM.markLoopAsDeleted(*L);
3311  };
3312 
3313  auto DestroyLoopCB = [&LPM](Loop &L, StringRef /* Name */) {
3314  LPM.markLoopAsDeleted(L);
3315  };
3316 
3317  if (VerifyMemorySSA)
3318  MSSA->verifyMemorySSA();
3319  bool Changed =
3320  unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE,
3321  &MSSAU, nullptr, nullptr, DestroyLoopCB);
3322 
3323  if (VerifyMemorySSA)
3324  MSSA->verifyMemorySSA();
3325 
3326  // Historically this pass has had issues with the dominator tree so verify it
3327  // in asserts builds.
3329 
3330  return Changed;
3331 }
3332 
3334 INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
3335  "Simple unswitch loops", false, false)
3342 INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
3344 
3346  return new SimpleLoopUnswitchLegacyPass(NonTrivial);
3347 }
i
i
Definition: README.txt:29
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::formLCSSA
bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition: LCSSA.cpp:341
llvm::OuterAnalysisManagerProxy
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1056
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:518
cloneLoopNest
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
Definition: SimpleLoopUnswitch.cpp:1299
AssumptionCache.h
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:308
llvm::createSimpleLoopUnswitchLegacyPass
Pass * createSimpleLoopUnswitchLegacyPass(bool NonTrivial=false)
Create the legacy pass object for the simple loop unswitcher.
Definition: SimpleLoopUnswitch.cpp:3345
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:217
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
UnswitchNumInitialUnscaledCandidates
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::MemorySSA::verifyMemorySSA
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1869
llvm::TargetTransformInfo::getInstructionCost
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:224
llvm::LoopStandardAnalysisResults::AC
AssumptionCache & AC
Definition: LoopAnalysisManager.h:53
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:387
llvm::LoopBase::getExitBlocks
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
Definition: LoopInfoImpl.h:64
llvm::AssumptionCache::registerAssumption
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
Definition: AssumptionCache.cpp:224
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:379
ValueMapper.h
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
llvm::hasPartialIVCondition
Optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
Definition: LoopUtils.cpp:1707
llvm::SwitchInstProfUpdateWrapper
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
Definition: Instructions.h:3619
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
IntrinsicInst.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
llvm::PassInfoMixin< SimpleLoopUnswitchPass >
llvm::SwitchInstProfUpdateWrapper::getSuccessorWeight
CaseWeightOpt getSuccessorWeight(unsigned idx)
Definition: Instructions.cpp:4654
MemorySSAUpdater.h
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
llvm::cfg::UpdateKind::Insert
@ Insert
unswitchTrivialBranch
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
Definition: SimpleLoopUnswitch.cpp:464
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:139
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::ProfileSummaryInfo::hasProfileSummary
bool hasProfileSummary() const
Returns true if profile summary is available.
Definition: ProfileSummaryInfo.h:68
llvm::isGuaranteedNotToBeUndefOrPoison
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
Definition: ValueTracking.cpp:5440
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::LoopInfoBase::verify
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:699
Statistic.h
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:266
llvm::SwitchInst::Create
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3466
recomputeLoopBlockSet
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
Definition: SimpleLoopUnswitch.cpp:1732
ErrorHandling.h
llvm::PatternMatch::m_CombineOr
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:218
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:172
llvm::Intrinsic::getName
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:942
llvm::IRBuilder<>
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1972
llvm::SmallDenseMap
Definition: DenseMap.h:880
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::LoopInfoBase::changeLoopFor
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
Definition: LoopInfo.h:1030
ValueTracking.h
Local.h
llvm::IRBuilderBase::CreateOr
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1403
llvm::MemorySSAUpdater::removeBlocks
void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.
Definition: MemorySSAUpdater.cpp:1372
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:142
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:220
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::begin
iterator begin()
Definition: DenseMap.h:75
ScalarEvolution.h
unswitchBestCondition
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, function_ref< void(bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
Definition: SimpleLoopUnswitch.cpp:3026
DenseMap.h
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1293
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:51
llvm::DominatorTreeBase::Insert
static constexpr UpdateKind Insert
Definition: GenericDomTree.h:242
llvm::MemorySSAUpdater::moveAllAfterSpliceBlocks
void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
Definition: MemorySSAUpdater.cpp:1247
llvm::DominatorTreeBase::getNode
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition: GenericDomTree.h:351
unswitch
simple loop unswitch
Definition: SimpleLoopUnswitch.cpp:3342
llvm::Optional
Definition: APInt.h:33
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::successors
auto successors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:29
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::SwitchInstProfUpdateWrapper::removeCase
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Definition: Instructions.cpp:4612
STLExtras.h
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::SwitchInst::cases
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
Definition: Instructions.h:3523
llvm::SwitchInst::getDefaultDest
BasicBlock * getDefaultDest() const
Definition: Instructions.h:3484
llvm::SimpleLoopUnswitchPass::printPipeline
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition: SimpleLoopUnswitch.cpp:3237
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:54
llvm::Sched::Fast
@ Fast
Definition: TargetLowering.h:104
Sequence.h
llvm::Constant::isOneValue
bool isOneValue() const
Returns true if the value is one.
Definition: Constants.cpp:110
llvm::Module::getFunction
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:176
llvm::zip_first
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&... args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
Definition: STLExtras.h:884
llvm::count_if
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1877
computeDomSubtreeCost
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
Definition: SimpleLoopUnswitch.cpp:2558
Use.h
MustExecute.h
llvm::LoopBase::getNumBlocks
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:202
llvm::MemorySSAUpdater::applyUpdates
void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
Definition: MemorySSAUpdater.cpp:809
llvm::MemorySSAUpdater::createMemoryAccessInBB
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point)
Create a MemoryAccess in MemorySSA at a specified point in a block, with a specified clobbering defin...
Definition: MemorySSAUpdater.cpp:1429
llvm::DominatorTreeBase::applyUpdates
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
Definition: GenericDomTree.h:544
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::BranchInst::swapSuccessors
void swapSuccessors()
Swap the successors of this branch instruction.
Definition: Instructions.cpp:1417
llvm::LPPassManager::addLoop
void addLoop(Loop &L)
Definition: LoopPass.cpp:76
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1400
buildPartialInvariantUnswitchConditionalBranch
static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU)
Copy a set of loop invariant values, and conditionally branch on them.
Definition: SimpleLoopUnswitch.cpp:248
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BranchInst::setSuccessor
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Definition: Instructions.h:3231
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1456
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
LoopAnalysisManager.h
llvm::makePostTransformationMetadata
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
Definition: LoopInfo.cpp:1126
llvm::LoopBlocksRPO
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
llvm::isGuard
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
Definition: GuardUtils.cpp:18
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
DropNonTrivialImplicitNullChecks
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
Instruction.h
CommandLine.h
CodeMetrics.h
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::SwitchInst::CaseHandleImpl::getCaseSuccessor
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
Definition: Instructions.h:3350
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:114
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1709
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::RemapInstruction
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:254
hoistLoopToNewParent
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
Definition: SimpleLoopUnswitch.cpp:364
llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:984
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition: PatternMatch.h:1462
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
buildClonedLoopBlocks
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Build the cloned blocks for an unswitched copy of the given loop.
Definition: SimpleLoopUnswitch.cpp:1110
llvm::AAResults
Definition: AliasAnalysis.h:294
InlinePriorityMode::Cost
@ Cost
llvm::DominatorTreeBase::insertEdge
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
Definition: GenericDomTree.h:584
FreezeLoopUnswitchCond
static cl::opt< bool > FreezeLoopUnswitchCond("freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation."))
Twine.h
InstrTypes.h
llvm::LoopBase::addChildLoop
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition: LoopInfo.h:412
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:306
llvm::TargetTransformInfo::hasBranchDivergence
bool hasBranchDivergence() const
Return true if branch divergence exists.
Definition: TargetTransformInfo.cpp:237
llvm::SwitchInst::CaseHandleImpl::getSuccessorIndex
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
Definition: Instructions.h:3361
SI
@ SI
Definition: SIInstrInfo.cpp:7882
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::TinyPtrVector::push_back
void push_back(EltTy NewVal)
Definition: TinyPtrVector.h:244
rewritePHINodesForExitAndUnswitchedBlocks
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
Definition: SimpleLoopUnswitch.cpp:319
llvm::LoopInfoBase::addTopLevelLoop
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
Definition: LoopInfo.h:1049
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:195
llvm::LoopInfoBase::destroy
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
Definition: LoopInfo.h:1096
llvm::Value::uses
iterator_range< use_iterator > uses()
Definition: Value.h:376
UnswitchThreshold
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
false
Definition: StackSlotColoring.cpp:141
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:547
llvm::MemorySSAUpdater::moveToPlace
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
Definition: MemorySSAUpdater.cpp:1200
CalculateUnswitchCostMultiplier
static int CalculateUnswitchCostMultiplier(const Instruction &TI, const Loop &L, const LoopInfo &LI, const DominatorTree &DT, ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
Definition: SimpleLoopUnswitch.cpp:2680
llvm::Instruction
Definition: Instruction.h:42
llvm::BranchInst::setCondition
void setCondition(Value *V)
Definition: Instructions.h:3219
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:364
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:30
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:375
llvm::SwitchInstProfUpdateWrapper::setSuccessorWeight
void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
Definition: Instructions.cpp:4660
rebuildLoopAfterUnswitch
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops, ScalarEvolution *SE)
Rebuild a loop after unswitching removes some subset of blocks and edges.
Definition: SimpleLoopUnswitch.cpp:1843
LoopUtils.h
llvm::AArch64CC::PL
@ PL
Definition: AArch64BaseInfo.h:260
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
llvm::LPPassManager
Definition: LoopPass.h:76
SmallPtrSet.h
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::SmallVectorImpl::append
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
PatternMatch.h
visitDomSubTree
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
Definition: SimpleLoopUnswitch.cpp:2052
findBestNonTrivialUnswitchCandidate
static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates, const Loop &L, const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC, const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo)
Definition: SimpleLoopUnswitch.cpp:2880
llvm::ValueMap::count
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: ValueMap.h:152
llvm::Loop::getName
StringRef getName() const
Definition: LoopInfo.h:891
llvm::RF_NoModuleLevelChanges
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:71
LoopIterator.h
deleteDeadClonedBlocks
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy >> VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
Definition: SimpleLoopUnswitch.cpp:1608
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::IRBuilderBase::CreateAnd
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1381
llvm::Loop::LoopBounds::Direction
Direction
An enum for the direction of the loop.
Definition: LoopInfo.h:723
llvm::MemorySSAUpdater::removeDuplicatePhiEdgesBetween
void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
Definition: MemorySSAUpdater.cpp:538
llvm::PatternMatch::m_One
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:517
llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:271
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
llvm::LoopInfoBase::AllocateLoop
LoopT * AllocateLoop(ArgsTy &&... Args)
Definition: LoopInfo.h:956
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3214
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1657
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLFunctionalExtras.h:36
llvm::MemorySSAUpdater::applyInsertUpdates
void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
Definition: MemorySSAUpdater.cpp:860
llvm::SmallPtrSetImplBase::empty
bool empty() const
Definition: SmallPtrSet.h:92
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:138
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:479
llvm::CloneBasicBlock
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
Definition: CloneFunction.cpp:41
BasicBlock.h
llvm::cl::opt< bool >
skipTrivialSelect
static Value * skipTrivialSelect(Value *Cond)
Definition: SimpleLoopUnswitch.cpp:134
llvm::ScalarEvolution::forgetBlockAndLoopDispositions
void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
Definition: ScalarEvolution.cpp:8461
llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:537
isSafeForNoNTrivialUnswitching
static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)
Definition: SimpleLoopUnswitch.cpp:2837
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:81
unswitchNontrivialInvariants
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, function_ref< void(bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
Definition: SimpleLoopUnswitch.cpp:2075
llvm::MemorySSAUpdater::removeEdge
void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
Definition: MemorySSAUpdater.cpp:531
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
uint64_t
llvm::SwitchInst::setDefaultDest
void setDefaultDest(BasicBlock *DefaultCase)
Definition: Instructions.h:3488
ProfileSummaryInfo.h
llvm::LoopPass
Definition: LoopPass.h:28
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2641
llvm::PatternMatch::m_LogicalOr
LogicalOp_match< LHS, RHS, Instruction::Or > m_LogicalOr(const LHS &L, const RHS &R)
Matches L || R either in the form of L | R or L ? true : R.
Definition: PatternMatch.h:2561
llvm::MemorySSAUpdater
Definition: MemorySSAUpdater.h:54
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:650
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:262
llvm::MemorySSAUpdater::getMemorySSA
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
Definition: MemorySSAUpdater.h:242
llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
llvm::LoopBase::getUniqueExitBlocks
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:142
llvm::MemorySSAUpdater::updateForClonedLoop
void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
Definition: MemorySSAUpdater.cpp:676
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
buildPartialUnswitchConditionalBranch
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, const Instruction *I, AssumptionCache *AC, const DominatorTree &DT)
Copy a set of loop invariant values ToDuplicate and insert them at the end of BB and conditionally br...
Definition: SimpleLoopUnswitch.cpp:228
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3189
llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:700
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:992
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::MemorySSA::getMemoryAccess
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:717
Cloning.h
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:716
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:183
llvm::MemoryDef
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition: MemorySSA.h:372
SimpleLoopUnswitch.h
llvm::ScalarEvolution::forgetValue
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
Definition: ScalarEvolution.cpp:8432
UnswitchGuards
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
llvm::MDString::get
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:498
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:232
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::ValueToValueMapTy
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
Definition: MemorySSAUpdater.h:50
getTopMostExitingLoop
static const Loop * getTopMostExitingLoop(const BasicBlock *ExitBB, const LoopInfo &LI)
Definition: SimpleLoopUnswitch.cpp:435
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", "Simple unswitch loops", false, false) INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass
llvm::LoopBase::getLoopDepth
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition: LoopInfo.h:97
llvm::IVConditionInfo::KnownValue
Constant * KnownValue
Constant to indicate for which value the condition is invariant.
Definition: LoopUtils.h:522
llvm::DominatorTree::isReachableFromEntry
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:335
llvm::LoopBase::getBlocksVector
std::vector< BlockT * > & getBlocksVector()
Return a direct, mutable handle to the blocks vector so that we can mutate it efficiently with techni...
Definition: LoopInfo.h:209
llvm::IVConditionInfo::InstToDuplicate
SmallVector< Instruction * > InstToDuplicate
Instructions that need to be duplicated and checked for the unswitching condition.
Definition: LoopUtils.h:519
llvm::LoopInfoBase::begin
iterator begin() const
Definition: LoopInfo.h:967
llvm::ScalarEvolution::forgetTopmostLoop
void forgetTopmostLoop(const Loop *L)
Definition: ScalarEvolution.cpp:8428
llvm::Loop::isRecursivelyLCSSAForm
bool isRecursivelyLCSSAForm(const DominatorTree &DT, const LoopInfo &LI, bool IgnoreTokens=true) const
Return true if this Loop and all inner subloops are in LCSSA form.
Definition: LoopInfo.cpp:469
llvm::MDNode
Metadata node.
Definition: Metadata.h:944
LoopPassManager.h
llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:934
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
InstructionCost.h
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
CFG.h
llvm::ProfileSummaryAnalysis
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:211
llvm::Instruction::clone
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
Definition: Instruction.cpp:892
llvm::LoopInfoBase::getLoopDepth
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
Definition: LoopInfo.h:999
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:95
llvm::LoopBase::removeChildLoop
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Definition: LoopInfo.h:421
llvm::LoopBase::getSubLoopsVector
std::vector< LoopT * > & getSubLoopsVector()
Definition: LoopInfo.h:164
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1108
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1716
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:137
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:221
LoopPass.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::MemorySSA::getBlockDefs
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition: MemorySSA.h:765
llvm::initializeSimpleLoopUnswitchLegacyPassPass
void initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry &)
areLoopExitPHIsLoopInvariant
static bool areLoopExitPHIsLoopInvariant(const Loop &L, const BasicBlock &ExitingBB, const BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
Definition: SimpleLoopUnswitch.cpp:208
llvm::SwitchInst::CaseHandleImpl::getCaseValue
ConstantIntT * getCaseValue() const
Resolves case value for current case.
Definition: Instructions.h:3343
llvm::LoopBase::isOutermost
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
Definition: LoopInfo.h:185
llvm::LoopBase::reserveBlocks
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
Definition: LoopInfo.h:453
unswitchLoop
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, function_ref< void(bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
Unswitch control flow predicated on loop invariant conditions.
Definition: SimpleLoopUnswitch.cpp:3095
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1988
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:994
loops
simple loop Simple unswitch loops
Definition: SimpleLoopUnswitch.cpp:3343
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:55
unswitchTrivialSwitch
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
Definition: SimpleLoopUnswitch.cpp:696
llvm::pred_empty
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
llvm::findOptionMDForLoop
MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
Definition: LoopInfo.cpp:1043
llvm::LoopStandardAnalysisResults::BFI
BlockFrequencyInfo * BFI
Definition: LoopAnalysisManager.h:59
BlockFrequencyInfo.h
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:432
llvm::IRBuilderBase::CreateCondBr
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1023
llvm::DomTreeNodeBase< BasicBlock >
llvm::ValueMap< const Value *, WeakTrackingVH >
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::CallBase::setArgOperand
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1346
llvm::LoopInfoBase::removeLoop
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
Definition: LoopInfo.h:1019
llvm::ConstantInt::getFalse
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:834
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:93
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:645
llvm::MemorySSAUpdater::updateExitBlocksForClonedLoop
void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
Definition: MemorySSAUpdater.cpp:787
llvm::BasicBlock::front
const Instruction & front() const
Definition: BasicBlock.h:318
llvm::RF_IgnoreMissingLocals
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:89
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
llvm::SimpleLoopUnswitchPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: SimpleLoopUnswitch.cpp:3165
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:182
Constant.h
llvm::SplitEdge
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
Definition: BasicBlockUtils.cpp:598
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
llvm::formDedicatedExitBlocks
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:58
llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1922
llvm::ScalarEvolution::forgetLoop
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
Definition: ScalarEvolution.cpp:8372
llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:827
llvm::Loop::setLoopID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:525
collectHomogenousInstGraphLoopInvariants
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root, const LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
Definition: SimpleLoopUnswitch.cpp:149
llvm::SwitchInstProfUpdateWrapper::addCase
void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
Definition: Instructions.cpp:4626
collectUnswitchCandidates
static bool collectUnswitchCandidates(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, const Loop &L, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
Definition: SimpleLoopUnswitch.cpp:2753
llvm::ProfileSummaryInfo::isFunctionColdInCallGraph
bool isFunctionColdInCallGraph(const Function *F, BlockFrequencyInfo &BFI) const
Returns true if F contains only cold code.
Definition: ProfileSummaryInfo.cpp:144
MSSAThreshold
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
llvm::LPPassManager::markLoopAsDeleted
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:110
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:58
llvm::IRBuilderBase::CreateFreeze
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2391
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:84
llvm::ICFLoopSafetyInfo
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
Definition: MustExecute.h:132
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2740
GenericDomTree.h
buildClonedLoops
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
Definition: SimpleLoopUnswitch.cpp:1358
llvm::LoopBase::getBlocksSet
SmallPtrSetImpl< const BlockT * > & getBlocksSet()
Return a direct, mutable handle to the blocks set so that we can mutate it efficiently.
Definition: LoopInfo.h:215
llvm::Loop::getLoopID
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:501
llvm::MemorySSA::BeforeTerminator
@ BeforeTerminator
Definition: MemorySSA.h:788
Casting.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:105
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::count
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:215
llvm::IVConditionInfo
Struct to hold information about a partially invariant condition.
Definition: LoopUtils.h:516
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:614
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:56
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:52
llvm::SwitchInstProfUpdateWrapper::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
Definition: Instructions.cpp:4645
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
GuardUtils.h
llvm::makeArrayRef
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:475
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:373
llvm::TinyPtrVector::empty
bool empty() const
Definition: TinyPtrVector.h:163
llvm::LoopInfoBase::end
iterator end() const
Definition: LoopInfo.h:968
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
MemorySSA.h
Instructions.h
llvm::BasicBlock::moveBefore
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.cpp:136
SmallVector.h
llvm::LoopBase::addBasicBlockToLoop
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:258
Dominators.h
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1341
llvm::FreezeInst
This class represents a freeze function that returns random concrete value if an operand is either a ...
Definition: Instructions.h:5437
N
#define N
rewritePHINodesForUnswitchedExitBlock
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
Definition: SimpleLoopUnswitch.cpp:297
llvm::SwitchInst::CaseHandle
Definition: Instructions.h:3377
llvm::BasicBlock::size
size_t size() const
Definition: BasicBlock.h:316
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::Loop::isSafeToClone
bool isSafeToClone() const
Return true if the loop body is safe to clone in practice.
Definition: LoopInfo.cpp:486
llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition: SmallVector.h:677
llvm::ICFLoopSafetyInfo::computeLoopSafetyInfo
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:79
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:164
llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition: GenericDomTree.h:802
TargetTransformInfo.h
llvm::LoopStandardAnalysisResults::MSSA
MemorySSA * MSSA
Definition: LoopAnalysisManager.h:61
llvm::PHINode
Definition: Instructions.h:2698
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::BasicBlock::removePredecessor
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:342
llvm::SmallVectorImpl< DominatorTree::UpdateType >
llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:485
llvm::Function::hasMinSize
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:642
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::LoopBase::isLoopExiting
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Definition: LoopInfo.h:242
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::ICFLoopSafetyInfo::isGuaranteedToExecute
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
Definition: MustExecute.cpp:277
llvm::ValueMap::lookup
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: ValueMap.h:165
deleteDeadBlocksFromLoop
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
Definition: SimpleLoopUnswitch.cpp:1638
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::VerifyMemorySSA
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:91
turnGuardIntoBranch
static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...
Definition: SimpleLoopUnswitch.cpp:2607
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3277
llvm::SplitBlockAndInsertIfThen
Instruction * SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, bool Unreachable, MDNode *BranchWeights, DominatorTree *DT, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
Definition: BasicBlockUtils.cpp:1525
llvm::cl::desc
Definition: CommandLine.h:413
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3133
raw_ostream.h
llvm::LoopBlocksRPO::perform
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:180
llvm::TinyPtrVector
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:30
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:667
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:918
Value.h
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
replaceLoopInvariantUses
static void replaceLoopInvariantUses(const Loop &L, Value *Invariant, Constant &Replacement)
Definition: SimpleLoopUnswitch.cpp:191
llvm::DominatorTreeBase::deleteEdge
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
Definition: GenericDomTree.h:602
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3212
llvm::PatternMatch::m_LogicalAnd
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
Definition: PatternMatch.h:2543
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3226
SetVector.h
llvm::DominatorTreeBase::Delete
static constexpr UpdateKind Delete
Definition: GenericDomTree.h:243
llvm::Instruction::moveBefore
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:100
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
unswitchAllTrivialConditions
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
Definition: SimpleLoopUnswitch.cpp:991
UnswitchSiblingsToplevelDiv
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
EnableNonTrivialUnswitch
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
EnableUnswitchCostMultiplier
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
llvm::PoisonValue::get
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1732