LLVM  14.0.0git
SimpleLoopUnswitch.cpp
Go to the documentation of this file.
1 ///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "llvm/ADT/DenseMap.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/ADT/Sequence.h"
13 #include "llvm/ADT/SetVector.h"
14 #include "llvm/ADT/SmallPtrSet.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/ADT/Twine.h"
19 #include "llvm/Analysis/CFG.h"
24 #include "llvm/Analysis/LoopInfo.h"
26 #include "llvm/Analysis/LoopPass.h"
32 #include "llvm/IR/BasicBlock.h"
33 #include "llvm/IR/Constant.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/Dominators.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/IR/IRBuilder.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Instruction.h"
40 #include "llvm/IR/Instructions.h"
41 #include "llvm/IR/IntrinsicInst.h"
42 #include "llvm/IR/PatternMatch.h"
43 #include "llvm/IR/Use.h"
44 #include "llvm/IR/Value.h"
45 #include "llvm/InitializePasses.h"
46 #include "llvm/Pass.h"
47 #include "llvm/Support/Casting.h"
49 #include "llvm/Support/Debug.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <iterator>
61 #include <numeric>
62 #include <utility>
63 
64 #define DEBUG_TYPE "simple-loop-unswitch"
65 
66 using namespace llvm;
67 using namespace llvm::PatternMatch;
68 
69 STATISTIC(NumBranches, "Number of branches unswitched");
70 STATISTIC(NumSwitches, "Number of switches unswitched");
71 STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
72 STATISTIC(NumTrivial, "Number of unswitches that are trivial");
73 STATISTIC(
74  NumCostMultiplierSkipped,
75  "Number of unswitch candidates that had their cost multiplier skipped");
76 
78  "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
79  cl::desc("Forcibly enables non-trivial loop unswitching rather than "
80  "following the configuration passed into the pass."));
81 
82 static cl::opt<int>
83  UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
85  cl::desc("The cost threshold for unswitching a loop."));
86 
88  "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
89  cl::desc("Enable unswitch cost multiplier that prohibits exponential "
90  "explosion in nontrivial unswitch."));
92  "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
93  cl::desc("Toplevel siblings divisor for cost multiplier."));
95  "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
96  cl::desc("Number of unswitch candidates that are ignored when calculating "
97  "cost multiplier."));
99  "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
100  cl::desc("If enabled, simple loop unswitching will also consider "
101  "llvm.experimental.guard intrinsics as unswitch candidates."));
103  "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
104  cl::init(false), cl::Hidden,
105  cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
106  "null checks to save time analyzing if we can keep it."));
107 static cl::opt<unsigned>
108  MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
109  cl::desc("Max number of memory uses to explore during "
110  "partial unswitching analysis"),
111  cl::init(100), cl::Hidden);
113  "freeze-loop-unswitch-cond", cl::init(false), cl::Hidden,
114  cl::desc("If enabled, the freeze instruction will be added to condition "
115  "of loop unswitch to prevent miscompilation."));
116 
117 /// Collect all of the loop invariant input values transitively used by the
118 /// homogeneous instruction graph from a given root.
119 ///
120 /// This essentially walks from a root recursively through loop variant operands
121 /// which have the exact same opcode and finds all inputs which are loop
122 /// invariant. For some operations these can be re-associated and unswitched out
123 /// of the loop entirely.
126  LoopInfo &LI) {
127  assert(!L.isLoopInvariant(&Root) &&
128  "Only need to walk the graph if root itself is not invariant.");
129  TinyPtrVector<Value *> Invariants;
130 
131  bool IsRootAnd = match(&Root, m_LogicalAnd());
132  bool IsRootOr = match(&Root, m_LogicalOr());
133 
134  // Build a worklist and recurse through operators collecting invariants.
137  Worklist.push_back(&Root);
138  Visited.insert(&Root);
139  do {
140  Instruction &I = *Worklist.pop_back_val();
141  for (Value *OpV : I.operand_values()) {
142  // Skip constants as unswitching isn't interesting for them.
143  if (isa<Constant>(OpV))
144  continue;
145 
146  // Add it to our result if loop invariant.
147  if (L.isLoopInvariant(OpV)) {
148  Invariants.push_back(OpV);
149  continue;
150  }
151 
152  // If not an instruction with the same opcode, nothing we can do.
153  Instruction *OpI = dyn_cast<Instruction>(OpV);
154 
155  if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
156  (IsRootOr && match(OpI, m_LogicalOr())))) {
157  // Visit this operand.
158  if (Visited.insert(OpI).second)
159  Worklist.push_back(OpI);
160  }
161  }
162  } while (!Worklist.empty());
163 
164  return Invariants;
165 }
166 
167 static void replaceLoopInvariantUses(Loop &L, Value *Invariant,
168  Constant &Replacement) {
169  assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");
170 
171  // Replace uses of LIC in the loop with the given constant.
172  // We use make_early_inc_range as set invalidates the iterator.
173  for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
174  Instruction *UserI = dyn_cast<Instruction>(U.getUser());
175 
176  // Replace this use within the loop body.
177  if (UserI && L.contains(UserI))
178  U.set(&Replacement);
179  }
180 }
181 
182 /// Check that all the LCSSA PHI nodes in the loop exit block have trivial
183 /// incoming values along this edge.
184 static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
185  BasicBlock &ExitBB) {
186  for (Instruction &I : ExitBB) {
187  auto *PN = dyn_cast<PHINode>(&I);
188  if (!PN)
189  // No more PHIs to check.
190  return true;
191 
192  // If the incoming value for this edge isn't loop invariant the unswitch
193  // won't be trivial.
194  if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
195  return false;
196  }
197  llvm_unreachable("Basic blocks should never be empty!");
198 }
199 
200 /// Copy a set of loop invariant values \p ToDuplicate and insert them at the
201 /// end of \p BB and conditionally branch on the copied condition. We only
202 /// branch on a single value.
204  BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
205  BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze) {
206  IRBuilder<> IRB(&BB);
207 
208  Value *Cond = Direction ? IRB.CreateOr(Invariants) :
209  IRB.CreateAnd(Invariants);
210  if (InsertFreeze)
211  Cond = IRB.CreateFreeze(Cond, Cond->getName() + ".fr");
212  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
213  Direction ? &NormalSucc : &UnswitchedSucc);
214 }
215 
216 /// Copy a set of loop invariant values, and conditionally branch on them.
218  BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
219  BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
220  MemorySSAUpdater *MSSAU) {
221  ValueToValueMapTy VMap;
222  for (auto *Val : reverse(ToDuplicate)) {
223  Instruction *Inst = cast<Instruction>(Val);
224  Instruction *NewInst = Inst->clone();
225  BB.getInstList().insert(BB.end(), NewInst);
226  RemapInstruction(NewInst, VMap,
228  VMap[Val] = NewInst;
229 
230  if (!MSSAU)
231  continue;
232 
233  MemorySSA *MSSA = MSSAU->getMemorySSA();
234  if (auto *MemUse =
235  dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) {
236  auto *DefiningAccess = MemUse->getDefiningAccess();
237  // Get the first defining access before the loop.
238  while (L.contains(DefiningAccess->getBlock())) {
239  // If the defining access is a MemoryPhi, get the incoming
240  // value for the pre-header as defining access.
241  if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
242  DefiningAccess =
243  MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
244  else
245  DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
246  }
247  MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
248  NewInst->getParent(),
250  }
251  }
252 
253  IRBuilder<> IRB(&BB);
254  Value *Cond = VMap[ToDuplicate[0]];
255  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
256  Direction ? &NormalSucc : &UnswitchedSucc);
257 }
258 
259 /// Rewrite the PHI nodes in an unswitched loop exit basic block.
260 ///
261 /// Requires that the loop exit and unswitched basic block are the same, and
262 /// that the exiting block was a unique predecessor of that block. Rewrites the
263 /// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
264 /// PHI nodes from the old preheader that now contains the unswitched
265 /// terminator.
267  BasicBlock &OldExitingBB,
268  BasicBlock &OldPH) {
269  for (PHINode &PN : UnswitchedBB.phis()) {
270  // When the loop exit is directly unswitched we just need to update the
271  // incoming basic block. We loop to handle weird cases with repeated
272  // incoming blocks, but expect to typically only have one operand here.
273  for (auto i : seq<int>(0, PN.getNumOperands())) {
274  assert(PN.getIncomingBlock(i) == &OldExitingBB &&
275  "Found incoming block different from unique predecessor!");
276  PN.setIncomingBlock(i, &OldPH);
277  }
278  }
279 }
280 
281 /// Rewrite the PHI nodes in the loop exit basic block and the split off
282 /// unswitched block.
283 ///
284 /// Because the exit block remains an exit from the loop, this rewrites the
285 /// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
286 /// nodes into the unswitched basic block to select between the value in the
287 /// old preheader and the loop exit.
289  BasicBlock &UnswitchedBB,
290  BasicBlock &OldExitingBB,
291  BasicBlock &OldPH,
292  bool FullUnswitch) {
293  assert(&ExitBB != &UnswitchedBB &&
294  "Must have different loop exit and unswitched blocks!");
295  Instruction *InsertPt = &*UnswitchedBB.begin();
296  for (PHINode &PN : ExitBB.phis()) {
297  auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
298  PN.getName() + ".split", InsertPt);
299 
300  // Walk backwards over the old PHI node's inputs to minimize the cost of
301  // removing each one. We have to do this weird loop manually so that we
302  // create the same number of new incoming edges in the new PHI as we expect
303  // each case-based edge to be included in the unswitched switch in some
304  // cases.
305  // FIXME: This is really, really gross. It would be much cleaner if LLVM
306  // allowed us to create a single entry for a predecessor block without
307  // having separate entries for each "edge" even though these edges are
308  // required to produce identical results.
309  for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
310  if (PN.getIncomingBlock(i) != &OldExitingBB)
311  continue;
312 
313  Value *Incoming = PN.getIncomingValue(i);
314  if (FullUnswitch)
315  // No more edge from the old exiting block to the exit block.
316  PN.removeIncomingValue(i);
317 
318  NewPN->addIncoming(Incoming, &OldPH);
319  }
320 
321  // Now replace the old PHI with the new one and wire the old one in as an
322  // input to the new one.
323  PN.replaceAllUsesWith(NewPN);
324  NewPN->addIncoming(&PN, &ExitBB);
325  }
326 }
327 
328 /// Hoist the current loop up to the innermost loop containing a remaining exit.
329 ///
330 /// Because we've removed an exit from the loop, we may have changed the set of
331 /// loops reachable and need to move the current loop up the loop nest or even
332 /// to an entirely separate nest.
333 static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
334  DominatorTree &DT, LoopInfo &LI,
335  MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
336  // If the loop is already at the top level, we can't hoist it anywhere.
337  Loop *OldParentL = L.getParentLoop();
338  if (!OldParentL)
339  return;
340 
342  L.getExitBlocks(Exits);
343  Loop *NewParentL = nullptr;
344  for (auto *ExitBB : Exits)
345  if (Loop *ExitL = LI.getLoopFor(ExitBB))
346  if (!NewParentL || NewParentL->contains(ExitL))
347  NewParentL = ExitL;
348 
349  if (NewParentL == OldParentL)
350  return;
351 
352  // The new parent loop (if different) should always contain the old one.
353  if (NewParentL)
354  assert(NewParentL->contains(OldParentL) &&
355  "Can only hoist this loop up the nest!");
356 
357  // The preheader will need to move with the body of this loop. However,
358  // because it isn't in this loop we also need to update the primary loop map.
359  assert(OldParentL == LI.getLoopFor(&Preheader) &&
360  "Parent loop of this loop should contain this loop's preheader!");
361  LI.changeLoopFor(&Preheader, NewParentL);
362 
363  // Remove this loop from its old parent.
364  OldParentL->removeChildLoop(&L);
365 
366  // Add the loop either to the new parent or as a top-level loop.
367  if (NewParentL)
368  NewParentL->addChildLoop(&L);
369  else
370  LI.addTopLevelLoop(&L);
371 
372  // Remove this loops blocks from the old parent and every other loop up the
373  // nest until reaching the new parent. Also update all of these
374  // no-longer-containing loops to reflect the nesting change.
375  for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
376  OldContainingL = OldContainingL->getParentLoop()) {
377  llvm::erase_if(OldContainingL->getBlocksVector(),
378  [&](const BasicBlock *BB) {
379  return BB == &Preheader || L.contains(BB);
380  });
381 
382  OldContainingL->getBlocksSet().erase(&Preheader);
383  for (BasicBlock *BB : L.blocks())
384  OldContainingL->getBlocksSet().erase(BB);
385 
386  // Because we just hoisted a loop out of this one, we have essentially
387  // created new exit paths from it. That means we need to form LCSSA PHI
388  // nodes for values used in the no-longer-nested loop.
389  formLCSSA(*OldContainingL, DT, &LI, SE);
390 
391  // We shouldn't need to form dedicated exits because the exit introduced
392  // here is the (just split by unswitching) preheader. However, after trivial
393  // unswitching it is possible to get new non-dedicated exits out of parent
394  // loop so let's conservatively form dedicated exit blocks and figure out
395  // if we can optimize later.
396  formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
397  /*PreserveLCSSA*/ true);
398  }
399 }
400 
401 // Return the top-most loop containing ExitBB and having ExitBB as exiting block
402 // or the loop containing ExitBB, if there is no parent loop containing ExitBB
403 // as exiting block.
405  Loop *TopMost = LI.getLoopFor(ExitBB);
406  Loop *Current = TopMost;
407  while (Current) {
408  if (Current->isLoopExiting(ExitBB))
409  TopMost = Current;
410  Current = Current->getParentLoop();
411  }
412  return TopMost;
413 }
414 
415 /// Unswitch a trivial branch if the condition is loop invariant.
416 ///
417 /// This routine should only be called when loop code leading to the branch has
418 /// been validated as trivial (no side effects). This routine checks if the
419 /// condition is invariant and one of the successors is a loop exit. This
420 /// allows us to unswitch without duplicating the loop, making it trivial.
421 ///
422 /// If this routine fails to unswitch the branch it returns false.
423 ///
424 /// If the branch can be unswitched, this routine splits the preheader and
425 /// hoists the branch above that split. Preserves loop simplified form
426 /// (splitting the exit block as necessary). It simplifies the branch within
427 /// the loop to an unconditional branch but doesn't remove it entirely. Further
428 /// cleanup can be done with some simplifycfg like pass.
429 ///
430 /// If `SE` is not null, it will be updated based on the potential loop SCEVs
431 /// invalidated by this.
433  LoopInfo &LI, ScalarEvolution *SE,
434  MemorySSAUpdater *MSSAU) {
435  assert(BI.isConditional() && "Can only unswitch a conditional branch!");
436  LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
437 
438  // The loop invariant values that we want to unswitch.
439  TinyPtrVector<Value *> Invariants;
440 
441  // When true, we're fully unswitching the branch rather than just unswitching
442  // some input conditions to the branch.
443  bool FullUnswitch = false;
444 
445  if (L.isLoopInvariant(BI.getCondition())) {
446  Invariants.push_back(BI.getCondition());
447  FullUnswitch = true;
448  } else {
449  if (auto *CondInst = dyn_cast<Instruction>(BI.getCondition()))
450  Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
451  if (Invariants.empty()) {
452  LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n");
453  return false;
454  }
455  }
456 
457  // Check that one of the branch's successors exits, and which one.
458  bool ExitDirection = true;
459  int LoopExitSuccIdx = 0;
460  auto *LoopExitBB = BI.getSuccessor(0);
461  if (L.contains(LoopExitBB)) {
462  ExitDirection = false;
463  LoopExitSuccIdx = 1;
464  LoopExitBB = BI.getSuccessor(1);
465  if (L.contains(LoopExitBB)) {
466  LLVM_DEBUG(dbgs() << " Branch doesn't exit the loop!\n");
467  return false;
468  }
469  }
470  auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
471  auto *ParentBB = BI.getParent();
472  if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {
473  LLVM_DEBUG(dbgs() << " Loop exit PHI's aren't loop-invariant!\n");
474  return false;
475  }
476 
477  // When unswitching only part of the branch's condition, we need the exit
478  // block to be reached directly from the partially unswitched input. This can
479  // be done when the exit block is along the true edge and the branch condition
480  // is a graph of `or` operations, or the exit block is along the false edge
481  // and the condition is a graph of `and` operations.
482  if (!FullUnswitch) {
483  if (ExitDirection ? !match(BI.getCondition(), m_LogicalOr())
484  : !match(BI.getCondition(), m_LogicalAnd())) {
485  LLVM_DEBUG(dbgs() << " Branch condition is in improper form for "
486  "non-full unswitch!\n");
487  return false;
488  }
489  }
490 
491  LLVM_DEBUG({
492  dbgs() << " unswitching trivial invariant conditions for: " << BI
493  << "\n";
494  for (Value *Invariant : Invariants) {
495  dbgs() << " " << *Invariant << " == true";
496  if (Invariant != Invariants.back())
497  dbgs() << " ||";
498  dbgs() << "\n";
499  }
500  });
501 
502  // If we have scalar evolutions, we need to invalidate them including this
503  // loop, the loop containing the exit block and the topmost parent loop
504  // exiting via LoopExitBB.
505  if (SE) {
506  if (Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
507  SE->forgetLoop(ExitL);
508  else
509  // Forget the entire nest as this exits the entire nest.
510  SE->forgetTopmostLoop(&L);
511  }
512 
513  if (MSSAU && VerifyMemorySSA)
514  MSSAU->getMemorySSA()->verifyMemorySSA();
515 
516  // Split the preheader, so that we know that there is a safe place to insert
517  // the conditional branch. We will change the preheader to have a conditional
518  // branch on LoopCond.
519  BasicBlock *OldPH = L.getLoopPreheader();
520  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
521 
522  // Now that we have a place to insert the conditional branch, create a place
523  // to branch to: this is the exit block out of the loop that we are
524  // unswitching. We need to split this if there are other loop predecessors.
525  // Because the loop is in simplified form, *any* other predecessor is enough.
526  BasicBlock *UnswitchedBB;
527  if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
528  assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
529  "A branch's parent isn't a predecessor!");
530  UnswitchedBB = LoopExitBB;
531  } else {
532  UnswitchedBB =
533  SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI, MSSAU);
534  }
535 
536  if (MSSAU && VerifyMemorySSA)
537  MSSAU->getMemorySSA()->verifyMemorySSA();
538 
539  // Actually move the invariant uses into the unswitched position. If possible,
540  // we do this by moving the instructions, but when doing partial unswitching
541  // we do it by building a new merge of the values in the unswitched position.
542  OldPH->getTerminator()->eraseFromParent();
543  if (FullUnswitch) {
544  // If fully unswitching, we can use the existing branch instruction.
545  // Splice it into the old PH to gate reaching the new preheader and re-point
546  // its successors.
547  OldPH->getInstList().splice(OldPH->end(), BI.getParent()->getInstList(),
548  BI);
549  if (MSSAU) {
550  // Temporarily clone the terminator, to make MSSA update cheaper by
551  // separating "insert edge" updates from "remove edge" ones.
552  ParentBB->getInstList().push_back(BI.clone());
553  } else {
554  // Create a new unconditional branch that will continue the loop as a new
555  // terminator.
556  BranchInst::Create(ContinueBB, ParentBB);
557  }
558  BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
559  BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
560  } else {
561  // Only unswitching a subset of inputs to the condition, so we will need to
562  // build a new branch that merges the invariant inputs.
563  if (ExitDirection)
565  "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
566  "condition!");
567  else
569  "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
570  " condition!");
571  buildPartialUnswitchConditionalBranch(*OldPH, Invariants, ExitDirection,
572  *UnswitchedBB, *NewPH, false);
573  }
574 
575  // Update the dominator tree with the added edge.
576  DT.insertEdge(OldPH, UnswitchedBB);
577 
578  // After the dominator tree was updated with the added edge, update MemorySSA
579  // if available.
580  if (MSSAU) {
582  Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
583  MSSAU->applyInsertUpdates(Updates, DT);
584  }
585 
586  // Finish updating dominator tree and memory ssa for full unswitch.
587  if (FullUnswitch) {
588  if (MSSAU) {
589  // Remove the cloned branch instruction.
590  ParentBB->getTerminator()->eraseFromParent();
591  // Create unconditional branch now.
592  BranchInst::Create(ContinueBB, ParentBB);
593  MSSAU->removeEdge(ParentBB, LoopExitBB);
594  }
595  DT.deleteEdge(ParentBB, LoopExitBB);
596  }
597 
598  if (MSSAU && VerifyMemorySSA)
599  MSSAU->getMemorySSA()->verifyMemorySSA();
600 
601  // Rewrite the relevant PHI nodes.
602  if (UnswitchedBB == LoopExitBB)
603  rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
604  else
605  rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
606  *ParentBB, *OldPH, FullUnswitch);
607 
608  // The constant we can replace all of our invariants with inside the loop
609  // body. If any of the invariants have a value other than this the loop won't
610  // be entered.
611  ConstantInt *Replacement = ExitDirection
614 
615  // Since this is an i1 condition we can also trivially replace uses of it
616  // within the loop with a constant.
617  for (Value *Invariant : Invariants)
618  replaceLoopInvariantUses(L, Invariant, *Replacement);
619 
620  // If this was full unswitching, we may have changed the nesting relationship
621  // for this loop so hoist it to its correct parent if needed.
622  if (FullUnswitch)
623  hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
624 
625  if (MSSAU && VerifyMemorySSA)
626  MSSAU->getMemorySSA()->verifyMemorySSA();
627 
628  LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
629  ++NumTrivial;
630  ++NumBranches;
631  return true;
632 }
633 
634 /// Unswitch a trivial switch if the condition is loop invariant.
635 ///
636 /// This routine should only be called when loop code leading to the switch has
637 /// been validated as trivial (no side effects). This routine checks if the
638 /// condition is invariant and that at least one of the successors is a loop
639 /// exit. This allows us to unswitch without duplicating the loop, making it
640 /// trivial.
641 ///
642 /// If this routine fails to unswitch the switch it returns false.
643 ///
644 /// If the switch can be unswitched, this routine splits the preheader and
645 /// copies the switch above that split. If the default case is one of the
646 /// exiting cases, it copies the non-exiting cases and points them at the new
647 /// preheader. If the default case is not exiting, it copies the exiting cases
648 /// and points the default at the preheader. It preserves loop simplified form
649 /// (splitting the exit blocks as necessary). It simplifies the switch within
650 /// the loop by removing now-dead cases. If the default case is one of those
651 /// unswitched, it replaces its destination with a new basic block containing
652 /// only unreachable. Such basic blocks, while technically loop exits, are not
653 /// considered for unswitching so this is a stable transform and the same
654 /// switch will not be revisited. If after unswitching there is only a single
655 /// in-loop successor, the switch is further simplified to an unconditional
656 /// branch. Still more cleanup can be done with some simplifycfg like pass.
657 ///
658 /// If `SE` is not null, it will be updated based on the potential loop SCEVs
659 /// invalidated by this.
661  LoopInfo &LI, ScalarEvolution *SE,
662  MemorySSAUpdater *MSSAU) {
663  LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
664  Value *LoopCond = SI.getCondition();
665 
666  // If this isn't switching on an invariant condition, we can't unswitch it.
667  if (!L.isLoopInvariant(LoopCond))
668  return false;
669 
670  auto *ParentBB = SI.getParent();
671 
672  // The same check must be used both for the default and the exit cases. We
673  // should never leave edges from the switch instruction to a basic block that
674  // we are unswitching, hence the condition used to determine the default case
675  // needs to also be used to populate ExitCaseIndices, which is then used to
676  // remove cases from the switch.
677  auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
678  // BBToCheck is not an exit block if it is inside loop L.
679  if (L.contains(&BBToCheck))
680  return false;
681  // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
682  if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
683  return false;
684  // We do not unswitch a block that only has an unreachable statement, as
685  // it's possible this is a previously unswitched block. Only unswitch if
686  // either the terminator is not unreachable, or, if it is, it's not the only
687  // instruction in the block.
688  auto *TI = BBToCheck.getTerminator();
689  bool isUnreachable = isa<UnreachableInst>(TI);
690  return !isUnreachable ||
691  (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI));
692  };
693 
694  SmallVector<int, 4> ExitCaseIndices;
695  for (auto Case : SI.cases())
696  if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
697  ExitCaseIndices.push_back(Case.getCaseIndex());
698  BasicBlock *DefaultExitBB = nullptr;
701  if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
702  DefaultExitBB = SI.getDefaultDest();
703  } else if (ExitCaseIndices.empty())
704  return false;
705 
706  LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
707 
708  if (MSSAU && VerifyMemorySSA)
709  MSSAU->getMemorySSA()->verifyMemorySSA();
710 
711  // We may need to invalidate SCEVs for the outermost loop reached by any of
712  // the exits.
713  Loop *OuterL = &L;
714 
715  if (DefaultExitBB) {
716  // Clear out the default destination temporarily to allow accurate
717  // predecessor lists to be examined below.
718  SI.setDefaultDest(nullptr);
719  // Check the loop containing this exit.
720  Loop *ExitL = LI.getLoopFor(DefaultExitBB);
721  if (!ExitL || ExitL->contains(OuterL))
722  OuterL = ExitL;
723  }
724 
725  // Store the exit cases into a separate data structure and remove them from
726  // the switch.
727  SmallVector<std::tuple<ConstantInt *, BasicBlock *,
729  4> ExitCases;
730  ExitCases.reserve(ExitCaseIndices.size());
732  // We walk the case indices backwards so that we remove the last case first
733  // and don't disrupt the earlier indices.
734  for (unsigned Index : reverse(ExitCaseIndices)) {
735  auto CaseI = SI.case_begin() + Index;
736  // Compute the outer loop from this exit.
737  Loop *ExitL = LI.getLoopFor(CaseI->getCaseSuccessor());
738  if (!ExitL || ExitL->contains(OuterL))
739  OuterL = ExitL;
740  // Save the value of this case.
741  auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
742  ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
743  // Delete the unswitched cases.
744  SIW.removeCase(CaseI);
745  }
746 
747  if (SE) {
748  if (OuterL)
749  SE->forgetLoop(OuterL);
750  else
751  SE->forgetTopmostLoop(&L);
752  }
753 
754  // Check if after this all of the remaining cases point at the same
755  // successor.
756  BasicBlock *CommonSuccBB = nullptr;
757  if (SI.getNumCases() > 0 &&
758  all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
759  return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
760  }))
761  CommonSuccBB = SI.case_begin()->getCaseSuccessor();
762  if (!DefaultExitBB) {
763  // If we're not unswitching the default, we need it to match any cases to
764  // have a common successor or if we have no cases it is the common
765  // successor.
766  if (SI.getNumCases() == 0)
767  CommonSuccBB = SI.getDefaultDest();
768  else if (SI.getDefaultDest() != CommonSuccBB)
769  CommonSuccBB = nullptr;
770  }
771 
772  // Split the preheader, so that we know that there is a safe place to insert
773  // the switch.
774  BasicBlock *OldPH = L.getLoopPreheader();
775  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
776  OldPH->getTerminator()->eraseFromParent();
777 
778  // Now add the unswitched switch.
779  auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
780  SwitchInstProfUpdateWrapper NewSIW(*NewSI);
781 
782  // Rewrite the IR for the unswitched basic blocks. This requires two steps.
783  // First, we split any exit blocks with remaining in-loop predecessors. Then
784  // we update the PHIs in one of two ways depending on if there was a split.
785  // We walk in reverse so that we split in the same order as the cases
786  // appeared. This is purely for convenience of reading the resulting IR, but
787  // it doesn't cost anything really.
788  SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
790  // Handle the default exit if necessary.
791  // FIXME: It'd be great if we could merge this with the loop below but LLVM's
792  // ranges aren't quite powerful enough yet.
793  if (DefaultExitBB) {
794  if (pred_empty(DefaultExitBB)) {
795  UnswitchedExitBBs.insert(DefaultExitBB);
796  rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
797  } else {
798  auto *SplitBB =
799  SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI, MSSAU);
800  rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
801  *ParentBB, *OldPH,
802  /*FullUnswitch*/ true);
803  DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
804  }
805  }
806  // Note that we must use a reference in the for loop so that we update the
807  // container.
808  for (auto &ExitCase : reverse(ExitCases)) {
809  // Grab a reference to the exit block in the pair so that we can update it.
810  BasicBlock *ExitBB = std::get<1>(ExitCase);
811 
812  // If this case is the last edge into the exit block, we can simply reuse it
813  // as it will no longer be a loop exit. No mapping necessary.
814  if (pred_empty(ExitBB)) {
815  // Only rewrite once.
816  if (UnswitchedExitBBs.insert(ExitBB).second)
817  rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
818  continue;
819  }
820 
821  // Otherwise we need to split the exit block so that we retain an exit
822  // block from the loop and a target for the unswitched condition.
823  BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
824  if (!SplitExitBB) {
825  // If this is the first time we see this, do the split and remember it.
826  SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
827  rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
828  *ParentBB, *OldPH,
829  /*FullUnswitch*/ true);
830  }
831  // Update the case pair to point to the split block.
832  std::get<1>(ExitCase) = SplitExitBB;
833  }
834 
835  // Now add the unswitched cases. We do this in reverse order as we built them
836  // in reverse order.
837  for (auto &ExitCase : reverse(ExitCases)) {
838  ConstantInt *CaseVal = std::get<0>(ExitCase);
839  BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
840 
841  NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
842  }
843 
844  // If the default was unswitched, re-point it and add explicit cases for
845  // entering the loop.
846  if (DefaultExitBB) {
847  NewSIW->setDefaultDest(DefaultExitBB);
848  NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
849 
850  // We removed all the exit cases, so we just copy the cases to the
851  // unswitched switch.
852  for (const auto &Case : SI.cases())
853  NewSIW.addCase(Case.getCaseValue(), NewPH,
855  } else if (DefaultCaseWeight) {
856  // We have to set branch weight of the default case.
857  uint64_t SW = *DefaultCaseWeight;
858  for (const auto &Case : SI.cases()) {
859  auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
860  assert(W &&
861  "case weight must be defined as default case weight is defined");
862  SW += *W;
863  }
864  NewSIW.setSuccessorWeight(0, SW);
865  }
866 
867  // If we ended up with a common successor for every path through the switch
868  // after unswitching, rewrite it to an unconditional branch to make it easy
869  // to recognize. Otherwise we potentially have to recognize the default case
870  // pointing at unreachable and other complexity.
871  if (CommonSuccBB) {
872  BasicBlock *BB = SI.getParent();
873  // We may have had multiple edges to this common successor block, so remove
874  // them as predecessors. We skip the first one, either the default or the
875  // actual first case.
876  bool SkippedFirst = DefaultExitBB == nullptr;
877  for (auto Case : SI.cases()) {
878  assert(Case.getCaseSuccessor() == CommonSuccBB &&
879  "Non-common successor!");
880  (void)Case;
881  if (!SkippedFirst) {
882  SkippedFirst = true;
883  continue;
884  }
885  CommonSuccBB->removePredecessor(BB,
886  /*KeepOneInputPHIs*/ true);
887  }
888  // Now nuke the switch and replace it with a direct branch.
889  SIW.eraseFromParent();
890  BranchInst::Create(CommonSuccBB, BB);
891  } else if (DefaultExitBB) {
892  assert(SI.getNumCases() > 0 &&
893  "If we had no cases we'd have a common successor!");
894  // Move the last case to the default successor. This is valid as if the
895  // default got unswitched it cannot be reached. This has the advantage of
896  // being simple and keeping the number of edges from this switch to
897  // successors the same, and avoiding any PHI update complexity.
898  auto LastCaseI = std::prev(SI.case_end());
899 
900  SI.setDefaultDest(LastCaseI->getCaseSuccessor());
901  SIW.setSuccessorWeight(
902  0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
903  SIW.removeCase(LastCaseI);
904  }
905 
906  // Walk the unswitched exit blocks and the unswitched split blocks and update
907  // the dominator tree based on the CFG edits. While we are walking unordered
908  // containers here, the API for applyUpdates takes an unordered list of
909  // updates and requires them to not contain duplicates.
911  for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
912  DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
913  DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
914  }
915  for (auto SplitUnswitchedPair : SplitExitBBMap) {
916  DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
917  DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
918  }
919 
920  if (MSSAU) {
921  MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
922  if (VerifyMemorySSA)
923  MSSAU->getMemorySSA()->verifyMemorySSA();
924  } else {
925  DT.applyUpdates(DTUpdates);
926  }
927 
929 
930  // We may have changed the nesting relationship for this loop so hoist it to
931  // its correct parent if needed.
932  hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
933 
934  if (MSSAU && VerifyMemorySSA)
935  MSSAU->getMemorySSA()->verifyMemorySSA();
936 
937  ++NumTrivial;
938  ++NumSwitches;
939  LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
940  return true;
941 }
942 
943 /// This routine scans the loop to find a branch or switch which occurs before
944 /// any side effects occur. These can potentially be unswitched without
945 /// duplicating the loop. If a branch or switch is successfully unswitched the
946 /// scanning continues to see if subsequent branches or switches have become
947 /// trivial. Once all trivial candidates have been unswitched, this routine
948 /// returns.
949 ///
950 /// The return value indicates whether anything was unswitched (and therefore
951 /// changed).
952 ///
953 /// If `SE` is not null, it will be updated based on the potential loop SCEVs
954 /// invalidated by this.
956  LoopInfo &LI, ScalarEvolution *SE,
957  MemorySSAUpdater *MSSAU) {
958  bool Changed = false;
959 
960  // If loop header has only one reachable successor we should keep looking for
961  // trivial condition candidates in the successor as well. An alternative is
962  // to constant fold conditions and merge successors into loop header (then we
963  // only need to check header's terminator). The reason for not doing this in
964  // LoopUnswitch pass is that it could potentially break LoopPassManager's
965  // invariants. Folding dead branches could either eliminate the current loop
966  // or make other loops unreachable. LCSSA form might also not be preserved
967  // after deleting branches. The following code keeps traversing loop header's
968  // successors until it finds the trivial condition candidate (condition that
969  // is not a constant). Since unswitching generates branches with constant
970  // conditions, this scenario could be very common in practice.
971  BasicBlock *CurrentBB = L.getHeader();
973  Visited.insert(CurrentBB);
974  do {
975  // Check if there are any side-effecting instructions (e.g. stores, calls,
976  // volatile loads) in the part of the loop that the code *would* execute
977  // without unswitching.
978  if (MSSAU) // Possible early exit with MSSA
979  if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
980  if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
981  return Changed;
982  if (llvm::any_of(*CurrentBB,
983  [](Instruction &I) { return I.mayHaveSideEffects(); }))
984  return Changed;
985 
986  Instruction *CurrentTerm = CurrentBB->getTerminator();
987 
988  if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
989  // Don't bother trying to unswitch past a switch with a constant
990  // condition. This should be removed prior to running this pass by
991  // simplifycfg.
992  if (isa<Constant>(SI->getCondition()))
993  return Changed;
994 
995  if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
996  // Couldn't unswitch this one so we're done.
997  return Changed;
998 
999  // Mark that we managed to unswitch something.
1000  Changed = true;
1001 
1002  // If unswitching turned the terminator into an unconditional branch then
1003  // we can continue. The unswitching logic specifically works to fold any
1004  // cases it can into an unconditional branch to make it easier to
1005  // recognize here.
1006  auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
1007  if (!BI || BI->isConditional())
1008  return Changed;
1009 
1010  CurrentBB = BI->getSuccessor(0);
1011  continue;
1012  }
1013 
1014  auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1015  if (!BI)
1016  // We do not understand other terminator instructions.
1017  return Changed;
1018 
1019  // Don't bother trying to unswitch past an unconditional branch or a branch
1020  // with a constant value. These should be removed by simplifycfg prior to
1021  // running this pass.
1022  if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
1023  return Changed;
1024 
1025  // Found a trivial condition candidate: non-foldable conditional branch. If
1026  // we fail to unswitch this, we can't do anything else that is trivial.
1027  if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
1028  return Changed;
1029 
1030  // Mark that we managed to unswitch something.
1031  Changed = true;
1032 
1033  // If we only unswitched some of the conditions feeding the branch, we won't
1034  // have collapsed it to a single successor.
1035  BI = cast<BranchInst>(CurrentBB->getTerminator());
1036  if (BI->isConditional())
1037  return Changed;
1038 
1039  // Follow the newly unconditional branch into its successor.
1040  CurrentBB = BI->getSuccessor(0);
1041 
1042  // When continuing, if we exit the loop or reach a previous visited block,
1043  // then we can not reach any trivial condition candidates (unfoldable
1044  // branch instructions or switch instructions) and no unswitch can happen.
1045  } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
1046 
1047  return Changed;
1048 }
1049 
1050 /// Build the cloned blocks for an unswitched copy of the given loop.
1051 ///
1052 /// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
1053 /// after the split block (`SplitBB`) that will be used to select between the
1054 /// cloned and original loop.
1055 ///
1056 /// This routine handles cloning all of the necessary loop blocks and exit
1057 /// blocks including rewriting their instructions and the relevant PHI nodes.
1058 /// Any loop blocks or exit blocks which are dominated by a different successor
1059 /// than the one for this clone of the loop blocks can be trivially skipped. We
1060 /// use the `DominatingSucc` map to determine whether a block satisfies that
1061 /// property with a simple map lookup.
1062 ///
1063 /// It also correctly creates the unconditional branch in the cloned
1064 /// unswitched parent block to only point at the unswitched successor.
1065 ///
1066 /// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1067 /// block splitting is correctly reflected in `LoopInfo`, essentially all of
1068 /// the cloned blocks (and their loops) are left without full `LoopInfo`
1069 /// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1070 /// blocks to them but doesn't create the cloned `DominatorTree` structure and
1071 /// instead the caller must recompute an accurate DT. It *does* correctly
1072 /// update the `AssumptionCache` provided in `AC`.
1074  Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
1075  ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
1076  BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
1077  const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
1078  ValueToValueMapTy &VMap,
1080  DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
1081  SmallVector<BasicBlock *, 4> NewBlocks;
1082  NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
1083 
1084  // We will need to clone a bunch of blocks, wrap up the clone operation in
1085  // a helper.
1086  auto CloneBlock = [&](BasicBlock *OldBB) {
1087  // Clone the basic block and insert it before the new preheader.
1088  BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
1089  NewBB->moveBefore(LoopPH);
1090 
1091  // Record this block and the mapping.
1092  NewBlocks.push_back(NewBB);
1093  VMap[OldBB] = NewBB;
1094 
1095  return NewBB;
1096  };
1097 
1098  // We skip cloning blocks when they have a dominating succ that is not the
1099  // succ we are cloning for.
1100  auto SkipBlock = [&](BasicBlock *BB) {
1101  auto It = DominatingSucc.find(BB);
1102  return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
1103  };
1104 
1105  // First, clone the preheader.
1106  auto *ClonedPH = CloneBlock(LoopPH);
1107 
1108  // Then clone all the loop blocks, skipping the ones that aren't necessary.
1109  for (auto *LoopBB : L.blocks())
1110  if (!SkipBlock(LoopBB))
1111  CloneBlock(LoopBB);
1112 
1113  // Split all the loop exit edges so that when we clone the exit blocks, if
1114  // any of the exit blocks are *also* a preheader for some other loop, we
1115  // don't create multiple predecessors entering the loop header.
1116  for (auto *ExitBB : ExitBlocks) {
1117  if (SkipBlock(ExitBB))
1118  continue;
1119 
1120  // When we are going to clone an exit, we don't need to clone all the
1121  // instructions in the exit block and we want to ensure we have an easy
1122  // place to merge the CFG, so split the exit first. This is always safe to
1123  // do because there cannot be any non-loop predecessors of a loop exit in
1124  // loop simplified form.
1125  auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
1126 
1127  // Rearrange the names to make it easier to write test cases by having the
1128  // exit block carry the suffix rather than the merge block carrying the
1129  // suffix.
1130  MergeBB->takeName(ExitBB);
1131  ExitBB->setName(Twine(MergeBB->getName()) + ".split");
1132 
1133  // Now clone the original exit block.
1134  auto *ClonedExitBB = CloneBlock(ExitBB);
1135  assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1136  "Exit block should have been split to have one successor!");
1137  assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1138  "Cloned exit block has the wrong successor!");
1139 
1140  // Remap any cloned instructions and create a merge phi node for them.
1141  for (auto ZippedInsts : llvm::zip_first(
1142  llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
1143  llvm::make_range(ClonedExitBB->begin(),
1144  std::prev(ClonedExitBB->end())))) {
1145  Instruction &I = std::get<0>(ZippedInsts);
1146  Instruction &ClonedI = std::get<1>(ZippedInsts);
1147 
1148  // The only instructions in the exit block should be PHI nodes and
1149  // potentially a landing pad.
1150  assert(
1151  (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&
1152  "Bad instruction in exit block!");
1153  // We should have a value map between the instruction and its clone.
1154  assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
1155 
1156  auto *MergePN =
1157  PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi",
1158  &*MergeBB->getFirstInsertionPt());
1159  I.replaceAllUsesWith(MergePN);
1160  MergePN->addIncoming(&I, ExitBB);
1161  MergePN->addIncoming(&ClonedI, ClonedExitBB);
1162  }
1163  }
1164 
1165  // Rewrite the instructions in the cloned blocks to refer to the instructions
1166  // in the cloned blocks. We have to do this as a second pass so that we have
1167  // everything available. Also, we have inserted new instructions which may
1168  // include assume intrinsics, so we update the assumption cache while
1169  // processing this.
1170  for (auto *ClonedBB : NewBlocks)
1171  for (Instruction &I : *ClonedBB) {
1172  RemapInstruction(&I, VMap,
1174  if (auto *II = dyn_cast<AssumeInst>(&I))
1175  AC.registerAssumption(II);
1176  }
1177 
1178  // Update any PHI nodes in the cloned successors of the skipped blocks to not
1179  // have spurious incoming values.
1180  for (auto *LoopBB : L.blocks())
1181  if (SkipBlock(LoopBB))
1182  for (auto *SuccBB : successors(LoopBB))
1183  if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
1184  for (PHINode &PN : ClonedSuccBB->phis())
1185  PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
1186 
1187  // Remove the cloned parent as a predecessor of any successor we ended up
1188  // cloning other than the unswitched one.
1189  auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
1190  for (auto *SuccBB : successors(ParentBB)) {
1191  if (SuccBB == UnswitchedSuccBB)
1192  continue;
1193 
1194  auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
1195  if (!ClonedSuccBB)
1196  continue;
1197 
1198  ClonedSuccBB->removePredecessor(ClonedParentBB,
1199  /*KeepOneInputPHIs*/ true);
1200  }
1201 
1202  // Replace the cloned branch with an unconditional branch to the cloned
1203  // unswitched successor.
1204  auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
1205  Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1206  // Trivial Simplification. If Terminator is a conditional branch and
1207  // condition becomes dead - erase it.
1208  Value *ClonedConditionToErase = nullptr;
1209  if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1210  ClonedConditionToErase = BI->getCondition();
1211  else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1212  ClonedConditionToErase = SI->getCondition();
1213 
1214  ClonedTerminator->eraseFromParent();
1215  BranchInst::Create(ClonedSuccBB, ClonedParentBB);
1216 
1217  if (ClonedConditionToErase)
1218  RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
1219  MSSAU);
1220 
1221  // If there are duplicate entries in the PHI nodes because of multiple edges
1222  // to the unswitched successor, we need to nuke all but one as we replaced it
1223  // with a direct branch.
1224  for (PHINode &PN : ClonedSuccBB->phis()) {
1225  bool Found = false;
1226  // Loop over the incoming operands backwards so we can easily delete as we
1227  // go without invalidating the index.
1228  for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
1229  if (PN.getIncomingBlock(i) != ClonedParentBB)
1230  continue;
1231  if (!Found) {
1232  Found = true;
1233  continue;
1234  }
1235  PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
1236  }
1237  }
1238 
1239  // Record the domtree updates for the new blocks.
1241  for (auto *ClonedBB : NewBlocks) {
1242  for (auto *SuccBB : successors(ClonedBB))
1243  if (SuccSet.insert(SuccBB).second)
1244  DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1245  SuccSet.clear();
1246  }
1247 
1248  return ClonedPH;
1249 }
1250 
1251 /// Recursively clone the specified loop and all of its children.
1252 ///
1253 /// The target parent loop for the clone should be provided, or can be null if
1254 /// the clone is a top-level loop. While cloning, all the blocks are mapped
1255 /// with the provided value map. The entire original loop must be present in
1256 /// the value map. The cloned loop is returned.
1257 static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
1258  const ValueToValueMapTy &VMap, LoopInfo &LI) {
1259  auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
1260  assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
1261  ClonedL.reserveBlocks(OrigL.getNumBlocks());
1262  for (auto *BB : OrigL.blocks()) {
1263  auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
1264  ClonedL.addBlockEntry(ClonedBB);
1265  if (LI.getLoopFor(BB) == &OrigL)
1266  LI.changeLoopFor(ClonedBB, &ClonedL);
1267  }
1268  };
1269 
1270  // We specially handle the first loop because it may get cloned into
1271  // a different parent and because we most commonly are cloning leaf loops.
1272  Loop *ClonedRootL = LI.AllocateLoop();
1273  if (RootParentL)
1274  RootParentL->addChildLoop(ClonedRootL);
1275  else
1276  LI.addTopLevelLoop(ClonedRootL);
1277  AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1278 
1279  if (OrigRootL.isInnermost())
1280  return ClonedRootL;
1281 
1282  // If we have a nest, we can quickly clone the entire loop nest using an
1283  // iterative approach because it is a tree. We keep the cloned parent in the
1284  // data structure to avoid repeatedly querying through a map to find it.
1285  SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
1286  // Build up the loops to clone in reverse order as we'll clone them from the
1287  // back.
1288  for (Loop *ChildL : llvm::reverse(OrigRootL))
1289  LoopsToClone.push_back({ClonedRootL, ChildL});
1290  do {
1291  Loop *ClonedParentL, *L;
1292  std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
1293  Loop *ClonedL = LI.AllocateLoop();
1294  ClonedParentL->addChildLoop(ClonedL);
1295  AddClonedBlocksToLoop(*L, *ClonedL);
1296  for (Loop *ChildL : llvm::reverse(*L))
1297  LoopsToClone.push_back({ClonedL, ChildL});
1298  } while (!LoopsToClone.empty());
1299 
1300  return ClonedRootL;
1301 }
1302 
1303 /// Build the cloned loops of an original loop from unswitching.
1304 ///
1305 /// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1306 /// operation. We need to re-verify that there even is a loop (as the backedge
1307 /// may not have been cloned), and even if there are remaining backedges the
1308 /// backedge set may be different. However, we know that each child loop is
1309 /// undisturbed, we only need to find where to place each child loop within
1310 /// either any parent loop or within a cloned version of the original loop.
1311 ///
1312 /// Because child loops may end up cloned outside of any cloned version of the
1313 /// original loop, multiple cloned sibling loops may be created. All of them
1314 /// are returned so that the newly introduced loop nest roots can be
1315 /// identified.
1316 static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1317  const ValueToValueMapTy &VMap, LoopInfo &LI,
1318  SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1319  Loop *ClonedL = nullptr;
1320 
1321  auto *OrigPH = OrigL.getLoopPreheader();
1322  auto *OrigHeader = OrigL.getHeader();
1323 
1324  auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1325  auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1326 
1327  // We need to know the loops of the cloned exit blocks to even compute the
1328  // accurate parent loop. If we only clone exits to some parent of the
1329  // original parent, we want to clone into that outer loop. We also keep track
1330  // of the loops that our cloned exit blocks participate in.
1331  Loop *ParentL = nullptr;
1332  SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1334  ClonedExitsInLoops.reserve(ExitBlocks.size());
1335  for (auto *ExitBB : ExitBlocks)
1336  if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1337  if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1338  ExitLoopMap[ClonedExitBB] = ExitL;
1339  ClonedExitsInLoops.push_back(ClonedExitBB);
1340  if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1341  ParentL = ExitL;
1342  }
1343  assert((!ParentL || ParentL == OrigL.getParentLoop() ||
1344  ParentL->contains(OrigL.getParentLoop())) &&
1345  "The computed parent loop should always contain (or be) the parent of "
1346  "the original loop.");
1347 
1348  // We build the set of blocks dominated by the cloned header from the set of
1349  // cloned blocks out of the original loop. While not all of these will
1350  // necessarily be in the cloned loop, it is enough to establish that they
1351  // aren't in unreachable cycles, etc.
1352  SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1353  for (auto *BB : OrigL.blocks())
1354  if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1355  ClonedLoopBlocks.insert(ClonedBB);
1356 
1357  // Rebuild the set of blocks that will end up in the cloned loop. We may have
1358  // skipped cloning some region of this loop which can in turn skip some of
1359  // the backedges so we have to rebuild the blocks in the loop based on the
1360  // backedges that remain after cloning.
1362  SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1363  for (auto *Pred : predecessors(ClonedHeader)) {
1364  // The only possible non-loop header predecessor is the preheader because
1365  // we know we cloned the loop in simplified form.
1366  if (Pred == ClonedPH)
1367  continue;
1368 
1369  // Because the loop was in simplified form, the only non-loop predecessor
1370  // should be the preheader.
1371  assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
1372  "header other than the preheader "
1373  "that is not part of the loop!");
1374 
1375  // Insert this block into the loop set and on the first visit (and if it
1376  // isn't the header we're currently walking) put it into the worklist to
1377  // recurse through.
1378  if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1379  Worklist.push_back(Pred);
1380  }
1381 
1382  // If we had any backedges then there *is* a cloned loop. Put the header into
1383  // the loop set and then walk the worklist backwards to find all the blocks
1384  // that remain within the loop after cloning.
1385  if (!BlocksInClonedLoop.empty()) {
1386  BlocksInClonedLoop.insert(ClonedHeader);
1387 
1388  while (!Worklist.empty()) {
1389  BasicBlock *BB = Worklist.pop_back_val();
1390  assert(BlocksInClonedLoop.count(BB) &&
1391  "Didn't put block into the loop set!");
1392 
1393  // Insert any predecessors that are in the possible set into the cloned
1394  // set, and if the insert is successful, add them to the worklist. Note
1395  // that we filter on the blocks that are definitely reachable via the
1396  // backedge to the loop header so we may prune out dead code within the
1397  // cloned loop.
1398  for (auto *Pred : predecessors(BB))
1399  if (ClonedLoopBlocks.count(Pred) &&
1400  BlocksInClonedLoop.insert(Pred).second)
1401  Worklist.push_back(Pred);
1402  }
1403 
1404  ClonedL = LI.AllocateLoop();
1405  if (ParentL) {
1406  ParentL->addBasicBlockToLoop(ClonedPH, LI);
1407  ParentL->addChildLoop(ClonedL);
1408  } else {
1409  LI.addTopLevelLoop(ClonedL);
1410  }
1411  NonChildClonedLoops.push_back(ClonedL);
1412 
1413  ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1414  // We don't want to just add the cloned loop blocks based on how we
1415  // discovered them. The original order of blocks was carefully built in
1416  // a way that doesn't rely on predecessor ordering. Rather than re-invent
1417  // that logic, we just re-walk the original blocks (and those of the child
1418  // loops) and filter them as we add them into the cloned loop.
1419  for (auto *BB : OrigL.blocks()) {
1420  auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1421  if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1422  continue;
1423 
1424  // Directly add the blocks that are only in this loop.
1425  if (LI.getLoopFor(BB) == &OrigL) {
1426  ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1427  continue;
1428  }
1429 
1430  // We want to manually add it to this loop and parents.
1431  // Registering it with LoopInfo will happen when we clone the top
1432  // loop for this block.
1433  for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1434  PL->addBlockEntry(ClonedBB);
1435  }
1436 
1437  // Now add each child loop whose header remains within the cloned loop. All
1438  // of the blocks within the loop must satisfy the same constraints as the
1439  // header so once we pass the header checks we can just clone the entire
1440  // child loop nest.
1441  for (Loop *ChildL : OrigL) {
1442  auto *ClonedChildHeader =
1443  cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1444  if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1445  continue;
1446 
1447 #ifndef NDEBUG
1448  // We should never have a cloned child loop header but fail to have
1449  // all of the blocks for that child loop.
1450  for (auto *ChildLoopBB : ChildL->blocks())
1451  assert(BlocksInClonedLoop.count(
1452  cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
1453  "Child cloned loop has a header within the cloned outer "
1454  "loop but not all of its blocks!");
1455 #endif
1456 
1457  cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1458  }
1459  }
1460 
1461  // Now that we've handled all the components of the original loop that were
1462  // cloned into a new loop, we still need to handle anything from the original
1463  // loop that wasn't in a cloned loop.
1464 
1465  // Figure out what blocks are left to place within any loop nest containing
1466  // the unswitched loop. If we never formed a loop, the cloned PH is one of
1467  // them.
1468  SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1469  if (BlocksInClonedLoop.empty())
1470  UnloopedBlockSet.insert(ClonedPH);
1471  for (auto *ClonedBB : ClonedLoopBlocks)
1472  if (!BlocksInClonedLoop.count(ClonedBB))
1473  UnloopedBlockSet.insert(ClonedBB);
1474 
1475  // Copy the cloned exits and sort them in ascending loop depth, we'll work
1476  // backwards across these to process them inside out. The order shouldn't
1477  // matter as we're just trying to build up the map from inside-out; we use
1478  // the map in a more stably ordered way below.
1479  auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1480  llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1481  return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1482  ExitLoopMap.lookup(RHS)->getLoopDepth();
1483  });
1484 
1485  // Populate the existing ExitLoopMap with everything reachable from each
1486  // exit, starting from the inner most exit.
1487  while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1488  assert(Worklist.empty() && "Didn't clear worklist!");
1489 
1490  BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1491  Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1492 
1493  // Walk the CFG back until we hit the cloned PH adding everything reachable
1494  // and in the unlooped set to this exit block's loop.
1495  Worklist.push_back(ExitBB);
1496  do {
1497  BasicBlock *BB = Worklist.pop_back_val();
1498  // We can stop recursing at the cloned preheader (if we get there).
1499  if (BB == ClonedPH)
1500  continue;
1501 
1502  for (BasicBlock *PredBB : predecessors(BB)) {
1503  // If this pred has already been moved to our set or is part of some
1504  // (inner) loop, no update needed.
1505  if (!UnloopedBlockSet.erase(PredBB)) {
1506  assert(
1507  (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
1508  "Predecessor not mapped to a loop!");
1509  continue;
1510  }
1511 
1512  // We just insert into the loop set here. We'll add these blocks to the
1513  // exit loop after we build up the set in an order that doesn't rely on
1514  // predecessor order (which in turn relies on use list order).
1515  bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1516  (void)Inserted;
1517  assert(Inserted && "Should only visit an unlooped block once!");
1518 
1519  // And recurse through to its predecessors.
1520  Worklist.push_back(PredBB);
1521  }
1522  } while (!Worklist.empty());
1523  }
1524 
1525  // Now that the ExitLoopMap gives as mapping for all the non-looping cloned
1526  // blocks to their outer loops, walk the cloned blocks and the cloned exits
1527  // in their original order adding them to the correct loop.
1528 
1529  // We need a stable insertion order. We use the order of the original loop
1530  // order and map into the correct parent loop.
1531  for (auto *BB : llvm::concat<BasicBlock *const>(
1532  makeArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1533  if (Loop *OuterL = ExitLoopMap.lookup(BB))
1534  OuterL->addBasicBlockToLoop(BB, LI);
1535 
1536 #ifndef NDEBUG
1537  for (auto &BBAndL : ExitLoopMap) {
1538  auto *BB = BBAndL.first;
1539  auto *OuterL = BBAndL.second;
1540  assert(LI.getLoopFor(BB) == OuterL &&
1541  "Failed to put all blocks into outer loops!");
1542  }
1543 #endif
1544 
1545  // Now that all the blocks are placed into the correct containing loop in the
1546  // absence of child loops, find all the potentially cloned child loops and
1547  // clone them into whatever outer loop we placed their header into.
1548  for (Loop *ChildL : OrigL) {
1549  auto *ClonedChildHeader =
1550  cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1551  if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1552  continue;
1553 
1554 #ifndef NDEBUG
1555  for (auto *ChildLoopBB : ChildL->blocks())
1556  assert(VMap.count(ChildLoopBB) &&
1557  "Cloned a child loop header but not all of that loops blocks!");
1558 #endif
1559 
1560  NonChildClonedLoops.push_back(cloneLoopNest(
1561  *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1562  }
1563 }
1564 
1565 static void
1567  ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1568  DominatorTree &DT, MemorySSAUpdater *MSSAU) {
1569  // Find all the dead clones, and remove them from their successors.
1570  SmallVector<BasicBlock *, 16> DeadBlocks;
1571  for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1572  for (auto &VMap : VMaps)
1573  if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1574  if (!DT.isReachableFromEntry(ClonedBB)) {
1575  for (BasicBlock *SuccBB : successors(ClonedBB))
1576  SuccBB->removePredecessor(ClonedBB);
1577  DeadBlocks.push_back(ClonedBB);
1578  }
1579 
1580  // Remove all MemorySSA in the dead blocks
1581  if (MSSAU) {
1582  SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1583  DeadBlocks.end());
1584  MSSAU->removeBlocks(DeadBlockSet);
1585  }
1586 
1587  // Drop any remaining references to break cycles.
1588  for (BasicBlock *BB : DeadBlocks)
1589  BB->dropAllReferences();
1590  // Erase them from the IR.
1591  for (BasicBlock *BB : DeadBlocks)
1592  BB->eraseFromParent();
1593 }
1594 
1595 static void
1597  SmallVectorImpl<BasicBlock *> &ExitBlocks,
1598  DominatorTree &DT, LoopInfo &LI,
1599  MemorySSAUpdater *MSSAU,
1600  function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
1601  // Find all the dead blocks tied to this loop, and remove them from their
1602  // successors.
1603  SmallSetVector<BasicBlock *, 8> DeadBlockSet;
1604 
1605  // Start with loop/exit blocks and get a transitive closure of reachable dead
1606  // blocks.
1607  SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1608  ExitBlocks.end());
1609  DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1610  while (!DeathCandidates.empty()) {
1611  auto *BB = DeathCandidates.pop_back_val();
1612  if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1613  for (BasicBlock *SuccBB : successors(BB)) {
1614  SuccBB->removePredecessor(BB);
1615  DeathCandidates.push_back(SuccBB);
1616  }
1617  DeadBlockSet.insert(BB);
1618  }
1619  }
1620 
1621  // Remove all MemorySSA in the dead blocks
1622  if (MSSAU)
1623  MSSAU->removeBlocks(DeadBlockSet);
1624 
1625  // Filter out the dead blocks from the exit blocks list so that it can be
1626  // used in the caller.
1627  llvm::erase_if(ExitBlocks,
1628  [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1629 
1630  // Walk from this loop up through its parents removing all of the dead blocks.
1631  for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1632  for (auto *BB : DeadBlockSet)
1633  ParentL->getBlocksSet().erase(BB);
1634  llvm::erase_if(ParentL->getBlocksVector(),
1635  [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1636  }
1637 
1638  // Now delete the dead child loops. This raw delete will clear them
1639  // recursively.
1640  llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1641  if (!DeadBlockSet.count(ChildL->getHeader()))
1642  return false;
1643 
1644  assert(llvm::all_of(ChildL->blocks(),
1645  [&](BasicBlock *ChildBB) {
1646  return DeadBlockSet.count(ChildBB);
1647  }) &&
1648  "If the child loop header is dead all blocks in the child loop must "
1649  "be dead as well!");
1650  DestroyLoopCB(*ChildL, ChildL->getName());
1651  LI.destroy(ChildL);
1652  return true;
1653  });
1654 
1655  // Remove the loop mappings for the dead blocks and drop all the references
1656  // from these blocks to others to handle cyclic references as we start
1657  // deleting the blocks themselves.
1658  for (auto *BB : DeadBlockSet) {
1659  // Check that the dominator tree has already been updated.
1660  assert(!DT.getNode(BB) && "Should already have cleared domtree!");
1661  LI.changeLoopFor(BB, nullptr);
1662  // Drop all uses of the instructions to make sure we won't have dangling
1663  // uses in other blocks.
1664  for (auto &I : *BB)
1665  if (!I.use_empty())
1666  I.replaceAllUsesWith(UndefValue::get(I.getType()));
1667  BB->dropAllReferences();
1668  }
1669 
1670  // Actually delete the blocks now that they've been fully unhooked from the
1671  // IR.
1672  for (auto *BB : DeadBlockSet)
1673  BB->eraseFromParent();
1674 }
1675 
1676 /// Recompute the set of blocks in a loop after unswitching.
1677 ///
1678 /// This walks from the original headers predecessors to rebuild the loop. We
1679 /// take advantage of the fact that new blocks can't have been added, and so we
1680 /// filter by the original loop's blocks. This also handles potentially
1681 /// unreachable code that we don't want to explore but might be found examining
1682 /// the predecessors of the header.
1683 ///
1684 /// If the original loop is no longer a loop, this will return an empty set. If
1685 /// it remains a loop, all the blocks within it will be added to the set
1686 /// (including those blocks in inner loops).
1688  LoopInfo &LI) {
1690 
1691  auto *PH = L.getLoopPreheader();
1692  auto *Header = L.getHeader();
1693 
1694  // A worklist to use while walking backwards from the header.
1696 
1697  // First walk the predecessors of the header to find the backedges. This will
1698  // form the basis of our walk.
1699  for (auto *Pred : predecessors(Header)) {
1700  // Skip the preheader.
1701  if (Pred == PH)
1702  continue;
1703 
1704  // Because the loop was in simplified form, the only non-loop predecessor
1705  // is the preheader.
1706  assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1707  "than the preheader that is not part of the "
1708  "loop!");
1709 
1710  // Insert this block into the loop set and on the first visit and, if it
1711  // isn't the header we're currently walking, put it into the worklist to
1712  // recurse through.
1713  if (LoopBlockSet.insert(Pred).second && Pred != Header)
1714  Worklist.push_back(Pred);
1715  }
1716 
1717  // If no backedges were found, we're done.
1718  if (LoopBlockSet.empty())
1719  return LoopBlockSet;
1720 
1721  // We found backedges, recurse through them to identify the loop blocks.
1722  while (!Worklist.empty()) {
1723  BasicBlock *BB = Worklist.pop_back_val();
1724  assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1725 
1726  // No need to walk past the header.
1727  if (BB == Header)
1728  continue;
1729 
1730  // Because we know the inner loop structure remains valid we can use the
1731  // loop structure to jump immediately across the entire nested loop.
1732  // Further, because it is in loop simplified form, we can directly jump
1733  // to its preheader afterward.
1734  if (Loop *InnerL = LI.getLoopFor(BB))
1735  if (InnerL != &L) {
1736  assert(L.contains(InnerL) &&
1737  "Should not reach a loop *outside* this loop!");
1738  // The preheader is the only possible predecessor of the loop so
1739  // insert it into the set and check whether it was already handled.
1740  auto *InnerPH = InnerL->getLoopPreheader();
1741  assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1742  "but not contain the inner loop "
1743  "preheader!");
1744  if (!LoopBlockSet.insert(InnerPH).second)
1745  // The only way to reach the preheader is through the loop body
1746  // itself so if it has been visited the loop is already handled.
1747  continue;
1748 
1749  // Insert all of the blocks (other than those already present) into
1750  // the loop set. We expect at least the block that led us to find the
1751  // inner loop to be in the block set, but we may also have other loop
1752  // blocks if they were already enqueued as predecessors of some other
1753  // outer loop block.
1754  for (auto *InnerBB : InnerL->blocks()) {
1755  if (InnerBB == BB) {
1756  assert(LoopBlockSet.count(InnerBB) &&
1757  "Block should already be in the set!");
1758  continue;
1759  }
1760 
1761  LoopBlockSet.insert(InnerBB);
1762  }
1763 
1764  // Add the preheader to the worklist so we will continue past the
1765  // loop body.
1766  Worklist.push_back(InnerPH);
1767  continue;
1768  }
1769 
1770  // Insert any predecessors that were in the original loop into the new
1771  // set, and if the insert is successful, add them to the worklist.
1772  for (auto *Pred : predecessors(BB))
1773  if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1774  Worklist.push_back(Pred);
1775  }
1776 
1777  assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
1778 
1779  // We've found all the blocks participating in the loop, return our completed
1780  // set.
1781  return LoopBlockSet;
1782 }
1783 
1784 /// Rebuild a loop after unswitching removes some subset of blocks and edges.
1785 ///
1786 /// The removal may have removed some child loops entirely but cannot have
1787 /// disturbed any remaining child loops. However, they may need to be hoisted
1788 /// to the parent loop (or to be top-level loops). The original loop may be
1789 /// completely removed.
1790 ///
1791 /// The sibling loops resulting from this update are returned. If the original
1792 /// loop remains a valid loop, it will be the first entry in this list with all
1793 /// of the newly sibling loops following it.
1794 ///
1795 /// Returns true if the loop remains a loop after unswitching, and false if it
1796 /// is no longer a loop after unswitching (and should not continue to be
1797 /// referenced).
1799  LoopInfo &LI,
1800  SmallVectorImpl<Loop *> &HoistedLoops) {
1801  auto *PH = L.getLoopPreheader();
1802 
1803  // Compute the actual parent loop from the exit blocks. Because we may have
1804  // pruned some exits the loop may be different from the original parent.
1805  Loop *ParentL = nullptr;
1806  SmallVector<Loop *, 4> ExitLoops;
1807  SmallVector<BasicBlock *, 4> ExitsInLoops;
1808  ExitsInLoops.reserve(ExitBlocks.size());
1809  for (auto *ExitBB : ExitBlocks)
1810  if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1811  ExitLoops.push_back(ExitL);
1812  ExitsInLoops.push_back(ExitBB);
1813  if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1814  ParentL = ExitL;
1815  }
1816 
1817  // Recompute the blocks participating in this loop. This may be empty if it
1818  // is no longer a loop.
1819  auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1820 
1821  // If we still have a loop, we need to re-set the loop's parent as the exit
1822  // block set changing may have moved it within the loop nest. Note that this
1823  // can only happen when this loop has a parent as it can only hoist the loop
1824  // *up* the nest.
1825  if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1826  // Remove this loop's (original) blocks from all of the intervening loops.
1827  for (Loop *IL = L.getParentLoop(); IL != ParentL;
1828  IL = IL->getParentLoop()) {
1829  IL->getBlocksSet().erase(PH);
1830  for (auto *BB : L.blocks())
1831  IL->getBlocksSet().erase(BB);
1832  llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1833  return BB == PH || L.contains(BB);
1834  });
1835  }
1836 
1837  LI.changeLoopFor(PH, ParentL);
1838  L.getParentLoop()->removeChildLoop(&L);
1839  if (ParentL)
1840  ParentL->addChildLoop(&L);
1841  else
1842  LI.addTopLevelLoop(&L);
1843  }
1844 
1845  // Now we update all the blocks which are no longer within the loop.
1846  auto &Blocks = L.getBlocksVector();
1847  auto BlocksSplitI =
1848  LoopBlockSet.empty()
1849  ? Blocks.begin()
1850  : std::stable_partition(
1851  Blocks.begin(), Blocks.end(),
1852  [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
1853 
1854  // Before we erase the list of unlooped blocks, build a set of them.
1855  SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
1856  if (LoopBlockSet.empty())
1857  UnloopedBlocks.insert(PH);
1858 
1859  // Now erase these blocks from the loop.
1860  for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
1861  L.getBlocksSet().erase(BB);
1862  Blocks.erase(BlocksSplitI, Blocks.end());
1863 
1864  // Sort the exits in ascending loop depth, we'll work backwards across these
1865  // to process them inside out.
1866  llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1867  return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
1868  });
1869 
1870  // We'll build up a set for each exit loop.
1871  SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
1872  Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
1873 
1874  auto RemoveUnloopedBlocksFromLoop =
1875  [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
1876  for (auto *BB : UnloopedBlocks)
1877  L.getBlocksSet().erase(BB);
1879  return UnloopedBlocks.count(BB);
1880  });
1881  };
1882 
1884  while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
1885  assert(Worklist.empty() && "Didn't clear worklist!");
1886  assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
1887 
1888  // Grab the next exit block, in decreasing loop depth order.
1889  BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
1890  Loop &ExitL = *LI.getLoopFor(ExitBB);
1891  assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
1892 
1893  // Erase all of the unlooped blocks from the loops between the previous
1894  // exit loop and this exit loop. This works because the ExitInLoops list is
1895  // sorted in increasing order of loop depth and thus we visit loops in
1896  // decreasing order of loop depth.
1897  for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
1898  RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1899 
1900  // Walk the CFG back until we hit the cloned PH adding everything reachable
1901  // and in the unlooped set to this exit block's loop.
1902  Worklist.push_back(ExitBB);
1903  do {
1904  BasicBlock *BB = Worklist.pop_back_val();
1905  // We can stop recursing at the cloned preheader (if we get there).
1906  if (BB == PH)
1907  continue;
1908 
1909  for (BasicBlock *PredBB : predecessors(BB)) {
1910  // If this pred has already been moved to our set or is part of some
1911  // (inner) loop, no update needed.
1912  if (!UnloopedBlocks.erase(PredBB)) {
1913  assert((NewExitLoopBlocks.count(PredBB) ||
1914  ExitL.contains(LI.getLoopFor(PredBB))) &&
1915  "Predecessor not in a nested loop (or already visited)!");
1916  continue;
1917  }
1918 
1919  // We just insert into the loop set here. We'll add these blocks to the
1920  // exit loop after we build up the set in a deterministic order rather
1921  // than the predecessor-influenced visit order.
1922  bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
1923  (void)Inserted;
1924  assert(Inserted && "Should only visit an unlooped block once!");
1925 
1926  // And recurse through to its predecessors.
1927  Worklist.push_back(PredBB);
1928  }
1929  } while (!Worklist.empty());
1930 
1931  // If blocks in this exit loop were directly part of the original loop (as
1932  // opposed to a child loop) update the map to point to this exit loop. This
1933  // just updates a map and so the fact that the order is unstable is fine.
1934  for (auto *BB : NewExitLoopBlocks)
1935  if (Loop *BBL = LI.getLoopFor(BB))
1936  if (BBL == &L || !L.contains(BBL))
1937  LI.changeLoopFor(BB, &ExitL);
1938 
1939  // We will remove the remaining unlooped blocks from this loop in the next
1940  // iteration or below.
1941  NewExitLoopBlocks.clear();
1942  }
1943 
1944  // Any remaining unlooped blocks are no longer part of any loop unless they
1945  // are part of some child loop.
1946  for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
1947  RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1948  for (auto *BB : UnloopedBlocks)
1949  if (Loop *BBL = LI.getLoopFor(BB))
1950  if (BBL == &L || !L.contains(BBL))
1951  LI.changeLoopFor(BB, nullptr);
1952 
1953  // Sink all the child loops whose headers are no longer in the loop set to
1954  // the parent (or to be top level loops). We reach into the loop and directly
1955  // update its subloop vector to make this batch update efficient.
1956  auto &SubLoops = L.getSubLoopsVector();
1957  auto SubLoopsSplitI =
1958  LoopBlockSet.empty()
1959  ? SubLoops.begin()
1960  : std::stable_partition(
1961  SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
1962  return LoopBlockSet.count(SubL->getHeader());
1963  });
1964  for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
1965  HoistedLoops.push_back(HoistedL);
1966  HoistedL->setParentLoop(nullptr);
1967 
1968  // To compute the new parent of this hoisted loop we look at where we
1969  // placed the preheader above. We can't lookup the header itself because we
1970  // retained the mapping from the header to the hoisted loop. But the
1971  // preheader and header should have the exact same new parent computed
1972  // based on the set of exit blocks from the original loop as the preheader
1973  // is a predecessor of the header and so reached in the reverse walk. And
1974  // because the loops were all in simplified form the preheader of the
1975  // hoisted loop can't be part of some *other* loop.
1976  if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
1977  NewParentL->addChildLoop(HoistedL);
1978  else
1979  LI.addTopLevelLoop(HoistedL);
1980  }
1981  SubLoops.erase(SubLoopsSplitI, SubLoops.end());
1982 
1983  // Actually delete the loop if nothing remained within it.
1984  if (Blocks.empty()) {
1985  assert(SubLoops.empty() &&
1986  "Failed to remove all subloops from the original loop!");
1987  if (Loop *ParentL = L.getParentLoop())
1988  ParentL->removeChildLoop(llvm::find(*ParentL, &L));
1989  else
1990  LI.removeLoop(llvm::find(LI, &L));
1991  // markLoopAsDeleted for L should be triggered by the caller (it is typically
1992  // done by using the UnswitchCB callback).
1993  LI.destroy(&L);
1994  return false;
1995  }
1996 
1997  return true;
1998 }
1999 
2000 /// Helper to visit a dominator subtree, invoking a callable on each node.
2001 ///
2002 /// Returning false at any point will stop walking past that node of the tree.
2003 template <typename CallableT>
2004 void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
2005  SmallVector<DomTreeNode *, 4> DomWorklist;
2006  DomWorklist.push_back(DT[BB]);
2007 #ifndef NDEBUG
2009  Visited.insert(DT[BB]);
2010 #endif
2011  do {
2012  DomTreeNode *N = DomWorklist.pop_back_val();
2013 
2014  // Visit this node.
2015  if (!Callable(N->getBlock()))
2016  continue;
2017 
2018  // Accumulate the child nodes.
2019  for (DomTreeNode *ChildN : *N) {
2020  assert(Visited.insert(ChildN).second &&
2021  "Cannot visit a node twice when walking a tree!");
2022  DomWorklist.push_back(ChildN);
2023  }
2024  } while (!DomWorklist.empty());
2025 }
2026 
2028  Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
2029  SmallVectorImpl<BasicBlock *> &ExitBlocks, IVConditionInfo &PartialIVInfo,
2030  DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
2031  function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2032  ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
2033  function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
2034  auto *ParentBB = TI.getParent();
2035  BranchInst *BI = dyn_cast<BranchInst>(&TI);
2036  SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2037 
2038  // We can only unswitch switches, conditional branches with an invariant
2039  // condition, or combining invariant conditions with an instruction or
2040  // partially invariant instructions.
2041  assert((SI || (BI && BI->isConditional())) &&
2042  "Can only unswitch switches and conditional branch!");
2043  bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
2044  bool FullUnswitch =
2045  SI || (BI->getCondition() == Invariants[0] && !PartiallyInvariant);
2046  if (FullUnswitch)
2047  assert(Invariants.size() == 1 &&
2048  "Cannot have other invariants with full unswitching!");
2049  else
2050  assert(isa<Instruction>(BI->getCondition()) &&
2051  "Partial unswitching requires an instruction as the condition!");
2052 
2053  if (MSSAU && VerifyMemorySSA)
2054  MSSAU->getMemorySSA()->verifyMemorySSA();
2055 
2056  // Constant and BBs tracking the cloned and continuing successor. When we are
2057  // unswitching the entire condition, this can just be trivially chosen to
2058  // unswitch towards `true`. However, when we are unswitching a set of
2059  // invariants combined with `and` or `or` or partially invariant instructions,
2060  // the combining operation determines the best direction to unswitch: we want
2061  // to unswitch the direction that will collapse the branch.
2062  bool Direction = true;
2063  int ClonedSucc = 0;
2064  if (!FullUnswitch) {
2065  Value *Cond = BI->getCondition();
2066  (void)Cond;
2068  PartiallyInvariant) &&
2069  "Only `or`, `and`, an `select`, partially invariant instructions "
2070  "can combine invariants being unswitched.");
2071  if (!match(BI->getCondition(), m_LogicalOr())) {
2072  if (match(BI->getCondition(), m_LogicalAnd()) ||
2073  (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
2074  Direction = false;
2075  ClonedSucc = 1;
2076  }
2077  }
2078  }
2079 
2080  BasicBlock *RetainedSuccBB =
2081  BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2082  SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2083  if (BI)
2084  UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2085  else
2086  for (auto Case : SI->cases())
2087  if (Case.getCaseSuccessor() != RetainedSuccBB)
2088  UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2089 
2090  assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
2091  "Should not unswitch the same successor we are retaining!");
2092 
2093  // The branch should be in this exact loop. Any inner loop's invariant branch
2094  // should be handled by unswitching that inner loop. The caller of this
2095  // routine should filter out any candidates that remain (but were skipped for
2096  // whatever reason).
2097  assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
2098 
2099  // Compute the parent loop now before we start hacking on things.
2100  Loop *ParentL = L.getParentLoop();
2101  // Get blocks in RPO order for MSSA update, before changing the CFG.
2102  LoopBlocksRPO LBRPO(&L);
2103  if (MSSAU)
2104  LBRPO.perform(&LI);
2105 
2106  // Compute the outer-most loop containing one of our exit blocks. This is the
2107  // furthest up our loopnest which can be mutated, which we will use below to
2108  // update things.
2109  Loop *OuterExitL = &L;
2110  for (auto *ExitBB : ExitBlocks) {
2111  Loop *NewOuterExitL = LI.getLoopFor(ExitBB);
2112  if (!NewOuterExitL) {
2113  // We exited the entire nest with this block, so we're done.
2114  OuterExitL = nullptr;
2115  break;
2116  }
2117  if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2118  OuterExitL = NewOuterExitL;
2119  }
2120 
2121  // At this point, we're definitely going to unswitch something so invalidate
2122  // any cached information in ScalarEvolution for the outer most loop
2123  // containing an exit block and all nested loops.
2124  if (SE) {
2125  if (OuterExitL)
2126  SE->forgetLoop(OuterExitL);
2127  else
2128  SE->forgetTopmostLoop(&L);
2129  }
2130 
2131  bool InsertFreeze = false;
2132  if (FreezeLoopUnswitchCond) {
2133  ICFLoopSafetyInfo SafetyInfo;
2134  SafetyInfo.computeLoopSafetyInfo(&L);
2135  InsertFreeze = !SafetyInfo.isGuaranteedToExecute(TI, &DT, &L);
2136  }
2137 
2138  // If the edge from this terminator to a successor dominates that successor,
2139  // store a map from each block in its dominator subtree to it. This lets us
2140  // tell when cloning for a particular successor if a block is dominated by
2141  // some *other* successor with a single data structure. We use this to
2142  // significantly reduce cloning.
2144  for (auto *SuccBB : llvm::concat<BasicBlock *const>(
2145  makeArrayRef(RetainedSuccBB), UnswitchedSuccBBs))
2146  if (SuccBB->getUniquePredecessor() ||
2147  llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2148  return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2149  }))
2150  visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2151  DominatingSucc[BB] = SuccBB;
2152  return true;
2153  });
2154 
2155  // Split the preheader, so that we know that there is a safe place to insert
2156  // the conditional branch. We will change the preheader to have a conditional
2157  // branch on LoopCond. The original preheader will become the split point
2158  // between the unswitched versions, and we will have a new preheader for the
2159  // original loop.
2160  BasicBlock *SplitBB = L.getLoopPreheader();
2161  BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2162 
2163  // Keep track of the dominator tree updates needed.
2165 
2166  // Clone the loop for each unswitched successor.
2168  VMaps.reserve(UnswitchedSuccBBs.size());
2170  for (auto *SuccBB : UnswitchedSuccBBs) {
2171  VMaps.emplace_back(new ValueToValueMapTy());
2172  ClonedPHs[SuccBB] = buildClonedLoopBlocks(
2173  L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2174  DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU);
2175  }
2176 
2177  // Drop metadata if we may break its semantics by moving this instr into the
2178  // split block.
2179  if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2181  // Do not spend time trying to understand if we can keep it, just drop it
2182  // to save compile time.
2183  TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2184  else {
2185  // It is only legal to preserve make.implicit metadata if we are
2186  // guaranteed no reach implicit null check after following this branch.
2187  ICFLoopSafetyInfo SafetyInfo;
2188  SafetyInfo.computeLoopSafetyInfo(&L);
2189  if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2190  TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2191  }
2192  }
2193 
2194  // The stitching of the branched code back together depends on whether we're
2195  // doing full unswitching or not with the exception that we always want to
2196  // nuke the initial terminator placed in the split block.
2197  SplitBB->getTerminator()->eraseFromParent();
2198  if (FullUnswitch) {
2199  // Splice the terminator from the original loop and rewrite its
2200  // successors.
2201  SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI);
2202 
2203  // Keep a clone of the terminator for MSSA updates.
2204  Instruction *NewTI = TI.clone();
2205  ParentBB->getInstList().push_back(NewTI);
2206 
2207  // First wire up the moved terminator to the preheaders.
2208  if (BI) {
2209  BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2210  BI->setSuccessor(ClonedSucc, ClonedPH);
2211  BI->setSuccessor(1 - ClonedSucc, LoopPH);
2212  if (InsertFreeze) {
2213  auto Cond = BI->getCondition();
2214  if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, BI, &DT))
2215  BI->setCondition(new FreezeInst(Cond, Cond->getName() + ".fr", BI));
2216  }
2217  DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2218  } else {
2219  assert(SI && "Must either be a branch or switch!");
2220 
2221  // Walk the cases and directly update their successors.
2222  assert(SI->getDefaultDest() == RetainedSuccBB &&
2223  "Not retaining default successor!");
2224  SI->setDefaultDest(LoopPH);
2225  for (auto &Case : SI->cases())
2226  if (Case.getCaseSuccessor() == RetainedSuccBB)
2227  Case.setSuccessor(LoopPH);
2228  else
2229  Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2230 
2231  if (InsertFreeze) {
2232  auto Cond = SI->getCondition();
2233  if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, SI, &DT))
2234  SI->setCondition(new FreezeInst(Cond, Cond->getName() + ".fr", SI));
2235  }
2236  // We need to use the set to populate domtree updates as even when there
2237  // are multiple cases pointing at the same successor we only want to
2238  // remove and insert one edge in the domtree.
2239  for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2240  DTUpdates.push_back(
2241  {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2242  }
2243 
2244  if (MSSAU) {
2245  DT.applyUpdates(DTUpdates);
2246  DTUpdates.clear();
2247 
2248  // Remove all but one edge to the retained block and all unswitched
2249  // blocks. This is to avoid having duplicate entries in the cloned Phis,
2250  // when we know we only keep a single edge for each case.
2251  MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2252  for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2253  MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2254 
2255  for (auto &VMap : VMaps)
2256  MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2257  /*IgnoreIncomingWithNoClones=*/true);
2258  MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2259 
2260  // Remove all edges to unswitched blocks.
2261  for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2262  MSSAU->removeEdge(ParentBB, SuccBB);
2263  }
2264 
2265  // Now unhook the successor relationship as we'll be replacing
2266  // the terminator with a direct branch. This is much simpler for branches
2267  // than switches so we handle those first.
2268  if (BI) {
2269  // Remove the parent as a predecessor of the unswitched successor.
2270  assert(UnswitchedSuccBBs.size() == 1 &&
2271  "Only one possible unswitched block for a branch!");
2272  BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2273  UnswitchedSuccBB->removePredecessor(ParentBB,
2274  /*KeepOneInputPHIs*/ true);
2275  DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2276  } else {
2277  // Note that we actually want to remove the parent block as a predecessor
2278  // of *every* case successor. The case successor is either unswitched,
2279  // completely eliminating an edge from the parent to that successor, or it
2280  // is a duplicate edge to the retained successor as the retained successor
2281  // is always the default successor and as we'll replace this with a direct
2282  // branch we no longer need the duplicate entries in the PHI nodes.
2283  SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2284  assert(NewSI->getDefaultDest() == RetainedSuccBB &&
2285  "Not retaining default successor!");
2286  for (auto &Case : NewSI->cases())
2287  Case.getCaseSuccessor()->removePredecessor(
2288  ParentBB,
2289  /*KeepOneInputPHIs*/ true);
2290 
2291  // We need to use the set to populate domtree updates as even when there
2292  // are multiple cases pointing at the same successor we only want to
2293  // remove and insert one edge in the domtree.
2294  for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2295  DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2296  }
2297 
2298  // After MSSAU update, remove the cloned terminator instruction NewTI.
2299  ParentBB->getTerminator()->eraseFromParent();
2300 
2301  // Create a new unconditional branch to the continuing block (as opposed to
2302  // the one cloned).
2303  BranchInst::Create(RetainedSuccBB, ParentBB);
2304  } else {
2305  assert(BI && "Only branches have partial unswitching.");
2306  assert(UnswitchedSuccBBs.size() == 1 &&
2307  "Only one possible unswitched block for a branch!");
2308  BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2309  // When doing a partial unswitch, we have to do a bit more work to build up
2310  // the branch in the split block.
2311  if (PartiallyInvariant)
2313  *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
2314  else
2315  buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
2316  *ClonedPH, *LoopPH, InsertFreeze);
2317  DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2318 
2319  if (MSSAU) {
2320  DT.applyUpdates(DTUpdates);
2321  DTUpdates.clear();
2322 
2323  // Perform MSSA cloning updates.
2324  for (auto &VMap : VMaps)
2325  MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2326  /*IgnoreIncomingWithNoClones=*/true);
2327  MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2328  }
2329  }
2330 
2331  // Apply the updates accumulated above to get an up-to-date dominator tree.
2332  DT.applyUpdates(DTUpdates);
2333 
2334  // Now that we have an accurate dominator tree, first delete the dead cloned
2335  // blocks so that we can accurately build any cloned loops. It is important to
2336  // not delete the blocks from the original loop yet because we still want to
2337  // reference the original loop to understand the cloned loop's structure.
2338  deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2339 
2340  // Build the cloned loop structure itself. This may be substantially
2341  // different from the original structure due to the simplified CFG. This also
2342  // handles inserting all the cloned blocks into the correct loops.
2343  SmallVector<Loop *, 4> NonChildClonedLoops;
2344  for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2345  buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2346 
2347  // Now that our cloned loops have been built, we can update the original loop.
2348  // First we delete the dead blocks from it and then we rebuild the loop
2349  // structure taking these deletions into account.
2350  deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, DestroyLoopCB);
2351 
2352  if (MSSAU && VerifyMemorySSA)
2353  MSSAU->getMemorySSA()->verifyMemorySSA();
2354 
2355  SmallVector<Loop *, 4> HoistedLoops;
2356  bool IsStillLoop = rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops);
2357 
2358  if (MSSAU && VerifyMemorySSA)
2359  MSSAU->getMemorySSA()->verifyMemorySSA();
2360 
2361  // This transformation has a high risk of corrupting the dominator tree, and
2362  // the below steps to rebuild loop structures will result in hard to debug
2363  // errors in that case so verify that the dominator tree is sane first.
2364  // FIXME: Remove this when the bugs stop showing up and rely on existing
2365  // verification steps.
2367 
2368  if (BI && !PartiallyInvariant) {
2369  // If we unswitched a branch which collapses the condition to a known
2370  // constant we want to replace all the uses of the invariants within both
2371  // the original and cloned blocks. We do this here so that we can use the
2372  // now updated dominator tree to identify which side the users are on.
2373  assert(UnswitchedSuccBBs.size() == 1 &&
2374  "Only one possible unswitched block for a branch!");
2375  BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2376 
2377  // When considering multiple partially-unswitched invariants
2378  // we cant just go replace them with constants in both branches.
2379  //
2380  // For 'AND' we infer that true branch ("continue") means true
2381  // for each invariant operand.
2382  // For 'OR' we can infer that false branch ("continue") means false
2383  // for each invariant operand.
2384  // So it happens that for multiple-partial case we dont replace
2385  // in the unswitched branch.
2386  bool ReplaceUnswitched =
2387  FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
2388 
2389  ConstantInt *UnswitchedReplacement =
2392  ConstantInt *ContinueReplacement =
2395  for (Value *Invariant : Invariants) {
2396  assert(!isa<Constant>(Invariant) &&
2397  "Should not be replacing constant values!");
2398  // Use make_early_inc_range here as set invalidates the iterator.
2399  for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
2400  Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2401  if (!UserI)
2402  continue;
2403 
2404  // Replace it with the 'continue' side if in the main loop body, and the
2405  // unswitched if in the cloned blocks.
2406  if (DT.dominates(LoopPH, UserI->getParent()))
2407  U.set(ContinueReplacement);
2408  else if (ReplaceUnswitched &&
2409  DT.dominates(ClonedPH, UserI->getParent()))
2410  U.set(UnswitchedReplacement);
2411  }
2412  }
2413  }
2414 
2415  // We can change which blocks are exit blocks of all the cloned sibling
2416  // loops, the current loop, and any parent loops which shared exit blocks
2417  // with the current loop. As a consequence, we need to re-form LCSSA for
2418  // them. But we shouldn't need to re-form LCSSA for any child loops.
2419  // FIXME: This could be made more efficient by tracking which exit blocks are
2420  // new, and focusing on them, but that isn't likely to be necessary.
2421  //
2422  // In order to reasonably rebuild LCSSA we need to walk inside-out across the
2423  // loop nest and update every loop that could have had its exits changed. We
2424  // also need to cover any intervening loops. We add all of these loops to
2425  // a list and sort them by loop depth to achieve this without updating
2426  // unnecessary loops.
2427  auto UpdateLoop = [&](Loop &UpdateL) {
2428 #ifndef NDEBUG
2429  UpdateL.verifyLoop();
2430  for (Loop *ChildL : UpdateL) {
2431  ChildL->verifyLoop();
2432  assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2433  "Perturbed a child loop's LCSSA form!");
2434  }
2435 #endif
2436  // First build LCSSA for this loop so that we can preserve it when
2437  // forming dedicated exits. We don't want to perturb some other loop's
2438  // LCSSA while doing that CFG edit.
2439  formLCSSA(UpdateL, DT, &LI, SE);
2440 
2441  // For loops reached by this loop's original exit blocks we may
2442  // introduced new, non-dedicated exits. At least try to re-form dedicated
2443  // exits for these loops. This may fail if they couldn't have dedicated
2444  // exits to start with.
2445  formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
2446  };
2447 
2448  // For non-child cloned loops and hoisted loops, we just need to update LCSSA
2449  // and we can do it in any order as they don't nest relative to each other.
2450  //
2451  // Also check if any of the loops we have updated have become top-level loops
2452  // as that will necessitate widening the outer loop scope.
2453  for (Loop *UpdatedL :
2454  llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2455  UpdateLoop(*UpdatedL);
2456  if (UpdatedL->isOutermost())
2457  OuterExitL = nullptr;
2458  }
2459  if (IsStillLoop) {
2460  UpdateLoop(L);
2461  if (L.isOutermost())
2462  OuterExitL = nullptr;
2463  }
2464 
2465  // If the original loop had exit blocks, walk up through the outer most loop
2466  // of those exit blocks to update LCSSA and form updated dedicated exits.
2467  if (OuterExitL != &L)
2468  for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2469  OuterL = OuterL->getParentLoop())
2470  UpdateLoop(*OuterL);
2471 
2472 #ifndef NDEBUG
2473  // Verify the entire loop structure to catch any incorrect updates before we
2474  // progress in the pass pipeline.
2475  LI.verify(DT);
2476 #endif
2477 
2478  // Now that we've unswitched something, make callbacks to report the changes.
2479  // For that we need to merge together the updated loops and the cloned loops
2480  // and check whether the original loop survived.
2481  SmallVector<Loop *, 4> SibLoops;
2482  for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2483  if (UpdatedL->getParentLoop() == ParentL)
2484  SibLoops.push_back(UpdatedL);
2485  UnswitchCB(IsStillLoop, PartiallyInvariant, SibLoops);
2486 
2487  if (MSSAU && VerifyMemorySSA)
2488  MSSAU->getMemorySSA()->verifyMemorySSA();
2489 
2490  if (BI)
2491  ++NumBranches;
2492  else
2493  ++NumSwitches;
2494 }
2495 
2496 /// Recursively compute the cost of a dominator subtree based on the per-block
2497 /// cost map provided.
2498 ///
2499 /// The recursive computation is memozied into the provided DT-indexed cost map
2500 /// to allow querying it for most nodes in the domtree without it becoming
2501 /// quadratic.
2503  DomTreeNode &N,
2506  // Don't accumulate cost (or recurse through) blocks not in our block cost
2507  // map and thus not part of the duplication cost being considered.
2508  auto BBCostIt = BBCostMap.find(N.getBlock());
2509  if (BBCostIt == BBCostMap.end())
2510  return 0;
2511 
2512  // Lookup this node to see if we already computed its cost.
2513  auto DTCostIt = DTCostMap.find(&N);
2514  if (DTCostIt != DTCostMap.end())
2515  return DTCostIt->second;
2516 
2517  // If not, we have to compute it. We can't use insert above and update
2518  // because computing the cost may insert more things into the map.
2519  InstructionCost Cost = std::accumulate(
2520  N.begin(), N.end(), BBCostIt->second,
2521  [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
2522  return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2523  });
2524  bool Inserted = DTCostMap.insert({&N, Cost}).second;
2525  (void)Inserted;
2526  assert(Inserted && "Should not insert a node while visiting children!");
2527  return Cost;
2528 }
2529 
2530 /// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
2531 /// making the following replacement:
2532 ///
2533 /// --code before guard--
2534 /// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2535 /// --code after guard--
2536 ///
2537 /// into
2538 ///
2539 /// --code before guard--
2540 /// br i1 %cond, label %guarded, label %deopt
2541 ///
2542 /// guarded:
2543 /// --code after guard--
2544 ///
2545 /// deopt:
2546 /// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2547 /// unreachable
2548 ///
2549 /// It also makes all relevant DT and LI updates, so that all structures are in
2550 /// valid state after this transform.
2551 static BranchInst *
2553  SmallVectorImpl<BasicBlock *> &ExitBlocks,
2554  DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
2556  LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
2557  BasicBlock *CheckBB = GI->getParent();
2558 
2559  if (MSSAU && VerifyMemorySSA)
2560  MSSAU->getMemorySSA()->verifyMemorySSA();
2561 
2562  // Remove all CheckBB's successors from DomTree. A block can be seen among
2563  // successors more than once, but for DomTree it should be added only once.
2564  SmallPtrSet<BasicBlock *, 4> Successors;
2565  for (auto *Succ : successors(CheckBB))
2566  if (Successors.insert(Succ).second)
2567  DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ});
2568 
2569  Instruction *DeoptBlockTerm =
2570  SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true);
2571  BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2572  // SplitBlockAndInsertIfThen inserts control flow that branches to
2573  // DeoptBlockTerm if the condition is true. We want the opposite.
2574  CheckBI->swapSuccessors();
2575 
2576  BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2577  GuardedBlock->setName("guarded");
2578  CheckBI->getSuccessor(1)->setName("deopt");
2579  BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2580 
2581  // We now have a new exit block.
2582  ExitBlocks.push_back(CheckBI->getSuccessor(1));
2583 
2584  if (MSSAU)
2585  MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2586 
2587  GI->moveBefore(DeoptBlockTerm);
2589 
2590  // Add new successors of CheckBB into DomTree.
2591  for (auto *Succ : successors(CheckBB))
2592  DTUpdates.push_back({DominatorTree::Insert, CheckBB, Succ});
2593 
2594  // Now the blocks that used to be CheckBB's successors are GuardedBlock's
2595  // successors.
2596  for (auto *Succ : Successors)
2597  DTUpdates.push_back({DominatorTree::Insert, GuardedBlock, Succ});
2598 
2599  // Make proper changes to DT.
2600  DT.applyUpdates(DTUpdates);
2601  // Inform LI of a new loop block.
2602  L.addBasicBlockToLoop(GuardedBlock, LI);
2603 
2604  if (MSSAU) {
2605  MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
2606  MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2607  if (VerifyMemorySSA)
2608  MSSAU->getMemorySSA()->verifyMemorySSA();
2609  }
2610 
2611  ++NumGuards;
2612  return CheckBI;
2613 }
2614 
2615 /// Cost multiplier is a way to limit potentially exponential behavior
2616 /// of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch
2617 /// candidates available. Also accounting for the number of "sibling" loops with
2618 /// the idea to account for previous unswitches that already happened on this
2619 /// cluster of loops. There was an attempt to keep this formula simple,
2620 /// just enough to limit the worst case behavior. Even if it is not that simple
2621 /// now it is still not an attempt to provide a detailed heuristic size
2622 /// prediction.
2623 ///
2624 /// TODO: Make a proper accounting of "explosion" effect for all kinds of
2625 /// unswitch candidates, making adequate predictions instead of wild guesses.
2626 /// That requires knowing not just the number of "remaining" candidates but
2627 /// also costs of unswitching for each of these candidates.
2629  Instruction &TI, Loop &L, LoopInfo &LI, DominatorTree &DT,
2631  UnswitchCandidates) {
2632 
2633  // Guards and other exiting conditions do not contribute to exponential
2634  // explosion as soon as they dominate the latch (otherwise there might be
2635  // another path to the latch remaining that does not allow to eliminate the
2636  // loop copy on unswitch).
2637  BasicBlock *Latch = L.getLoopLatch();
2638  BasicBlock *CondBlock = TI.getParent();
2639  if (DT.dominates(CondBlock, Latch) &&
2640  (isGuard(&TI) ||
2641  llvm::count_if(successors(&TI), [&L](BasicBlock *SuccBB) {
2642  return L.contains(SuccBB);
2643  }) <= 1)) {
2644  NumCostMultiplierSkipped++;
2645  return 1;
2646  }
2647 
2648  auto *ParentL = L.getParentLoop();
2649  int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2650  : std::distance(LI.begin(), LI.end()));
2651  // Count amount of clones that all the candidates might cause during
2652  // unswitching. Branch/guard counts as 1, switch counts as log2 of its cases.
2653  int UnswitchedClones = 0;
2654  for (auto Candidate : UnswitchCandidates) {
2655  Instruction *CI = Candidate.first;
2656  BasicBlock *CondBlock = CI->getParent();
2657  bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2658  if (isGuard(CI)) {
2659  if (!SkipExitingSuccessors)
2660  UnswitchedClones++;
2661  continue;
2662  }
2663  int NonExitingSuccessors = llvm::count_if(
2664  successors(CondBlock), [SkipExitingSuccessors, &L](BasicBlock *SuccBB) {
2665  return !SkipExitingSuccessors || L.contains(SuccBB);
2666  });
2667  UnswitchedClones += Log2_32(NonExitingSuccessors);
2668  }
2669 
2670  // Ignore up to the "unscaled candidates" number of unswitch candidates
2671  // when calculating the power-of-two scaling of the cost. The main idea
2672  // with this control is to allow a small number of unswitches to happen
2673  // and rely more on siblings multiplier (see below) when the number
2674  // of candidates is small.
2675  unsigned ClonesPower =
2676  std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2677 
2678  // Allowing top-level loops to spread a bit more than nested ones.
2679  int SiblingsMultiplier =
2680  std::max((ParentL ? SiblingsCount
2681  : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2682  1);
2683  // Compute the cost multiplier in a way that won't overflow by saturating
2684  // at an upper bound.
2685  int CostMultiplier;
2686  if (ClonesPower > Log2_32(UnswitchThreshold) ||
2687  SiblingsMultiplier > UnswitchThreshold)
2688  CostMultiplier = UnswitchThreshold;
2689  else
2690  CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2691  (int)UnswitchThreshold);
2692 
2693  LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier
2694  << " (siblings " << SiblingsMultiplier << " * clones "
2695  << (1 << ClonesPower) << ")"
2696  << " for unswitch candidate: " << TI << "\n");
2697  return CostMultiplier;
2698 }
2699 
2701  Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
2703  function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2704  ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
2705  function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
2706  // Collect all invariant conditions within this loop (as opposed to an inner
2707  // loop which would be handled when visiting that inner loop).
2709  UnswitchCandidates;
2710 
2711  // Whether or not we should also collect guards in the loop.
2712  bool CollectGuards = false;
2713  if (UnswitchGuards) {
2714  auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
2715  Intrinsic::getName(Intrinsic::experimental_guard));
2716  if (GuardDecl && !GuardDecl->use_empty())
2717  CollectGuards = true;
2718  }
2719 
2720  IVConditionInfo PartialIVInfo;
2721  for (auto *BB : L.blocks()) {
2722  if (LI.getLoopFor(BB) != &L)
2723  continue;
2724 
2725  if (CollectGuards)
2726  for (auto &I : *BB)
2727  if (isGuard(&I)) {
2728  auto *Cond = cast<IntrinsicInst>(&I)->getArgOperand(0);
2729  // TODO: Support AND, OR conditions and partial unswitching.
2730  if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2731  UnswitchCandidates.push_back({&I, {Cond}});
2732  }
2733 
2734  if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2735  // We can only consider fully loop-invariant switch conditions as we need
2736  // to completely eliminate the switch after unswitching.
2737  if (!isa<Constant>(SI->getCondition()) &&
2738  L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
2739  UnswitchCandidates.push_back({SI, {SI->getCondition()}});
2740  continue;
2741  }
2742 
2743  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2744  if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) ||
2745  BI->getSuccessor(0) == BI->getSuccessor(1))
2746  continue;
2747 
2748  // If BI's condition is 'select _, true, false', simplify it to confuse
2749  // matchers
2750  Value *Cond = BI->getCondition(), *CondNext;
2751  while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
2752  Cond = CondNext;
2753  BI->setCondition(Cond);
2754 
2755  if (isa<Constant>(Cond))
2756  continue;
2757 
2758  if (L.isLoopInvariant(BI->getCondition())) {
2759  UnswitchCandidates.push_back({BI, {BI->getCondition()}});
2760  continue;
2761  }
2762 
2763  Instruction &CondI = *cast<Instruction>(BI->getCondition());
2764  if (match(&CondI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
2765  TinyPtrVector<Value *> Invariants =
2767  if (Invariants.empty())
2768  continue;
2769 
2770  UnswitchCandidates.push_back({BI, std::move(Invariants)});
2771  continue;
2772  }
2773  }
2774 
2775  Instruction *PartialIVCondBranch = nullptr;
2776  if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
2777  !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
2778  return TerminatorAndInvariants.first == L.getHeader()->getTerminator();
2779  })) {
2780  MemorySSA *MSSA = MSSAU->getMemorySSA();
2781  if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
2782  LLVM_DEBUG(
2783  dbgs() << "simple-loop-unswitch: Found partially invariant condition "
2784  << *Info->InstToDuplicate[0] << "\n");
2785  PartialIVInfo = *Info;
2786  PartialIVCondBranch = L.getHeader()->getTerminator();
2787  TinyPtrVector<Value *> ValsToDuplicate;
2788  for (auto *Inst : Info->InstToDuplicate)
2789  ValsToDuplicate.push_back(Inst);
2790  UnswitchCandidates.push_back(
2791  {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
2792  }
2793  }
2794 
2795  // If we didn't find any candidates, we're done.
2796  if (UnswitchCandidates.empty())
2797  return false;
2798 
2799  // Check if there are irreducible CFG cycles in this loop. If so, we cannot
2800  // easily unswitch non-trivial edges out of the loop. Doing so might turn the
2801  // irreducible control flow into reducible control flow and introduce new
2802  // loops "out of thin air". If we ever discover important use cases for doing
2803  // this, we can add support to loop unswitch, but it is a lot of complexity
2804  // for what seems little or no real world benefit.
2805  LoopBlocksRPO RPOT(&L);
2806  RPOT.perform(&LI);
2807  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
2808  return false;
2809 
2810  SmallVector<BasicBlock *, 4> ExitBlocks;
2811  L.getUniqueExitBlocks(ExitBlocks);
2812 
2813  // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
2814  // instruction as we don't know how to split those exit blocks.
2815  // FIXME: We should teach SplitBlock to handle this and remove this
2816  // restriction.
2817  for (auto *ExitBB : ExitBlocks) {
2818  auto *I = ExitBB->getFirstNonPHI();
2819  if (isa<CleanupPadInst>(I) || isa<CatchSwitchInst>(I)) {
2820  LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "
2821  "in exit block\n");
2822  return false;
2823  }
2824  }
2825 
2826  LLVM_DEBUG(
2827  dbgs() << "Considering " << UnswitchCandidates.size()
2828  << " non-trivial loop invariant conditions for unswitching.\n");
2829 
2830  // Given that unswitching these terminators will require duplicating parts of
2831  // the loop, so we need to be able to model that cost. Compute the ephemeral
2832  // values and set up a data structure to hold per-BB costs. We cache each
2833  // block's cost so that we don't recompute this when considering different
2834  // subsets of the loop for duplication during unswitching.
2836  CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
2838 
2839  // Compute the cost of each block, as well as the total loop cost. Also, bail
2840  // out if we see instructions which are incompatible with loop unswitching
2841  // (convergent, noduplicate, or cross-basic-block tokens).
2842  // FIXME: We might be able to safely handle some of these in non-duplicated
2843  // regions.
2845  L.getHeader()->getParent()->hasMinSize()
2848  InstructionCost LoopCost = 0;
2849  for (auto *BB : L.blocks()) {
2850  InstructionCost Cost = 0;
2851  for (auto &I : *BB) {
2852  if (EphValues.count(&I))
2853  continue;
2854 
2855  if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
2856  return false;
2857  if (auto *CB = dyn_cast<CallBase>(&I))
2858  if (CB->isConvergent() || CB->cannotDuplicate())
2859  return false;
2860 
2861  Cost += TTI.getUserCost(&I, CostKind);
2862  }
2863  assert(Cost >= 0 && "Must not have negative costs!");
2864  LoopCost += Cost;
2865  assert(LoopCost >= 0 && "Must not have negative loop costs!");
2866  BBCostMap[BB] = Cost;
2867  }
2868  LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");
2869 
2870  // Now we find the best candidate by searching for the one with the following
2871  // properties in order:
2872  //
2873  // 1) An unswitching cost below the threshold
2874  // 2) The smallest number of duplicated unswitch candidates (to avoid
2875  // creating redundant subsequent unswitching)
2876  // 3) The smallest cost after unswitching.
2877  //
2878  // We prioritize reducing fanout of unswitch candidates provided the cost
2879  // remains below the threshold because this has a multiplicative effect.
2880  //
2881  // This requires memoizing each dominator subtree to avoid redundant work.
2882  //
2883  // FIXME: Need to actually do the number of candidates part above.
2885  // Given a terminator which might be unswitched, computes the non-duplicated
2886  // cost for that terminator.
2887  auto ComputeUnswitchedCost = [&](Instruction &TI,
2888  bool FullUnswitch) -> InstructionCost {
2889  BasicBlock &BB = *TI.getParent();
2891 
2892  InstructionCost Cost = 0;
2893  for (BasicBlock *SuccBB : successors(&BB)) {
2894  // Don't count successors more than once.
2895  if (!Visited.insert(SuccBB).second)
2896  continue;
2897 
2898  // If this is a partial unswitch candidate, then it must be a conditional
2899  // branch with a condition of either `or`, `and`, their corresponding
2900  // select forms or partially invariant instructions. In that case, one of
2901  // the successors is necessarily duplicated, so don't even try to remove
2902  // its cost.
2903  if (!FullUnswitch) {
2904  auto &BI = cast<BranchInst>(TI);
2905  if (match(BI.getCondition(), m_LogicalAnd())) {
2906  if (SuccBB == BI.getSuccessor(1))
2907  continue;
2908  } else if (match(BI.getCondition(), m_LogicalOr())) {
2909  if (SuccBB == BI.getSuccessor(0))
2910  continue;
2911  } else if ((PartialIVInfo.KnownValue->isOneValue() &&
2912  SuccBB == BI.getSuccessor(0)) ||
2913  (!PartialIVInfo.KnownValue->isOneValue() &&
2914  SuccBB == BI.getSuccessor(1)))
2915  continue;
2916  }
2917 
2918  // This successor's domtree will not need to be duplicated after
2919  // unswitching if the edge to the successor dominates it (and thus the
2920  // entire tree). This essentially means there is no other path into this
2921  // subtree and so it will end up live in only one clone of the loop.
2922  if (SuccBB->getUniquePredecessor() ||
2923  llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2924  return PredBB == &BB || DT.dominates(SuccBB, PredBB);
2925  })) {
2926  Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
2927  assert(Cost <= LoopCost &&
2928  "Non-duplicated cost should never exceed total loop cost!");
2929  }
2930  }
2931 
2932  // Now scale the cost by the number of unique successors minus one. We
2933  // subtract one because there is already at least one copy of the entire
2934  // loop. This is computing the new cost of unswitching a condition.
2935  // Note that guards always have 2 unique successors that are implicit and
2936  // will be materialized if we decide to unswitch it.
2937  int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
2938  assert(SuccessorsCount > 1 &&
2939  "Cannot unswitch a condition without multiple distinct successors!");
2940  return (LoopCost - Cost) * (SuccessorsCount - 1);
2941  };
2942  Instruction *BestUnswitchTI = nullptr;
2943  InstructionCost BestUnswitchCost = 0;
2944  ArrayRef<Value *> BestUnswitchInvariants;
2945  for (auto &TerminatorAndInvariants : UnswitchCandidates) {
2946  Instruction &TI = *TerminatorAndInvariants.first;
2947  ArrayRef<Value *> Invariants = TerminatorAndInvariants.second;
2948  BranchInst *BI = dyn_cast<BranchInst>(&TI);
2949  InstructionCost CandidateCost = ComputeUnswitchedCost(
2950  TI, /*FullUnswitch*/ !BI || (Invariants.size() == 1 &&
2951  Invariants[0] == BI->getCondition()));
2952  // Calculate cost multiplier which is a tool to limit potentially
2953  // exponential behavior of loop-unswitch.
2955  int CostMultiplier =
2956  CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
2957  assert(
2958  (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
2959  "cost multiplier needs to be in the range of 1..UnswitchThreshold");
2960  CandidateCost *= CostMultiplier;
2961  LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
2962  << " (multiplier: " << CostMultiplier << ")"
2963  << " for unswitch candidate: " << TI << "\n");
2964  } else {
2965  LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
2966  << " for unswitch candidate: " << TI << "\n");
2967  }
2968 
2969  if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
2970  BestUnswitchTI = &TI;
2971  BestUnswitchCost = CandidateCost;
2972  BestUnswitchInvariants = Invariants;
2973  }
2974  }
2975  assert(BestUnswitchTI && "Failed to find loop unswitch candidate");
2976 
2977  if (BestUnswitchCost >= UnswitchThreshold) {
2978  LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: "
2979  << BestUnswitchCost << "\n");
2980  return false;
2981  }
2982 
2983  if (BestUnswitchTI != PartialIVCondBranch)
2984  PartialIVInfo.InstToDuplicate.clear();
2985 
2986  // If the best candidate is a guard, turn it into a branch.
2987  if (isGuard(BestUnswitchTI))
2988  BestUnswitchTI = turnGuardIntoBranch(cast<IntrinsicInst>(BestUnswitchTI), L,
2989  ExitBlocks, DT, LI, MSSAU);
2990 
2991  LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = "
2992  << BestUnswitchCost << ") terminator: " << *BestUnswitchTI
2993  << "\n");
2994  unswitchNontrivialInvariants(L, *BestUnswitchTI, BestUnswitchInvariants,
2995  ExitBlocks, PartialIVInfo, DT, LI, AC,
2996  UnswitchCB, SE, MSSAU, DestroyLoopCB);
2997  return true;
2998 }
2999 
3000 /// Unswitch control flow predicated on loop invariant conditions.
3001 ///
3002 /// This first hoists all branches or switches which are trivial (IE, do not
3003 /// require duplicating any part of the loop) out of the loop body. It then
3004 /// looks at other loop invariant control flows and tries to unswitch those as
3005 /// well by cloning the loop if the result is small enough.
3006 ///
3007 /// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
3008 /// also updated based on the unswitch. The `MSSA` analysis is also updated if
3009 /// valid (i.e. its use is enabled).
3010 ///
3011 /// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
3012 /// true, we will attempt to do non-trivial unswitching as well as trivial
3013 /// unswitching.
3014 ///
3015 /// The `UnswitchCB` callback provided will be run after unswitching is
3016 /// complete, with the first parameter set to `true` if the provided loop
3017 /// remains a loop, and a list of new sibling loops created.
3018 ///
3019 /// If `SE` is non-null, we will update that analysis based on the unswitching
3020 /// done.
3021 static bool
3023  AAResults &AA, TargetTransformInfo &TTI, bool Trivial,
3024  bool NonTrivial,
3025  function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
3026  ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
3027  function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
3028  assert(L.isRecursivelyLCSSAForm(DT, LI) &&
3029  "Loops must be in LCSSA form before unswitching.");
3030 
3031  // Must be in loop simplified form: we need a preheader and dedicated exits.
3032  if (!L.isLoopSimplifyForm())
3033  return false;
3034 
3035  // Try trivial unswitch first before loop over other basic blocks in the loop.
3036  if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
3037  // If we unswitched successfully we will want to clean up the loop before
3038  // processing it further so just mark it as unswitched and return.
3039  UnswitchCB(/*CurrentLoopValid*/ true, false, {});
3040  return true;
3041  }
3042 
3043  // Check whether we should continue with non-trivial conditions.
3044  // EnableNonTrivialUnswitch: Global variable that forces non-trivial
3045  // unswitching for testing and debugging.
3046  // NonTrivial: Parameter that enables non-trivial unswitching for this
3047  // invocation of the transform. But this should be allowed only
3048  // for targets without branch divergence.
3049  //
3050  // FIXME: If divergence analysis becomes available to a loop
3051  // transform, we should allow unswitching for non-trivial uniform
3052  // branches even on targets that have divergence.
3053  // https://bugs.llvm.org/show_bug.cgi?id=48819
3054  bool ContinueWithNonTrivial =
3055  EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence());
3056  if (!ContinueWithNonTrivial)
3057  return false;
3058 
3059  // Skip non-trivial unswitching for optsize functions.
3060  if (L.getHeader()->getParent()->hasOptSize())
3061  return false;
3062 
3063  // Skip non-trivial unswitching for loops that cannot be cloned.
3064  if (!L.isSafeToClone())
3065  return false;
3066 
3067  // For non-trivial unswitching, because it often creates new loops, we rely on
3068  // the pass manager to iterate on the loops rather than trying to immediately
3069  // reach a fixed point. There is no substantial advantage to iterating
3070  // internally, and if any of the new loops are simplified enough to contain
3071  // trivial unswitching we want to prefer those.
3072 
3073  // Try to unswitch the best invariant condition. We prefer this full unswitch to
3074  // a partial unswitch when possible below the threshold.
3075  if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU,
3076  DestroyLoopCB))
3077  return true;
3078 
3079  // No other opportunities to unswitch.
3080  return false;
3081 }
3082 
3085  LPMUpdater &U) {
3086  Function &F = *L.getHeader()->getParent();
3087  (void)F;
3088 
3089  LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
3090  << "\n");
3091 
3092  // Save the current loop name in a variable so that we can report it even
3093  // after it has been deleted.
3094  std::string LoopName = std::string(L.getName());
3095 
3096  auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
3097  bool PartiallyInvariant,
3098  ArrayRef<Loop *> NewLoops) {
3099  // If we did a non-trivial unswitch, we have added new (cloned) loops.
3100  if (!NewLoops.empty())
3101  U.addSiblingLoops(NewLoops);
3102 
3103  // If the current loop remains valid, we should revisit it to catch any
3104  // other unswitch opportunities. Otherwise, we need to mark it as deleted.
3105  if (CurrentLoopValid) {
3106  if (PartiallyInvariant) {
3107  // Mark the new loop as partially unswitched, to avoid unswitching on
3108  // the same condition again.
3109  auto &Context = L.getHeader()->getContext();
3110  MDNode *DisableUnswitchMD = MDNode::get(
3111  Context,
3112  MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
3114  Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
3115  {DisableUnswitchMD});
3116  L.setLoopID(NewLoopID);
3117  } else
3118  U.revisitCurrentLoop();
3119  } else
3120  U.markLoopAsDeleted(L, LoopName);
3121  };
3122 
3123  auto DestroyLoopCB = [&U](Loop &L, StringRef Name) {
3124  U.markLoopAsDeleted(L, Name);
3125  };
3126 
3128  if (AR.MSSA) {
3129  MSSAU = MemorySSAUpdater(AR.MSSA);
3130  if (VerifyMemorySSA)
3131  AR.MSSA->verifyMemorySSA();
3132  }
3133  if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
3134  UnswitchCB, &AR.SE,
3135  MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
3136  DestroyLoopCB))
3137  return PreservedAnalyses::all();
3138 
3139  if (AR.MSSA && VerifyMemorySSA)
3140  AR.MSSA->verifyMemorySSA();
3141 
3142  // Historically this pass has had issues with the dominator tree so verify it
3143  // in asserts builds.
3145 
3146  auto PA = getLoopPassPreservedAnalyses();
3147  if (AR.MSSA)
3148  PA.preserve<MemorySSAAnalysis>();
3149  return PA;
3150 }
3151 
3153  raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
3155  OS, MapClassName2PassName);
3156 
3157  OS << "<";
3158  OS << (NonTrivial ? "" : "no-") << "nontrivial;";
3159  OS << (Trivial ? "" : "no-") << "trivial";
3160  OS << ">";
3161 }
3162 
3163 namespace {
3164 
3165 class SimpleLoopUnswitchLegacyPass : public LoopPass {
3166  bool NonTrivial;
3167 
3168 public:
3169  static char ID; // Pass ID, replacement for typeid
3170 
3171  explicit SimpleLoopUnswitchLegacyPass(bool NonTrivial = false)
3172  : LoopPass(ID), NonTrivial(NonTrivial) {
3175  }
3176 
3177  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
3178 
3179  void getAnalysisUsage(AnalysisUsage &AU) const override {
3185  }
3186 };
3187 
3188 } // end anonymous namespace
3189 
3190 bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
3191  if (skipLoop(L))
3192  return false;
3193 
3194  Function &F = *L->getHeader()->getParent();
3195 
3196  LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L
3197  << "\n");
3198 
3199  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3200  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
3201  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
3202  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
3203  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
3204  MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
3205  MemorySSAUpdater MSSAU(MSSA);
3206 
3207  auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
3208  auto *SE = SEWP ? &SEWP->getSE() : nullptr;
3209 
3210  auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid, bool PartiallyInvariant,
3211  ArrayRef<Loop *> NewLoops) {
3212  // If we did a non-trivial unswitch, we have added new (cloned) loops.
3213  for (auto *NewL : NewLoops)
3214  LPM.addLoop(*NewL);
3215 
3216  // If the current loop remains valid, re-add it to the queue. This is
3217  // a little wasteful as we'll finish processing the current loop as well,
3218  // but it is the best we can do in the old PM.
3219  if (CurrentLoopValid) {
3220  // If the current loop has been unswitched using a partially invariant
3221  // condition, we should not re-add the current loop to avoid unswitching
3222  // on the same condition again.
3223  if (!PartiallyInvariant)
3224  LPM.addLoop(*L);
3225  } else
3226  LPM.markLoopAsDeleted(*L);
3227  };
3228 
3229  auto DestroyLoopCB = [&LPM](Loop &L, StringRef /* Name */) {
3230  LPM.markLoopAsDeleted(L);
3231  };
3232 
3233  if (VerifyMemorySSA)
3234  MSSA->verifyMemorySSA();
3235 
3236  bool Changed = unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial,
3237  UnswitchCB, SE, &MSSAU, DestroyLoopCB);
3238 
3239  if (VerifyMemorySSA)
3240  MSSA->verifyMemorySSA();
3241 
3242  // Historically this pass has had issues with the dominator tree so verify it
3243  // in asserts builds.
3245 
3246  return Changed;
3247 }
3248 
3250 INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
3251  "Simple unswitch loops", false, false)
3258 INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
3260 
3262  return new SimpleLoopUnswitchLegacyPass(NonTrivial);
3263 }
i
i
Definition: README.txt:29
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::formLCSSA
bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition: LCSSA.cpp:336
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:523
cloneLoopNest
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
Definition: SimpleLoopUnswitch.cpp:1257
AssumptionCache.h
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:298
llvm::createSimpleLoopUnswitchLegacyPass
Pass * createSimpleLoopUnswitchLegacyPass(bool NonTrivial=false)
Create the legacy pass object for the simple loop unswitcher.
Definition: SimpleLoopUnswitch.cpp:3261
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:211
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:64
UnswitchNumInitialUnscaledCandidates
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:22
llvm::MemorySSA::verifyMemorySSA
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1901
llvm::LoopStandardAnalysisResults::AC
AssumptionCache & AC
Definition: LoopAnalysisManager.h:54
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:264
llvm::LoopBase::getExitBlocks
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
Definition: LoopInfoImpl.h:62
llvm::AssumptionCache::registerAssumption
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
Definition: AssumptionCache.cpp:226
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:378
ValueMapper.h
llvm::SwitchInstProfUpdateWrapper
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
Definition: Instructions.h:3578
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
llvm::PassInfoMixin< SimpleLoopUnswitchPass >
llvm::SwitchInstProfUpdateWrapper::getSuccessorWeight
CaseWeightOpt getSuccessorWeight(unsigned idx)
Definition: Instructions.cpp:4467
MemorySSAUpdater.h
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
llvm::cfg::UpdateKind::Insert
@ Insert
unswitchTrivialBranch
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
Definition: SimpleLoopUnswitch.cpp:432
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:76
llvm::isGuaranteedNotToBeUndefOrPoison
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
Definition: ValueTracking.cpp:5155
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1176
llvm::LoopInfoBase::verify
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:690
Statistic.h
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:209
llvm::SwitchInst::Create
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3425
recomputeLoopBlockSet
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
Definition: SimpleLoopUnswitch.cpp:1687
ErrorHandling.h
llvm::PatternMatch::m_CombineOr
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:210
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Intrinsic::getName
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:880
llvm::IRBuilder<>
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1773
llvm::SmallDenseMap
Definition: DenseMap.h:880
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
llvm::LoopInfoBase::changeLoopFor
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
Definition: LoopInfo.h:1008
ValueTracking.h
Local.h
llvm::IRBuilderBase::CreateOr
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1383
llvm::MemorySSAUpdater::removeBlocks
void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.
Definition: MemorySSAUpdater.cpp:1371
unswitchNontrivialInvariants
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, SmallVectorImpl< BasicBlock * > &ExitBlocks, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, function_ref< void(bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
Definition: SimpleLoopUnswitch.cpp:2027
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:150
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:214
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::begin
iterator begin()
Definition: DenseMap.h:74
CalculateUnswitchCostMultiplier
static int CalculateUnswitchCostMultiplier(Instruction &TI, Loop &L, LoopInfo &LI, DominatorTree &DT, ArrayRef< std::pair< Instruction *, TinyPtrVector< Value * >>> UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
Definition: SimpleLoopUnswitch.cpp:2628
ScalarEvolution.h
unswitchBestCondition
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, function_ref< void(bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
Definition: SimpleLoopUnswitch.cpp:2700
DenseMap.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:357
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1271
areLoopExitPHIsLoopInvariant
static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB, BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
Definition: SimpleLoopUnswitch.cpp:184
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
llvm::DominatorTreeBase::Insert
static constexpr UpdateKind Insert
Definition: GenericDomTree.h:242
llvm::MemorySSAUpdater::moveAllAfterSpliceBlocks
void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
Definition: MemorySSAUpdater.cpp:1246
llvm::DominatorTreeBase::getNode
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition: GenericDomTree.h:351
turnGuardIntoBranch
static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...
Definition: SimpleLoopUnswitch.cpp:2552
unswitch
simple loop unswitch
Definition: SimpleLoopUnswitch.cpp:3258
llvm::Optional< uint32_t >
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::successors
auto successors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:31
llvm::SwitchInstProfUpdateWrapper::removeCase
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Definition: Instructions.cpp:4425
STLExtras.h
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:643
RHS
Value * RHS
Definition: X86PartialReduction.cpp:74
llvm::SwitchInst::cases
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
Definition: Instructions.h:3482
llvm::SwitchInst::getDefaultDest
BasicBlock * getDefaultDest() const
Definition: Instructions.h:3443
llvm::SimpleLoopUnswitchPass::printPipeline
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition: SimpleLoopUnswitch.cpp:3152
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:55
llvm::Sched::Fast
@ Fast
Definition: TargetLowering.h:104
Sequence.h
llvm::Constant::isOneValue
bool isOneValue() const
Returns true if the value is one.
Definition: Constants.cpp:108
llvm::Module::getFunction
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:178
llvm::zip_first
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&... args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
Definition: STLExtras.h:752
llvm::count_if
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1682
computeDomSubtreeCost
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
Definition: SimpleLoopUnswitch.cpp:2502
Use.h
buildPartialUnswitchConditionalBranch
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze)
Copy a set of loop invariant values ToDuplicate and insert them at the end of BB and conditionally br...
Definition: SimpleLoopUnswitch.cpp:203
MustExecute.h
llvm::LoopBase::getNumBlocks
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:185
llvm::MemorySSAUpdater::applyUpdates
void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
Definition: MemorySSAUpdater.cpp:808
llvm::MemorySSAUpdater::createMemoryAccessInBB
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point)
Create a MemoryAccess in MemorySSA at a specified point in a block, with a specified clobbering defin...
Definition: MemorySSAUpdater.cpp:1428
llvm::DominatorTreeBase::applyUpdates
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
Definition: GenericDomTree.h:544
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::BranchInst::swapSuccessors
void swapSuccessors()
Swap the successors of this branch instruction.
Definition: Instructions.cpp:1307
llvm::LPPassManager::addLoop
void addLoop(Loop &L)
Definition: LoopPass.cpp:78
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1233
buildPartialInvariantUnswitchConditionalBranch
static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU)
Copy a set of loop invariant values, and conditionally branch on them.
Definition: SimpleLoopUnswitch.cpp:217
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BranchInst::setSuccessor
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Definition: Instructions.h:3190
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1336
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
Context
ManagedStatic< detail::RecordContext > Context
Definition: Record.cpp:96
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::Optional::getPointer
constexpr const T * getPointer() const
Definition: Optional.h:280
LoopAnalysisManager.h
llvm::makePostTransformationMetadata
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
Definition: LoopInfo.cpp:1124
llvm::LoopBlocksRPO
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:288
llvm::isGuard
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
Definition: GuardUtils.cpp:18
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:115
DropNonTrivialImplicitNullChecks
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
Instruction.h
CommandLine.h
CodeMetrics.h
replaceLoopInvariantUses
static void replaceLoopInvariantUses(Loop &L, Value *Invariant, Constant &Replacement)
Definition: SimpleLoopUnswitch.cpp:167
LHS
Value * LHS
Definition: X86PartialReduction.cpp:73
llvm::SwitchInst::CaseHandleImpl::getCaseSuccessor
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
Definition: Instructions.h:3309
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1592
llvm::RemapInstruction
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:254
hoistLoopToNewParent
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
Definition: SimpleLoopUnswitch.cpp:333
llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:980
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition: PatternMatch.h:1472
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::AAResults
Definition: AliasAnalysis.h:507
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:656
llvm::DominatorTreeBase::insertEdge
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
Definition: GenericDomTree.h:584
Twine.h
InstrTypes.h
llvm::LoopBase::addChildLoop
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition: LoopInfo.h:395
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
llvm::TargetTransformInfo::hasBranchDivergence
bool hasBranchDivergence() const
Return true if branch divergence exists.
Definition: TargetTransformInfo.cpp:232
llvm::SwitchInst::CaseHandleImpl::getSuccessorIndex
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
Definition: Instructions.h:3320
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::TinyPtrVector::push_back
void push_back(EltTy NewVal)
Definition: TinyPtrVector.h:244
rewritePHINodesForExitAndUnswitchedBlocks
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
Definition: SimpleLoopUnswitch.cpp:288
llvm::LoopInfoBase::addTopLevelLoop
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
Definition: LoopInfo.h:1027
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::LoopInfoBase::destroy
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
Definition: LoopInfo.h:1074
llvm::Value::uses
iterator_range< use_iterator > uses()
Definition: Value.h:376
false
Definition: StackSlotColoring.cpp:142
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:596
llvm::MemorySSAUpdater::moveToPlace
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
Definition: MemorySSAUpdater.cpp:1199
llvm::Instruction
Definition: Instruction.h:45
llvm::BranchInst::setCondition
void setCondition(Value *V)
Definition: Instructions.h:3178
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
unswitchLoop
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, function_ref< void(bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
Unswitch control flow predicated on loop invariant conditions.
Definition: SimpleLoopUnswitch.cpp:3022
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:354
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:32
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:54
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:376
llvm::SwitchInstProfUpdateWrapper::setSuccessorWeight
void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
Definition: Instructions.cpp:4473
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1775
LoopUtils.h
llvm::AArch64CC::PL
@ PL
Definition: AArch64BaseInfo.h:260
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:71
llvm::LPPassManager
Definition: LoopPass.h:75
SmallPtrSet.h
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
PatternMatch.h
visitDomSubTree
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
Definition: SimpleLoopUnswitch.cpp:2004
llvm::ValueMap::count
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: ValueMap.h:152
llvm::Loop::getName
StringRef getName() const
Definition: LoopInfo.h:869
llvm::RF_NoModuleLevelChanges
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:71
LoopIterator.h
deleteDeadClonedBlocks
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy >> VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
Definition: SimpleLoopUnswitch.cpp:1566
llvm::IRBuilderBase::CreateAnd
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1361
llvm::Loop::LoopBounds::Direction
Direction
An enum for the direction of the loop.
Definition: LoopInfo.h:704
llvm::MemorySSAUpdater::removeDuplicatePhiEdgesBetween
void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
Definition: MemorySSAUpdater.cpp:537
llvm::PatternMatch::m_One
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:513
llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:282
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
llvm::LoopInfoBase::AllocateLoop
LoopT * AllocateLoop(ArgsTy &&... Args)
Definition: LoopInfo.h:934
collectHomogenousInstGraphLoopInvariants
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root, LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
Definition: SimpleLoopUnswitch.cpp:125
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3173
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:120
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLFunctionalExtras.h:36
llvm::MemorySSAUpdater::applyInsertUpdates
void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
Definition: MemorySSAUpdater.cpp:859
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:140
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:479
llvm::CloneBasicBlock
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
Definition: CloneFunction.cpp:43
BasicBlock.h
llvm::cl::opt< bool >
llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:535
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
llvm::MemorySSAUpdater::removeEdge
void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
Definition: MemorySSAUpdater.cpp:530
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
uint64_t
llvm::SwitchInst::setDefaultDest
void setDefaultDest(BasicBlock *DefaultCase)
Definition: Instructions.h:3447
llvm::LoopPass
Definition: LoopPass.h:27
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2476
llvm::PatternMatch::m_LogicalOr
LogicalOp_match< LHS, RHS, Instruction::Or > m_LogicalOr(const LHS &L, const RHS &R)
Matches L || R either in the form of L | R or L ? true : R.
Definition: PatternMatch.h:2554
llvm::MemorySSAUpdater
Definition: MemorySSAUpdater.h:55
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:578
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:263
llvm::MemorySSAUpdater::getMemorySSA
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
Definition: MemorySSAUpdater.h:243
llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1612
llvm::LoopBase::getUniqueExitBlocks
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:122
llvm::MemorySSAUpdater::updateForClonedLoop
void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
Definition: MemorySSAUpdater.cpp:675
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
FreezeLoopUnswitchCond
static cl::opt< bool > FreezeLoopUnswitchCond("freeze-loop-unswitch-cond", cl::init(false), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation."))
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3148
llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:704
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:970
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::MemorySSA::getMemoryAccess
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:721
Cloning.h
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:585
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::MemoryDef
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition: MemorySSA.h:376
SimpleLoopUnswitch.h
UnswitchGuards
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
llvm::MDString::get
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:473
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::ValueToValueMapTy
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
Definition: MemorySSAUpdater.h:51
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", "Simple unswitch loops", false, false) INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass
llvm::LoopBase::getLoopDepth
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition: LoopInfo.h:96
llvm::IVConditionInfo::KnownValue
Constant * KnownValue
Constant to indicate for which value the condition is invariant.
Definition: LoopUtils.h:507
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::DominatorTree::isReachableFromEntry
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:328
llvm::LoopBase::getBlocksVector
std::vector< BlockT * > & getBlocksVector()
Return a direct, mutable handle to the blocks vector so that we can mutate it efficiently with techni...
Definition: LoopInfo.h:192
llvm::IVConditionInfo::InstToDuplicate
SmallVector< Instruction * > InstToDuplicate
Instructions that need to be duplicated and checked for the unswitching condition.
Definition: LoopUtils.h:504
llvm::LoopInfoBase::begin
iterator begin() const
Definition: LoopInfo.h:945
llvm::ScalarEvolution::forgetTopmostLoop
void forgetTopmostLoop(const Loop *L)
Definition: ScalarEvolution.cpp:7902
llvm::MDNode
Metadata node.
Definition: Metadata.h:906
llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:930
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:140
getTopMostExitingLoop
static Loop * getTopMostExitingLoop(BasicBlock *ExitBB, LoopInfo &LI)
Definition: SimpleLoopUnswitch.cpp:404
CFG.h
llvm::Instruction::clone
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
Definition: Instruction.cpp:860
llvm::LoopInfoBase::getLoopDepth
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
Definition: LoopInfo.h:977
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:94
llvm::LoopBase::removeChildLoop
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Definition: LoopInfo.h:404
rebuildLoopAfterUnswitch
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops)
Rebuild a loop after unswitching removes some subset of blocks and edges.
Definition: SimpleLoopUnswitch.cpp:1798
llvm::LoopBase::getSubLoopsVector
std::vector< LoopT * > & getSubLoopsVector()
Definition: LoopInfo.h:147
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1599
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:179
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:215
LoopPass.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::MemorySSA::getBlockDefs
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition: MemorySSA.h:769
llvm::initializeSimpleLoopUnswitchLegacyPassPass
void initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry &)
llvm::SwitchInst::CaseHandleImpl::getCaseValue
ConstantIntT * getCaseValue() const
Resolves case value for current case.
Definition: Instructions.h:3302
llvm::LoopBase::isOutermost
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
Definition: LoopInfo.h:168
llvm::LoopBase::reserveBlocks
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
Definition: LoopInfo.h:436
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:219
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:991
loops
simple loop Simple unswitch loops
Definition: SimpleLoopUnswitch.cpp:3259
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:56
unswitchTrivialSwitch
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
Definition: SimpleLoopUnswitch.cpp:660
llvm::pred_empty
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:119
llvm::findOptionMDForLoop
MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
Definition: LoopInfo.cpp:1045
llvm::IRBuilderBase::CreateCondBr
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:995
llvm::DomTreeNodeBase< BasicBlock >
llvm::ValueMap< const Value *, WeakTrackingVH >
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
llvm::CallBase::setArgOperand
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1348
llvm::LoopInfoBase::removeLoop
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
Definition: LoopInfo.h:997
llvm::ConstantInt::getFalse
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:880
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:92
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:661
llvm::MemorySSAUpdater::updateExitBlocksForClonedLoop
void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
Definition: MemorySSAUpdater.cpp:786
llvm::BasicBlock::front
const Instruction & front() const
Definition: BasicBlock.h:308
llvm::RF_IgnoreMissingLocals
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:89
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:36
llvm::SimpleLoopUnswitchPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: SimpleLoopUnswitch.cpp:3083
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
Constant.h
llvm::SplitEdge
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
Definition: BasicBlockUtils.cpp:518
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::formDedicatedExitBlocks
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:63
llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1727
deleteDeadBlocksFromLoop
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
Definition: SimpleLoopUnswitch.cpp:1596
llvm::ScalarEvolution::forgetLoop
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
Definition: ScalarEvolution.cpp:7845
llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:873
UnswitchThreshold
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::ZeroOrMore, cl::desc("The cost threshold for unswitching a loop."))
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
llvm::Loop::setLoopID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:528
llvm::SwitchInstProfUpdateWrapper::addCase
void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
Definition: Instructions.cpp:4439
MSSAThreshold
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
llvm::LPPassManager::markLoopAsDeleted
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:112
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:59
llvm::IRBuilderBase::CreateFreeze
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2390
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:83
llvm::ICFLoopSafetyInfo
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
Definition: MustExecute.h:132
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2699
GenericDomTree.h
buildClonedLoops
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
Definition: SimpleLoopUnswitch.cpp:1316
llvm::LoopBase::getBlocksSet
SmallPtrSetImpl< const BlockT * > & getBlocksSet()
Return a direct, mutable handle to the blocks set so that we can mutate it efficiently.
Definition: LoopInfo.h:198
llvm::Loop::getLoopID
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:504
llvm::MemorySSA::BeforeTerminator
@ BeforeTerminator
Definition: MemorySSA.h:792
Casting.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1533
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::count
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:214
llvm::IVConditionInfo
Struct to hold information about a partially invariant condition.
Definition: LoopUtils.h:501
llvm::Loop::isRecursivelyLCSSAForm
bool isRecursivelyLCSSAForm(const DominatorTree &DT, const LoopInfo &LI) const
Return true if this Loop and all inner subloops are in LCSSA form.
Definition: LoopInfo.cpp:469
llvm::hasPartialIVCondition
Optional< IVConditionInfo > hasPartialIVCondition(Loop &L, unsigned MSSAThreshold, MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
Definition: LoopUtils.cpp:1617
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:580
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:57
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:53
llvm::SwitchInstProfUpdateWrapper::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
Definition: Instructions.cpp:4458
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
GuardUtils.h
llvm::makeArrayRef
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:474
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:363
llvm::TinyPtrVector::empty
bool empty() const
Definition: TinyPtrVector.h:163
llvm::LoopInfoBase::end
iterator end() const
Definition: LoopInfo.h:946
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
MemorySSA.h
Instructions.h
llvm::BasicBlock::moveBefore
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.cpp:137
SmallVector.h
llvm::LoopBase::addBasicBlockToLoop
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:242
Dominators.h
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1343
llvm::FreezeInst
This class represents a freeze function that returns random concrete value if an operand is either a ...
Definition: Instructions.h:5378
llvm::SmallPtrSetImplBase::empty
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
N
#define N
rewritePHINodesForUnswitchedExitBlock
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
Definition: SimpleLoopUnswitch.cpp:266
llvm::SwitchInst::CaseHandle
Definition: Instructions.h:3336
llvm::BasicBlock::size
size_t size() const
Definition: BasicBlock.h:306
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::Loop::isSafeToClone
bool isSafeToClone() const
Return true if the loop body is safe to clone in practice.
Definition: LoopInfo.cpp:486
InstructionSimplify.h
llvm::ICFLoopSafetyInfo::computeLoopSafetyInfo
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:81
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition: GenericDomTree.h:802
llvm::LoopStandardAnalysisResults::MSSA
MemorySSA * MSSA
Definition: LoopAnalysisManager.h:62
llvm::PHINode
Definition: Instructions.h:2657
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::BasicBlock::removePredecessor
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:325
llvm::SmallVectorImpl< DominatorTree::UpdateType >
llvm::Function::hasMinSize
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:658
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:306
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::LoopBase::isLoopExiting
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Definition: LoopInfo.h:225
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::ICFLoopSafetyInfo::isGuaranteedToExecute
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
Definition: MustExecute.cpp:270
llvm::ValueMap::lookup
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: ValueMap.h:165
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::VerifyMemorySSA
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:91
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3236
llvm::SplitBlockAndInsertIfThen
Instruction * SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, bool Unreachable, MDNode *BranchWeights, DominatorTree *DT, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
Definition: BasicBlockUtils.cpp:1444
llvm::cl::desc
Definition: CommandLine.h:412
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3092
raw_ostream.h
llvm::LoopBlocksRPO::perform
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:180
llvm::TinyPtrVector
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:30
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:633
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:839
Value.h
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::DominatorTreeBase::deleteEdge
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
Definition: GenericDomTree.h:602
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3171
llvm::PatternMatch::m_LogicalAnd
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
Definition: PatternMatch.h:2536
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3185
SetVector.h
llvm::DominatorTreeBase::Delete
static constexpr UpdateKind Delete
Definition: GenericDomTree.h:243
llvm::Instruction::moveBefore
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:97
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:916
unswitchAllTrivialConditions
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
Definition: SimpleLoopUnswitch.cpp:955
UnswitchSiblingsToplevelDiv
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
EnableNonTrivialUnswitch
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
EnableUnswitchCostMultiplier
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
buildClonedLoopBlocks
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Build the cloned blocks for an unswitched copy of the given loop.
Definition: SimpleLoopUnswitch.cpp:1073