LLVM  15.0.0git
SimpleLoopUnswitch.cpp
Go to the documentation of this file.
1 ///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "llvm/ADT/DenseMap.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/ADT/Sequence.h"
13 #include "llvm/ADT/SetVector.h"
14 #include "llvm/ADT/SmallPtrSet.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/ADT/Twine.h"
19 #include "llvm/Analysis/CFG.h"
23 #include "llvm/Analysis/LoopInfo.h"
25 #include "llvm/Analysis/LoopPass.h"
32 #include "llvm/IR/BasicBlock.h"
33 #include "llvm/IR/Constant.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/Dominators.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/IR/IRBuilder.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Instruction.h"
40 #include "llvm/IR/Instructions.h"
41 #include "llvm/IR/IntrinsicInst.h"
42 #include "llvm/IR/PatternMatch.h"
43 #include "llvm/IR/Use.h"
44 #include "llvm/IR/Value.h"
45 #include "llvm/InitializePasses.h"
46 #include "llvm/Pass.h"
47 #include "llvm/Support/Casting.h"
49 #include "llvm/Support/Debug.h"
60 #include <algorithm>
61 #include <cassert>
62 #include <iterator>
63 #include <numeric>
64 #include <utility>
65 
66 #define DEBUG_TYPE "simple-loop-unswitch"
67 
68 using namespace llvm;
69 using namespace llvm::PatternMatch;
70 
71 STATISTIC(NumBranches, "Number of branches unswitched");
72 STATISTIC(NumSwitches, "Number of switches unswitched");
73 STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
74 STATISTIC(NumTrivial, "Number of unswitches that are trivial");
75 STATISTIC(
76  NumCostMultiplierSkipped,
77  "Number of unswitch candidates that had their cost multiplier skipped");
78 
80  "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
81  cl::desc("Forcibly enables non-trivial loop unswitching rather than "
82  "following the configuration passed into the pass."));
83 
84 static cl::opt<int>
85  UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
86  cl::desc("The cost threshold for unswitching a loop."));
87 
89  "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
90  cl::desc("Enable unswitch cost multiplier that prohibits exponential "
91  "explosion in nontrivial unswitch."));
93  "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
94  cl::desc("Toplevel siblings divisor for cost multiplier."));
96  "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
97  cl::desc("Number of unswitch candidates that are ignored when calculating "
98  "cost multiplier."));
100  "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
101  cl::desc("If enabled, simple loop unswitching will also consider "
102  "llvm.experimental.guard intrinsics as unswitch candidates."));
104  "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
105  cl::init(false), cl::Hidden,
106  cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
107  "null checks to save time analyzing if we can keep it."));
108 static cl::opt<unsigned>
109  MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
110  cl::desc("Max number of memory uses to explore during "
111  "partial unswitching analysis"),
112  cl::init(100), cl::Hidden);
114  "freeze-loop-unswitch-cond", cl::init(true), cl::Hidden,
115  cl::desc("If enabled, the freeze instruction will be added to condition "
116  "of loop unswitch to prevent miscompilation."));
117 
118 // Helper to skip (select x, true, false), which matches both a logical AND and
119 // OR and can confuse code that tries to determine if \p Cond is either a
120 // logical AND or OR but not both.
122  Value *CondNext;
123  while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
124  Cond = CondNext;
125  return Cond;
126 }
127 
128 /// Collect all of the loop invariant input values transitively used by the
129 /// homogeneous instruction graph from a given root.
130 ///
131 /// This essentially walks from a root recursively through loop variant operands
132 /// which have perform the same logical operation (AND or OR) and finds all
133 /// inputs which are loop invariant. For some operations these can be
134 /// re-associated and unswitched out of the loop entirely.
137  LoopInfo &LI) {
138  assert(!L.isLoopInvariant(&Root) &&
139  "Only need to walk the graph if root itself is not invariant.");
140  TinyPtrVector<Value *> Invariants;
141 
142  bool IsRootAnd = match(&Root, m_LogicalAnd());
143  bool IsRootOr = match(&Root, m_LogicalOr());
144 
145  // Build a worklist and recurse through operators collecting invariants.
148  Worklist.push_back(&Root);
149  Visited.insert(&Root);
150  do {
151  Instruction &I = *Worklist.pop_back_val();
152  for (Value *OpV : I.operand_values()) {
153  // Skip constants as unswitching isn't interesting for them.
154  if (isa<Constant>(OpV))
155  continue;
156 
157  // Add it to our result if loop invariant.
158  if (L.isLoopInvariant(OpV)) {
159  Invariants.push_back(OpV);
160  continue;
161  }
162 
163  // If not an instruction with the same opcode, nothing we can do.
164  Instruction *OpI = dyn_cast<Instruction>(skipTrivialSelect(OpV));
165 
166  if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
167  (IsRootOr && match(OpI, m_LogicalOr())))) {
168  // Visit this operand.
169  if (Visited.insert(OpI).second)
170  Worklist.push_back(OpI);
171  }
172  }
173  } while (!Worklist.empty());
174 
175  return Invariants;
176 }
177 
178 static void replaceLoopInvariantUses(Loop &L, Value *Invariant,
179  Constant &Replacement) {
180  assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");
181 
182  // Replace uses of LIC in the loop with the given constant.
183  // We use make_early_inc_range as set invalidates the iterator.
184  for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
185  Instruction *UserI = dyn_cast<Instruction>(U.getUser());
186 
187  // Replace this use within the loop body.
188  if (UserI && L.contains(UserI))
189  U.set(&Replacement);
190  }
191 }
192 
193 /// Check that all the LCSSA PHI nodes in the loop exit block have trivial
194 /// incoming values along this edge.
195 static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
196  BasicBlock &ExitBB) {
197  for (Instruction &I : ExitBB) {
198  auto *PN = dyn_cast<PHINode>(&I);
199  if (!PN)
200  // No more PHIs to check.
201  return true;
202 
203  // If the incoming value for this edge isn't loop invariant the unswitch
204  // won't be trivial.
205  if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
206  return false;
207  }
208  llvm_unreachable("Basic blocks should never be empty!");
209 }
210 
211 /// Copy a set of loop invariant values \p ToDuplicate and insert them at the
212 /// end of \p BB and conditionally branch on the copied condition. We only
213 /// branch on a single value.
215  BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
216  BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
218  IRBuilder<> IRB(&BB);
219 
220  SmallVector<Value *> FrozenInvariants;
221  for (Value *Inv : Invariants) {
222  if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT))
223  Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr");
224  FrozenInvariants.push_back(Inv);
225  }
226 
227  Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
228  : IRB.CreateAnd(FrozenInvariants);
229  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
230  Direction ? &NormalSucc : &UnswitchedSucc);
231 }
232 
233 /// Copy a set of loop invariant values, and conditionally branch on them.
235  BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
236  BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
237  MemorySSAUpdater *MSSAU) {
238  ValueToValueMapTy VMap;
239  for (auto *Val : reverse(ToDuplicate)) {
240  Instruction *Inst = cast<Instruction>(Val);
241  Instruction *NewInst = Inst->clone();
242  BB.getInstList().insert(BB.end(), NewInst);
243  RemapInstruction(NewInst, VMap,
245  VMap[Val] = NewInst;
246 
247  if (!MSSAU)
248  continue;
249 
250  MemorySSA *MSSA = MSSAU->getMemorySSA();
251  if (auto *MemUse =
252  dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) {
253  auto *DefiningAccess = MemUse->getDefiningAccess();
254  // Get the first defining access before the loop.
255  while (L.contains(DefiningAccess->getBlock())) {
256  // If the defining access is a MemoryPhi, get the incoming
257  // value for the pre-header as defining access.
258  if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
259  DefiningAccess =
260  MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
261  else
262  DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
263  }
264  MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
265  NewInst->getParent(),
267  }
268  }
269 
270  IRBuilder<> IRB(&BB);
271  Value *Cond = VMap[ToDuplicate[0]];
272  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
273  Direction ? &NormalSucc : &UnswitchedSucc);
274 }
275 
276 /// Rewrite the PHI nodes in an unswitched loop exit basic block.
277 ///
278 /// Requires that the loop exit and unswitched basic block are the same, and
279 /// that the exiting block was a unique predecessor of that block. Rewrites the
280 /// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
281 /// PHI nodes from the old preheader that now contains the unswitched
282 /// terminator.
284  BasicBlock &OldExitingBB,
285  BasicBlock &OldPH) {
286  for (PHINode &PN : UnswitchedBB.phis()) {
287  // When the loop exit is directly unswitched we just need to update the
288  // incoming basic block. We loop to handle weird cases with repeated
289  // incoming blocks, but expect to typically only have one operand here.
290  for (auto i : seq<int>(0, PN.getNumOperands())) {
291  assert(PN.getIncomingBlock(i) == &OldExitingBB &&
292  "Found incoming block different from unique predecessor!");
293  PN.setIncomingBlock(i, &OldPH);
294  }
295  }
296 }
297 
298 /// Rewrite the PHI nodes in the loop exit basic block and the split off
299 /// unswitched block.
300 ///
301 /// Because the exit block remains an exit from the loop, this rewrites the
302 /// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
303 /// nodes into the unswitched basic block to select between the value in the
304 /// old preheader and the loop exit.
306  BasicBlock &UnswitchedBB,
307  BasicBlock &OldExitingBB,
308  BasicBlock &OldPH,
309  bool FullUnswitch) {
310  assert(&ExitBB != &UnswitchedBB &&
311  "Must have different loop exit and unswitched blocks!");
312  Instruction *InsertPt = &*UnswitchedBB.begin();
313  for (PHINode &PN : ExitBB.phis()) {
314  auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
315  PN.getName() + ".split", InsertPt);
316 
317  // Walk backwards over the old PHI node's inputs to minimize the cost of
318  // removing each one. We have to do this weird loop manually so that we
319  // create the same number of new incoming edges in the new PHI as we expect
320  // each case-based edge to be included in the unswitched switch in some
321  // cases.
322  // FIXME: This is really, really gross. It would be much cleaner if LLVM
323  // allowed us to create a single entry for a predecessor block without
324  // having separate entries for each "edge" even though these edges are
325  // required to produce identical results.
326  for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
327  if (PN.getIncomingBlock(i) != &OldExitingBB)
328  continue;
329 
330  Value *Incoming = PN.getIncomingValue(i);
331  if (FullUnswitch)
332  // No more edge from the old exiting block to the exit block.
333  PN.removeIncomingValue(i);
334 
335  NewPN->addIncoming(Incoming, &OldPH);
336  }
337 
338  // Now replace the old PHI with the new one and wire the old one in as an
339  // input to the new one.
340  PN.replaceAllUsesWith(NewPN);
341  NewPN->addIncoming(&PN, &ExitBB);
342  }
343 }
344 
345 /// Hoist the current loop up to the innermost loop containing a remaining exit.
346 ///
347 /// Because we've removed an exit from the loop, we may have changed the set of
348 /// loops reachable and need to move the current loop up the loop nest or even
349 /// to an entirely separate nest.
350 static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
351  DominatorTree &DT, LoopInfo &LI,
352  MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
353  // If the loop is already at the top level, we can't hoist it anywhere.
354  Loop *OldParentL = L.getParentLoop();
355  if (!OldParentL)
356  return;
357 
359  L.getExitBlocks(Exits);
360  Loop *NewParentL = nullptr;
361  for (auto *ExitBB : Exits)
362  if (Loop *ExitL = LI.getLoopFor(ExitBB))
363  if (!NewParentL || NewParentL->contains(ExitL))
364  NewParentL = ExitL;
365 
366  if (NewParentL == OldParentL)
367  return;
368 
369  // The new parent loop (if different) should always contain the old one.
370  if (NewParentL)
371  assert(NewParentL->contains(OldParentL) &&
372  "Can only hoist this loop up the nest!");
373 
374  // The preheader will need to move with the body of this loop. However,
375  // because it isn't in this loop we also need to update the primary loop map.
376  assert(OldParentL == LI.getLoopFor(&Preheader) &&
377  "Parent loop of this loop should contain this loop's preheader!");
378  LI.changeLoopFor(&Preheader, NewParentL);
379 
380  // Remove this loop from its old parent.
381  OldParentL->removeChildLoop(&L);
382 
383  // Add the loop either to the new parent or as a top-level loop.
384  if (NewParentL)
385  NewParentL->addChildLoop(&L);
386  else
387  LI.addTopLevelLoop(&L);
388 
389  // Remove this loops blocks from the old parent and every other loop up the
390  // nest until reaching the new parent. Also update all of these
391  // no-longer-containing loops to reflect the nesting change.
392  for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
393  OldContainingL = OldContainingL->getParentLoop()) {
394  llvm::erase_if(OldContainingL->getBlocksVector(),
395  [&](const BasicBlock *BB) {
396  return BB == &Preheader || L.contains(BB);
397  });
398 
399  OldContainingL->getBlocksSet().erase(&Preheader);
400  for (BasicBlock *BB : L.blocks())
401  OldContainingL->getBlocksSet().erase(BB);
402 
403  // Because we just hoisted a loop out of this one, we have essentially
404  // created new exit paths from it. That means we need to form LCSSA PHI
405  // nodes for values used in the no-longer-nested loop.
406  formLCSSA(*OldContainingL, DT, &LI, SE);
407 
408  // We shouldn't need to form dedicated exits because the exit introduced
409  // here is the (just split by unswitching) preheader. However, after trivial
410  // unswitching it is possible to get new non-dedicated exits out of parent
411  // loop so let's conservatively form dedicated exit blocks and figure out
412  // if we can optimize later.
413  formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
414  /*PreserveLCSSA*/ true);
415  }
416 }
417 
418 // Return the top-most loop containing ExitBB and having ExitBB as exiting block
419 // or the loop containing ExitBB, if there is no parent loop containing ExitBB
420 // as exiting block.
422  Loop *TopMost = LI.getLoopFor(ExitBB);
423  Loop *Current = TopMost;
424  while (Current) {
425  if (Current->isLoopExiting(ExitBB))
426  TopMost = Current;
427  Current = Current->getParentLoop();
428  }
429  return TopMost;
430 }
431 
432 /// Unswitch a trivial branch if the condition is loop invariant.
433 ///
434 /// This routine should only be called when loop code leading to the branch has
435 /// been validated as trivial (no side effects). This routine checks if the
436 /// condition is invariant and one of the successors is a loop exit. This
437 /// allows us to unswitch without duplicating the loop, making it trivial.
438 ///
439 /// If this routine fails to unswitch the branch it returns false.
440 ///
441 /// If the branch can be unswitched, this routine splits the preheader and
442 /// hoists the branch above that split. Preserves loop simplified form
443 /// (splitting the exit block as necessary). It simplifies the branch within
444 /// the loop to an unconditional branch but doesn't remove it entirely. Further
445 /// cleanup can be done with some simplifycfg like pass.
446 ///
447 /// If `SE` is not null, it will be updated based on the potential loop SCEVs
448 /// invalidated by this.
450  LoopInfo &LI, ScalarEvolution *SE,
451  MemorySSAUpdater *MSSAU) {
452  assert(BI.isConditional() && "Can only unswitch a conditional branch!");
453  LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
454 
455  // The loop invariant values that we want to unswitch.
456  TinyPtrVector<Value *> Invariants;
457 
458  // When true, we're fully unswitching the branch rather than just unswitching
459  // some input conditions to the branch.
460  bool FullUnswitch = false;
461 
463  if (L.isLoopInvariant(Cond)) {
464  Invariants.push_back(Cond);
465  FullUnswitch = true;
466  } else {
467  if (auto *CondInst = dyn_cast<Instruction>(Cond))
468  Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
469  if (Invariants.empty()) {
470  LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n");
471  return false;
472  }
473  }
474 
475  // Check that one of the branch's successors exits, and which one.
476  bool ExitDirection = true;
477  int LoopExitSuccIdx = 0;
478  auto *LoopExitBB = BI.getSuccessor(0);
479  if (L.contains(LoopExitBB)) {
480  ExitDirection = false;
481  LoopExitSuccIdx = 1;
482  LoopExitBB = BI.getSuccessor(1);
483  if (L.contains(LoopExitBB)) {
484  LLVM_DEBUG(dbgs() << " Branch doesn't exit the loop!\n");
485  return false;
486  }
487  }
488  auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
489  auto *ParentBB = BI.getParent();
490  if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {
491  LLVM_DEBUG(dbgs() << " Loop exit PHI's aren't loop-invariant!\n");
492  return false;
493  }
494 
495  // When unswitching only part of the branch's condition, we need the exit
496  // block to be reached directly from the partially unswitched input. This can
497  // be done when the exit block is along the true edge and the branch condition
498  // is a graph of `or` operations, or the exit block is along the false edge
499  // and the condition is a graph of `and` operations.
500  if (!FullUnswitch) {
501  if (ExitDirection ? !match(Cond, m_LogicalOr())
502  : !match(Cond, m_LogicalAnd())) {
503  LLVM_DEBUG(dbgs() << " Branch condition is in improper form for "
504  "non-full unswitch!\n");
505  return false;
506  }
507  }
508 
509  LLVM_DEBUG({
510  dbgs() << " unswitching trivial invariant conditions for: " << BI
511  << "\n";
512  for (Value *Invariant : Invariants) {
513  dbgs() << " " << *Invariant << " == true";
514  if (Invariant != Invariants.back())
515  dbgs() << " ||";
516  dbgs() << "\n";
517  }
518  });
519 
520  // If we have scalar evolutions, we need to invalidate them including this
521  // loop, the loop containing the exit block and the topmost parent loop
522  // exiting via LoopExitBB.
523  if (SE) {
524  if (Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
525  SE->forgetLoop(ExitL);
526  else
527  // Forget the entire nest as this exits the entire nest.
528  SE->forgetTopmostLoop(&L);
529  }
530 
531  if (MSSAU && VerifyMemorySSA)
532  MSSAU->getMemorySSA()->verifyMemorySSA();
533 
534  // Split the preheader, so that we know that there is a safe place to insert
535  // the conditional branch. We will change the preheader to have a conditional
536  // branch on LoopCond.
537  BasicBlock *OldPH = L.getLoopPreheader();
538  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
539 
540  // Now that we have a place to insert the conditional branch, create a place
541  // to branch to: this is the exit block out of the loop that we are
542  // unswitching. We need to split this if there are other loop predecessors.
543  // Because the loop is in simplified form, *any* other predecessor is enough.
544  BasicBlock *UnswitchedBB;
545  if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
546  assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
547  "A branch's parent isn't a predecessor!");
548  UnswitchedBB = LoopExitBB;
549  } else {
550  UnswitchedBB =
551  SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI, MSSAU);
552  }
553 
554  if (MSSAU && VerifyMemorySSA)
555  MSSAU->getMemorySSA()->verifyMemorySSA();
556 
557  // Actually move the invariant uses into the unswitched position. If possible,
558  // we do this by moving the instructions, but when doing partial unswitching
559  // we do it by building a new merge of the values in the unswitched position.
560  OldPH->getTerminator()->eraseFromParent();
561  if (FullUnswitch) {
562  // If fully unswitching, we can use the existing branch instruction.
563  // Splice it into the old PH to gate reaching the new preheader and re-point
564  // its successors.
565  OldPH->getInstList().splice(OldPH->end(), BI.getParent()->getInstList(),
566  BI);
567  BI.setCondition(Cond);
568  if (MSSAU) {
569  // Temporarily clone the terminator, to make MSSA update cheaper by
570  // separating "insert edge" updates from "remove edge" ones.
571  ParentBB->getInstList().push_back(BI.clone());
572  } else {
573  // Create a new unconditional branch that will continue the loop as a new
574  // terminator.
575  BranchInst::Create(ContinueBB, ParentBB);
576  }
577  BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
578  BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
579  } else {
580  // Only unswitching a subset of inputs to the condition, so we will need to
581  // build a new branch that merges the invariant inputs.
582  if (ExitDirection)
584  "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
585  "condition!");
586  else
588  "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
589  " condition!");
591  *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
592  FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT);
593  }
594 
595  // Update the dominator tree with the added edge.
596  DT.insertEdge(OldPH, UnswitchedBB);
597 
598  // After the dominator tree was updated with the added edge, update MemorySSA
599  // if available.
600  if (MSSAU) {
602  Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
603  MSSAU->applyInsertUpdates(Updates, DT);
604  }
605 
606  // Finish updating dominator tree and memory ssa for full unswitch.
607  if (FullUnswitch) {
608  if (MSSAU) {
609  // Remove the cloned branch instruction.
610  ParentBB->getTerminator()->eraseFromParent();
611  // Create unconditional branch now.
612  BranchInst::Create(ContinueBB, ParentBB);
613  MSSAU->removeEdge(ParentBB, LoopExitBB);
614  }
615  DT.deleteEdge(ParentBB, LoopExitBB);
616  }
617 
618  if (MSSAU && VerifyMemorySSA)
619  MSSAU->getMemorySSA()->verifyMemorySSA();
620 
621  // Rewrite the relevant PHI nodes.
622  if (UnswitchedBB == LoopExitBB)
623  rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
624  else
625  rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
626  *ParentBB, *OldPH, FullUnswitch);
627 
628  // The constant we can replace all of our invariants with inside the loop
629  // body. If any of the invariants have a value other than this the loop won't
630  // be entered.
631  ConstantInt *Replacement = ExitDirection
634 
635  // Since this is an i1 condition we can also trivially replace uses of it
636  // within the loop with a constant.
637  for (Value *Invariant : Invariants)
638  replaceLoopInvariantUses(L, Invariant, *Replacement);
639 
640  // If this was full unswitching, we may have changed the nesting relationship
641  // for this loop so hoist it to its correct parent if needed.
642  if (FullUnswitch)
643  hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
644 
645  if (MSSAU && VerifyMemorySSA)
646  MSSAU->getMemorySSA()->verifyMemorySSA();
647 
648  LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
649  ++NumTrivial;
650  ++NumBranches;
651  return true;
652 }
653 
654 /// Unswitch a trivial switch if the condition is loop invariant.
655 ///
656 /// This routine should only be called when loop code leading to the switch has
657 /// been validated as trivial (no side effects). This routine checks if the
658 /// condition is invariant and that at least one of the successors is a loop
659 /// exit. This allows us to unswitch without duplicating the loop, making it
660 /// trivial.
661 ///
662 /// If this routine fails to unswitch the switch it returns false.
663 ///
664 /// If the switch can be unswitched, this routine splits the preheader and
665 /// copies the switch above that split. If the default case is one of the
666 /// exiting cases, it copies the non-exiting cases and points them at the new
667 /// preheader. If the default case is not exiting, it copies the exiting cases
668 /// and points the default at the preheader. It preserves loop simplified form
669 /// (splitting the exit blocks as necessary). It simplifies the switch within
670 /// the loop by removing now-dead cases. If the default case is one of those
671 /// unswitched, it replaces its destination with a new basic block containing
672 /// only unreachable. Such basic blocks, while technically loop exits, are not
673 /// considered for unswitching so this is a stable transform and the same
674 /// switch will not be revisited. If after unswitching there is only a single
675 /// in-loop successor, the switch is further simplified to an unconditional
676 /// branch. Still more cleanup can be done with some simplifycfg like pass.
677 ///
678 /// If `SE` is not null, it will be updated based on the potential loop SCEVs
679 /// invalidated by this.
681  LoopInfo &LI, ScalarEvolution *SE,
682  MemorySSAUpdater *MSSAU) {
683  LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
684  Value *LoopCond = SI.getCondition();
685 
686  // If this isn't switching on an invariant condition, we can't unswitch it.
687  if (!L.isLoopInvariant(LoopCond))
688  return false;
689 
690  auto *ParentBB = SI.getParent();
691 
692  // The same check must be used both for the default and the exit cases. We
693  // should never leave edges from the switch instruction to a basic block that
694  // we are unswitching, hence the condition used to determine the default case
695  // needs to also be used to populate ExitCaseIndices, which is then used to
696  // remove cases from the switch.
697  auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
698  // BBToCheck is not an exit block if it is inside loop L.
699  if (L.contains(&BBToCheck))
700  return false;
701  // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
702  if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
703  return false;
704  // We do not unswitch a block that only has an unreachable statement, as
705  // it's possible this is a previously unswitched block. Only unswitch if
706  // either the terminator is not unreachable, or, if it is, it's not the only
707  // instruction in the block.
708  auto *TI = BBToCheck.getTerminator();
709  bool isUnreachable = isa<UnreachableInst>(TI);
710  return !isUnreachable ||
711  (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI));
712  };
713 
714  SmallVector<int, 4> ExitCaseIndices;
715  for (auto Case : SI.cases())
716  if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
717  ExitCaseIndices.push_back(Case.getCaseIndex());
718  BasicBlock *DefaultExitBB = nullptr;
721  if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
722  DefaultExitBB = SI.getDefaultDest();
723  } else if (ExitCaseIndices.empty())
724  return false;
725 
726  LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
727 
728  if (MSSAU && VerifyMemorySSA)
729  MSSAU->getMemorySSA()->verifyMemorySSA();
730 
731  // We may need to invalidate SCEVs for the outermost loop reached by any of
732  // the exits.
733  Loop *OuterL = &L;
734 
735  if (DefaultExitBB) {
736  // Clear out the default destination temporarily to allow accurate
737  // predecessor lists to be examined below.
738  SI.setDefaultDest(nullptr);
739  // Check the loop containing this exit.
740  Loop *ExitL = LI.getLoopFor(DefaultExitBB);
741  if (!ExitL || ExitL->contains(OuterL))
742  OuterL = ExitL;
743  }
744 
745  // Store the exit cases into a separate data structure and remove them from
746  // the switch.
747  SmallVector<std::tuple<ConstantInt *, BasicBlock *,
749  4> ExitCases;
750  ExitCases.reserve(ExitCaseIndices.size());
752  // We walk the case indices backwards so that we remove the last case first
753  // and don't disrupt the earlier indices.
754  for (unsigned Index : reverse(ExitCaseIndices)) {
755  auto CaseI = SI.case_begin() + Index;
756  // Compute the outer loop from this exit.
757  Loop *ExitL = LI.getLoopFor(CaseI->getCaseSuccessor());
758  if (!ExitL || ExitL->contains(OuterL))
759  OuterL = ExitL;
760  // Save the value of this case.
761  auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
762  ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
763  // Delete the unswitched cases.
764  SIW.removeCase(CaseI);
765  }
766 
767  if (SE) {
768  if (OuterL)
769  SE->forgetLoop(OuterL);
770  else
771  SE->forgetTopmostLoop(&L);
772  }
773 
774  // Check if after this all of the remaining cases point at the same
775  // successor.
776  BasicBlock *CommonSuccBB = nullptr;
777  if (SI.getNumCases() > 0 &&
778  all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
779  return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
780  }))
781  CommonSuccBB = SI.case_begin()->getCaseSuccessor();
782  if (!DefaultExitBB) {
783  // If we're not unswitching the default, we need it to match any cases to
784  // have a common successor or if we have no cases it is the common
785  // successor.
786  if (SI.getNumCases() == 0)
787  CommonSuccBB = SI.getDefaultDest();
788  else if (SI.getDefaultDest() != CommonSuccBB)
789  CommonSuccBB = nullptr;
790  }
791 
792  // Split the preheader, so that we know that there is a safe place to insert
793  // the switch.
794  BasicBlock *OldPH = L.getLoopPreheader();
795  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
796  OldPH->getTerminator()->eraseFromParent();
797 
798  // Now add the unswitched switch.
799  auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
800  SwitchInstProfUpdateWrapper NewSIW(*NewSI);
801 
802  // Rewrite the IR for the unswitched basic blocks. This requires two steps.
803  // First, we split any exit blocks with remaining in-loop predecessors. Then
804  // we update the PHIs in one of two ways depending on if there was a split.
805  // We walk in reverse so that we split in the same order as the cases
806  // appeared. This is purely for convenience of reading the resulting IR, but
807  // it doesn't cost anything really.
808  SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
810  // Handle the default exit if necessary.
811  // FIXME: It'd be great if we could merge this with the loop below but LLVM's
812  // ranges aren't quite powerful enough yet.
813  if (DefaultExitBB) {
814  if (pred_empty(DefaultExitBB)) {
815  UnswitchedExitBBs.insert(DefaultExitBB);
816  rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
817  } else {
818  auto *SplitBB =
819  SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI, MSSAU);
820  rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
821  *ParentBB, *OldPH,
822  /*FullUnswitch*/ true);
823  DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
824  }
825  }
826  // Note that we must use a reference in the for loop so that we update the
827  // container.
828  for (auto &ExitCase : reverse(ExitCases)) {
829  // Grab a reference to the exit block in the pair so that we can update it.
830  BasicBlock *ExitBB = std::get<1>(ExitCase);
831 
832  // If this case is the last edge into the exit block, we can simply reuse it
833  // as it will no longer be a loop exit. No mapping necessary.
834  if (pred_empty(ExitBB)) {
835  // Only rewrite once.
836  if (UnswitchedExitBBs.insert(ExitBB).second)
837  rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
838  continue;
839  }
840 
841  // Otherwise we need to split the exit block so that we retain an exit
842  // block from the loop and a target for the unswitched condition.
843  BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
844  if (!SplitExitBB) {
845  // If this is the first time we see this, do the split and remember it.
846  SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
847  rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
848  *ParentBB, *OldPH,
849  /*FullUnswitch*/ true);
850  }
851  // Update the case pair to point to the split block.
852  std::get<1>(ExitCase) = SplitExitBB;
853  }
854 
855  // Now add the unswitched cases. We do this in reverse order as we built them
856  // in reverse order.
857  for (auto &ExitCase : reverse(ExitCases)) {
858  ConstantInt *CaseVal = std::get<0>(ExitCase);
859  BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
860 
861  NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
862  }
863 
864  // If the default was unswitched, re-point it and add explicit cases for
865  // entering the loop.
866  if (DefaultExitBB) {
867  NewSIW->setDefaultDest(DefaultExitBB);
868  NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
869 
870  // We removed all the exit cases, so we just copy the cases to the
871  // unswitched switch.
872  for (const auto &Case : SI.cases())
873  NewSIW.addCase(Case.getCaseValue(), NewPH,
875  } else if (DefaultCaseWeight) {
876  // We have to set branch weight of the default case.
877  uint64_t SW = *DefaultCaseWeight;
878  for (const auto &Case : SI.cases()) {
879  auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
880  assert(W &&
881  "case weight must be defined as default case weight is defined");
882  SW += *W;
883  }
884  NewSIW.setSuccessorWeight(0, SW);
885  }
886 
887  // If we ended up with a common successor for every path through the switch
888  // after unswitching, rewrite it to an unconditional branch to make it easy
889  // to recognize. Otherwise we potentially have to recognize the default case
890  // pointing at unreachable and other complexity.
891  if (CommonSuccBB) {
892  BasicBlock *BB = SI.getParent();
893  // We may have had multiple edges to this common successor block, so remove
894  // them as predecessors. We skip the first one, either the default or the
895  // actual first case.
896  bool SkippedFirst = DefaultExitBB == nullptr;
897  for (auto Case : SI.cases()) {
898  assert(Case.getCaseSuccessor() == CommonSuccBB &&
899  "Non-common successor!");
900  (void)Case;
901  if (!SkippedFirst) {
902  SkippedFirst = true;
903  continue;
904  }
905  CommonSuccBB->removePredecessor(BB,
906  /*KeepOneInputPHIs*/ true);
907  }
908  // Now nuke the switch and replace it with a direct branch.
909  SIW.eraseFromParent();
910  BranchInst::Create(CommonSuccBB, BB);
911  } else if (DefaultExitBB) {
912  assert(SI.getNumCases() > 0 &&
913  "If we had no cases we'd have a common successor!");
914  // Move the last case to the default successor. This is valid as if the
915  // default got unswitched it cannot be reached. This has the advantage of
916  // being simple and keeping the number of edges from this switch to
917  // successors the same, and avoiding any PHI update complexity.
918  auto LastCaseI = std::prev(SI.case_end());
919 
920  SI.setDefaultDest(LastCaseI->getCaseSuccessor());
921  SIW.setSuccessorWeight(
922  0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
923  SIW.removeCase(LastCaseI);
924  }
925 
926  // Walk the unswitched exit blocks and the unswitched split blocks and update
927  // the dominator tree based on the CFG edits. While we are walking unordered
928  // containers here, the API for applyUpdates takes an unordered list of
929  // updates and requires them to not contain duplicates.
931  for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
932  DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
933  DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
934  }
935  for (auto SplitUnswitchedPair : SplitExitBBMap) {
936  DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
937  DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
938  }
939 
940  if (MSSAU) {
941  MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
942  if (VerifyMemorySSA)
943  MSSAU->getMemorySSA()->verifyMemorySSA();
944  } else {
945  DT.applyUpdates(DTUpdates);
946  }
947 
949 
950  // We may have changed the nesting relationship for this loop so hoist it to
951  // its correct parent if needed.
952  hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
953 
954  if (MSSAU && VerifyMemorySSA)
955  MSSAU->getMemorySSA()->verifyMemorySSA();
956 
957  ++NumTrivial;
958  ++NumSwitches;
959  LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
960  return true;
961 }
962 
963 /// This routine scans the loop to find a branch or switch which occurs before
964 /// any side effects occur. These can potentially be unswitched without
965 /// duplicating the loop. If a branch or switch is successfully unswitched the
966 /// scanning continues to see if subsequent branches or switches have become
967 /// trivial. Once all trivial candidates have been unswitched, this routine
968 /// returns.
969 ///
970 /// The return value indicates whether anything was unswitched (and therefore
971 /// changed).
972 ///
973 /// If `SE` is not null, it will be updated based on the potential loop SCEVs
974 /// invalidated by this.
976  LoopInfo &LI, ScalarEvolution *SE,
977  MemorySSAUpdater *MSSAU) {
978  bool Changed = false;
979 
980  // If loop header has only one reachable successor we should keep looking for
981  // trivial condition candidates in the successor as well. An alternative is
982  // to constant fold conditions and merge successors into loop header (then we
983  // only need to check header's terminator). The reason for not doing this in
984  // LoopUnswitch pass is that it could potentially break LoopPassManager's
985  // invariants. Folding dead branches could either eliminate the current loop
986  // or make other loops unreachable. LCSSA form might also not be preserved
987  // after deleting branches. The following code keeps traversing loop header's
988  // successors until it finds the trivial condition candidate (condition that
989  // is not a constant). Since unswitching generates branches with constant
990  // conditions, this scenario could be very common in practice.
991  BasicBlock *CurrentBB = L.getHeader();
993  Visited.insert(CurrentBB);
994  do {
995  // Check if there are any side-effecting instructions (e.g. stores, calls,
996  // volatile loads) in the part of the loop that the code *would* execute
997  // without unswitching.
998  if (MSSAU) // Possible early exit with MSSA
999  if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
1000  if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
1001  return Changed;
1002  if (llvm::any_of(*CurrentBB,
1003  [](Instruction &I) { return I.mayHaveSideEffects(); }))
1004  return Changed;
1005 
1006  Instruction *CurrentTerm = CurrentBB->getTerminator();
1007 
1008  if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1009  // Don't bother trying to unswitch past a switch with a constant
1010  // condition. This should be removed prior to running this pass by
1011  // simplifycfg.
1012  if (isa<Constant>(SI->getCondition()))
1013  return Changed;
1014 
1015  if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
1016  // Couldn't unswitch this one so we're done.
1017  return Changed;
1018 
1019  // Mark that we managed to unswitch something.
1020  Changed = true;
1021 
1022  // If unswitching turned the terminator into an unconditional branch then
1023  // we can continue. The unswitching logic specifically works to fold any
1024  // cases it can into an unconditional branch to make it easier to
1025  // recognize here.
1026  auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
1027  if (!BI || BI->isConditional())
1028  return Changed;
1029 
1030  CurrentBB = BI->getSuccessor(0);
1031  continue;
1032  }
1033 
1034  auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1035  if (!BI)
1036  // We do not understand other terminator instructions.
1037  return Changed;
1038 
1039  // Don't bother trying to unswitch past an unconditional branch or a branch
1040  // with a constant value. These should be removed by simplifycfg prior to
1041  // running this pass.
1042  if (!BI->isConditional() ||
1043  isa<Constant>(skipTrivialSelect(BI->getCondition())))
1044  return Changed;
1045 
1046  // Found a trivial condition candidate: non-foldable conditional branch. If
1047  // we fail to unswitch this, we can't do anything else that is trivial.
1048  if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
1049  return Changed;
1050 
1051  // Mark that we managed to unswitch something.
1052  Changed = true;
1053 
1054  // If we only unswitched some of the conditions feeding the branch, we won't
1055  // have collapsed it to a single successor.
1056  BI = cast<BranchInst>(CurrentBB->getTerminator());
1057  if (BI->isConditional())
1058  return Changed;
1059 
1060  // Follow the newly unconditional branch into its successor.
1061  CurrentBB = BI->getSuccessor(0);
1062 
1063  // When continuing, if we exit the loop or reach a previous visited block,
1064  // then we can not reach any trivial condition candidates (unfoldable
1065  // branch instructions or switch instructions) and no unswitch can happen.
1066  } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
1067 
1068  return Changed;
1069 }
1070 
1071 /// Build the cloned blocks for an unswitched copy of the given loop.
1072 ///
1073 /// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
1074 /// after the split block (`SplitBB`) that will be used to select between the
1075 /// cloned and original loop.
1076 ///
1077 /// This routine handles cloning all of the necessary loop blocks and exit
1078 /// blocks including rewriting their instructions and the relevant PHI nodes.
1079 /// Any loop blocks or exit blocks which are dominated by a different successor
1080 /// than the one for this clone of the loop blocks can be trivially skipped. We
1081 /// use the `DominatingSucc` map to determine whether a block satisfies that
1082 /// property with a simple map lookup.
1083 ///
1084 /// It also correctly creates the unconditional branch in the cloned
1085 /// unswitched parent block to only point at the unswitched successor.
1086 ///
1087 /// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1088 /// block splitting is correctly reflected in `LoopInfo`, essentially all of
1089 /// the cloned blocks (and their loops) are left without full `LoopInfo`
1090 /// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1091 /// blocks to them but doesn't create the cloned `DominatorTree` structure and
1092 /// instead the caller must recompute an accurate DT. It *does* correctly
1093 /// update the `AssumptionCache` provided in `AC`.
1095  Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
1096  ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
1097  BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
1098  const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
1099  ValueToValueMapTy &VMap,
1101  DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
1102  SmallVector<BasicBlock *, 4> NewBlocks;
1103  NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
1104 
1105  // We will need to clone a bunch of blocks, wrap up the clone operation in
1106  // a helper.
1107  auto CloneBlock = [&](BasicBlock *OldBB) {
1108  // Clone the basic block and insert it before the new preheader.
1109  BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
1110  NewBB->moveBefore(LoopPH);
1111 
1112  // Record this block and the mapping.
1113  NewBlocks.push_back(NewBB);
1114  VMap[OldBB] = NewBB;
1115 
1116  return NewBB;
1117  };
1118 
1119  // We skip cloning blocks when they have a dominating succ that is not the
1120  // succ we are cloning for.
1121  auto SkipBlock = [&](BasicBlock *BB) {
1122  auto It = DominatingSucc.find(BB);
1123  return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
1124  };
1125 
1126  // First, clone the preheader.
1127  auto *ClonedPH = CloneBlock(LoopPH);
1128 
1129  // Then clone all the loop blocks, skipping the ones that aren't necessary.
1130  for (auto *LoopBB : L.blocks())
1131  if (!SkipBlock(LoopBB))
1132  CloneBlock(LoopBB);
1133 
1134  // Split all the loop exit edges so that when we clone the exit blocks, if
1135  // any of the exit blocks are *also* a preheader for some other loop, we
1136  // don't create multiple predecessors entering the loop header.
1137  for (auto *ExitBB : ExitBlocks) {
1138  if (SkipBlock(ExitBB))
1139  continue;
1140 
1141  // When we are going to clone an exit, we don't need to clone all the
1142  // instructions in the exit block and we want to ensure we have an easy
1143  // place to merge the CFG, so split the exit first. This is always safe to
1144  // do because there cannot be any non-loop predecessors of a loop exit in
1145  // loop simplified form.
1146  auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
1147 
1148  // Rearrange the names to make it easier to write test cases by having the
1149  // exit block carry the suffix rather than the merge block carrying the
1150  // suffix.
1151  MergeBB->takeName(ExitBB);
1152  ExitBB->setName(Twine(MergeBB->getName()) + ".split");
1153 
1154  // Now clone the original exit block.
1155  auto *ClonedExitBB = CloneBlock(ExitBB);
1156  assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1157  "Exit block should have been split to have one successor!");
1158  assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1159  "Cloned exit block has the wrong successor!");
1160 
1161  // Remap any cloned instructions and create a merge phi node for them.
1162  for (auto ZippedInsts : llvm::zip_first(
1163  llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
1164  llvm::make_range(ClonedExitBB->begin(),
1165  std::prev(ClonedExitBB->end())))) {
1166  Instruction &I = std::get<0>(ZippedInsts);
1167  Instruction &ClonedI = std::get<1>(ZippedInsts);
1168 
1169  // The only instructions in the exit block should be PHI nodes and
1170  // potentially a landing pad.
1171  assert(
1172  (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&
1173  "Bad instruction in exit block!");
1174  // We should have a value map between the instruction and its clone.
1175  assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
1176 
1177  auto *MergePN =
1178  PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi",
1179  &*MergeBB->getFirstInsertionPt());
1180  I.replaceAllUsesWith(MergePN);
1181  MergePN->addIncoming(&I, ExitBB);
1182  MergePN->addIncoming(&ClonedI, ClonedExitBB);
1183  }
1184  }
1185 
1186  // Rewrite the instructions in the cloned blocks to refer to the instructions
1187  // in the cloned blocks. We have to do this as a second pass so that we have
1188  // everything available. Also, we have inserted new instructions which may
1189  // include assume intrinsics, so we update the assumption cache while
1190  // processing this.
1191  for (auto *ClonedBB : NewBlocks)
1192  for (Instruction &I : *ClonedBB) {
1193  RemapInstruction(&I, VMap,
1195  if (auto *II = dyn_cast<AssumeInst>(&I))
1196  AC.registerAssumption(II);
1197  }
1198 
1199  // Update any PHI nodes in the cloned successors of the skipped blocks to not
1200  // have spurious incoming values.
1201  for (auto *LoopBB : L.blocks())
1202  if (SkipBlock(LoopBB))
1203  for (auto *SuccBB : successors(LoopBB))
1204  if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
1205  for (PHINode &PN : ClonedSuccBB->phis())
1206  PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
1207 
1208  // Remove the cloned parent as a predecessor of any successor we ended up
1209  // cloning other than the unswitched one.
1210  auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
1211  for (auto *SuccBB : successors(ParentBB)) {
1212  if (SuccBB == UnswitchedSuccBB)
1213  continue;
1214 
1215  auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
1216  if (!ClonedSuccBB)
1217  continue;
1218 
1219  ClonedSuccBB->removePredecessor(ClonedParentBB,
1220  /*KeepOneInputPHIs*/ true);
1221  }
1222 
1223  // Replace the cloned branch with an unconditional branch to the cloned
1224  // unswitched successor.
1225  auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
1226  Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1227  // Trivial Simplification. If Terminator is a conditional branch and
1228  // condition becomes dead - erase it.
1229  Value *ClonedConditionToErase = nullptr;
1230  if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1231  ClonedConditionToErase = BI->getCondition();
1232  else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1233  ClonedConditionToErase = SI->getCondition();
1234 
1235  ClonedTerminator->eraseFromParent();
1236  BranchInst::Create(ClonedSuccBB, ClonedParentBB);
1237 
1238  if (ClonedConditionToErase)
1239  RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
1240  MSSAU);
1241 
1242  // If there are duplicate entries in the PHI nodes because of multiple edges
1243  // to the unswitched successor, we need to nuke all but one as we replaced it
1244  // with a direct branch.
1245  for (PHINode &PN : ClonedSuccBB->phis()) {
1246  bool Found = false;
1247  // Loop over the incoming operands backwards so we can easily delete as we
1248  // go without invalidating the index.
1249  for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
1250  if (PN.getIncomingBlock(i) != ClonedParentBB)
1251  continue;
1252  if (!Found) {
1253  Found = true;
1254  continue;
1255  }
1256  PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
1257  }
1258  }
1259 
1260  // Record the domtree updates for the new blocks.
1262  for (auto *ClonedBB : NewBlocks) {
1263  for (auto *SuccBB : successors(ClonedBB))
1264  if (SuccSet.insert(SuccBB).second)
1265  DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1266  SuccSet.clear();
1267  }
1268 
1269  return ClonedPH;
1270 }
1271 
1272 /// Recursively clone the specified loop and all of its children.
1273 ///
1274 /// The target parent loop for the clone should be provided, or can be null if
1275 /// the clone is a top-level loop. While cloning, all the blocks are mapped
1276 /// with the provided value map. The entire original loop must be present in
1277 /// the value map. The cloned loop is returned.
1278 static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
1279  const ValueToValueMapTy &VMap, LoopInfo &LI) {
1280  auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
1281  assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
1282  ClonedL.reserveBlocks(OrigL.getNumBlocks());
1283  for (auto *BB : OrigL.blocks()) {
1284  auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
1285  ClonedL.addBlockEntry(ClonedBB);
1286  if (LI.getLoopFor(BB) == &OrigL)
1287  LI.changeLoopFor(ClonedBB, &ClonedL);
1288  }
1289  };
1290 
1291  // We specially handle the first loop because it may get cloned into
1292  // a different parent and because we most commonly are cloning leaf loops.
1293  Loop *ClonedRootL = LI.AllocateLoop();
1294  if (RootParentL)
1295  RootParentL->addChildLoop(ClonedRootL);
1296  else
1297  LI.addTopLevelLoop(ClonedRootL);
1298  AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1299 
1300  if (OrigRootL.isInnermost())
1301  return ClonedRootL;
1302 
1303  // If we have a nest, we can quickly clone the entire loop nest using an
1304  // iterative approach because it is a tree. We keep the cloned parent in the
1305  // data structure to avoid repeatedly querying through a map to find it.
1306  SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
1307  // Build up the loops to clone in reverse order as we'll clone them from the
1308  // back.
1309  for (Loop *ChildL : llvm::reverse(OrigRootL))
1310  LoopsToClone.push_back({ClonedRootL, ChildL});
1311  do {
1312  Loop *ClonedParentL, *L;
1313  std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
1314  Loop *ClonedL = LI.AllocateLoop();
1315  ClonedParentL->addChildLoop(ClonedL);
1316  AddClonedBlocksToLoop(*L, *ClonedL);
1317  for (Loop *ChildL : llvm::reverse(*L))
1318  LoopsToClone.push_back({ClonedL, ChildL});
1319  } while (!LoopsToClone.empty());
1320 
1321  return ClonedRootL;
1322 }
1323 
1324 /// Build the cloned loops of an original loop from unswitching.
1325 ///
1326 /// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1327 /// operation. We need to re-verify that there even is a loop (as the backedge
1328 /// may not have been cloned), and even if there are remaining backedges the
1329 /// backedge set may be different. However, we know that each child loop is
1330 /// undisturbed, we only need to find where to place each child loop within
1331 /// either any parent loop or within a cloned version of the original loop.
1332 ///
1333 /// Because child loops may end up cloned outside of any cloned version of the
1334 /// original loop, multiple cloned sibling loops may be created. All of them
1335 /// are returned so that the newly introduced loop nest roots can be
1336 /// identified.
1337 static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1338  const ValueToValueMapTy &VMap, LoopInfo &LI,
1339  SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1340  Loop *ClonedL = nullptr;
1341 
1342  auto *OrigPH = OrigL.getLoopPreheader();
1343  auto *OrigHeader = OrigL.getHeader();
1344 
1345  auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1346  auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1347 
1348  // We need to know the loops of the cloned exit blocks to even compute the
1349  // accurate parent loop. If we only clone exits to some parent of the
1350  // original parent, we want to clone into that outer loop. We also keep track
1351  // of the loops that our cloned exit blocks participate in.
1352  Loop *ParentL = nullptr;
1353  SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1355  ClonedExitsInLoops.reserve(ExitBlocks.size());
1356  for (auto *ExitBB : ExitBlocks)
1357  if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1358  if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1359  ExitLoopMap[ClonedExitBB] = ExitL;
1360  ClonedExitsInLoops.push_back(ClonedExitBB);
1361  if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1362  ParentL = ExitL;
1363  }
1364  assert((!ParentL || ParentL == OrigL.getParentLoop() ||
1365  ParentL->contains(OrigL.getParentLoop())) &&
1366  "The computed parent loop should always contain (or be) the parent of "
1367  "the original loop.");
1368 
1369  // We build the set of blocks dominated by the cloned header from the set of
1370  // cloned blocks out of the original loop. While not all of these will
1371  // necessarily be in the cloned loop, it is enough to establish that they
1372  // aren't in unreachable cycles, etc.
1373  SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1374  for (auto *BB : OrigL.blocks())
1375  if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1376  ClonedLoopBlocks.insert(ClonedBB);
1377 
1378  // Rebuild the set of blocks that will end up in the cloned loop. We may have
1379  // skipped cloning some region of this loop which can in turn skip some of
1380  // the backedges so we have to rebuild the blocks in the loop based on the
1381  // backedges that remain after cloning.
1383  SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1384  for (auto *Pred : predecessors(ClonedHeader)) {
1385  // The only possible non-loop header predecessor is the preheader because
1386  // we know we cloned the loop in simplified form.
1387  if (Pred == ClonedPH)
1388  continue;
1389 
1390  // Because the loop was in simplified form, the only non-loop predecessor
1391  // should be the preheader.
1392  assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
1393  "header other than the preheader "
1394  "that is not part of the loop!");
1395 
1396  // Insert this block into the loop set and on the first visit (and if it
1397  // isn't the header we're currently walking) put it into the worklist to
1398  // recurse through.
1399  if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1400  Worklist.push_back(Pred);
1401  }
1402 
1403  // If we had any backedges then there *is* a cloned loop. Put the header into
1404  // the loop set and then walk the worklist backwards to find all the blocks
1405  // that remain within the loop after cloning.
1406  if (!BlocksInClonedLoop.empty()) {
1407  BlocksInClonedLoop.insert(ClonedHeader);
1408 
1409  while (!Worklist.empty()) {
1410  BasicBlock *BB = Worklist.pop_back_val();
1411  assert(BlocksInClonedLoop.count(BB) &&
1412  "Didn't put block into the loop set!");
1413 
1414  // Insert any predecessors that are in the possible set into the cloned
1415  // set, and if the insert is successful, add them to the worklist. Note
1416  // that we filter on the blocks that are definitely reachable via the
1417  // backedge to the loop header so we may prune out dead code within the
1418  // cloned loop.
1419  for (auto *Pred : predecessors(BB))
1420  if (ClonedLoopBlocks.count(Pred) &&
1421  BlocksInClonedLoop.insert(Pred).second)
1422  Worklist.push_back(Pred);
1423  }
1424 
1425  ClonedL = LI.AllocateLoop();
1426  if (ParentL) {
1427  ParentL->addBasicBlockToLoop(ClonedPH, LI);
1428  ParentL->addChildLoop(ClonedL);
1429  } else {
1430  LI.addTopLevelLoop(ClonedL);
1431  }
1432  NonChildClonedLoops.push_back(ClonedL);
1433 
1434  ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1435  // We don't want to just add the cloned loop blocks based on how we
1436  // discovered them. The original order of blocks was carefully built in
1437  // a way that doesn't rely on predecessor ordering. Rather than re-invent
1438  // that logic, we just re-walk the original blocks (and those of the child
1439  // loops) and filter them as we add them into the cloned loop.
1440  for (auto *BB : OrigL.blocks()) {
1441  auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1442  if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1443  continue;
1444 
1445  // Directly add the blocks that are only in this loop.
1446  if (LI.getLoopFor(BB) == &OrigL) {
1447  ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1448  continue;
1449  }
1450 
1451  // We want to manually add it to this loop and parents.
1452  // Registering it with LoopInfo will happen when we clone the top
1453  // loop for this block.
1454  for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1455  PL->addBlockEntry(ClonedBB);
1456  }
1457 
1458  // Now add each child loop whose header remains within the cloned loop. All
1459  // of the blocks within the loop must satisfy the same constraints as the
1460  // header so once we pass the header checks we can just clone the entire
1461  // child loop nest.
1462  for (Loop *ChildL : OrigL) {
1463  auto *ClonedChildHeader =
1464  cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1465  if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1466  continue;
1467 
1468 #ifndef NDEBUG
1469  // We should never have a cloned child loop header but fail to have
1470  // all of the blocks for that child loop.
1471  for (auto *ChildLoopBB : ChildL->blocks())
1472  assert(BlocksInClonedLoop.count(
1473  cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
1474  "Child cloned loop has a header within the cloned outer "
1475  "loop but not all of its blocks!");
1476 #endif
1477 
1478  cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1479  }
1480  }
1481 
1482  // Now that we've handled all the components of the original loop that were
1483  // cloned into a new loop, we still need to handle anything from the original
1484  // loop that wasn't in a cloned loop.
1485 
1486  // Figure out what blocks are left to place within any loop nest containing
1487  // the unswitched loop. If we never formed a loop, the cloned PH is one of
1488  // them.
1489  SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1490  if (BlocksInClonedLoop.empty())
1491  UnloopedBlockSet.insert(ClonedPH);
1492  for (auto *ClonedBB : ClonedLoopBlocks)
1493  if (!BlocksInClonedLoop.count(ClonedBB))
1494  UnloopedBlockSet.insert(ClonedBB);
1495 
1496  // Copy the cloned exits and sort them in ascending loop depth, we'll work
1497  // backwards across these to process them inside out. The order shouldn't
1498  // matter as we're just trying to build up the map from inside-out; we use
1499  // the map in a more stably ordered way below.
1500  auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1501  llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1502  return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1503  ExitLoopMap.lookup(RHS)->getLoopDepth();
1504  });
1505 
1506  // Populate the existing ExitLoopMap with everything reachable from each
1507  // exit, starting from the inner most exit.
1508  while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1509  assert(Worklist.empty() && "Didn't clear worklist!");
1510 
1511  BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1512  Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1513 
1514  // Walk the CFG back until we hit the cloned PH adding everything reachable
1515  // and in the unlooped set to this exit block's loop.
1516  Worklist.push_back(ExitBB);
1517  do {
1518  BasicBlock *BB = Worklist.pop_back_val();
1519  // We can stop recursing at the cloned preheader (if we get there).
1520  if (BB == ClonedPH)
1521  continue;
1522 
1523  for (BasicBlock *PredBB : predecessors(BB)) {
1524  // If this pred has already been moved to our set or is part of some
1525  // (inner) loop, no update needed.
1526  if (!UnloopedBlockSet.erase(PredBB)) {
1527  assert(
1528  (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
1529  "Predecessor not mapped to a loop!");
1530  continue;
1531  }
1532 
1533  // We just insert into the loop set here. We'll add these blocks to the
1534  // exit loop after we build up the set in an order that doesn't rely on
1535  // predecessor order (which in turn relies on use list order).
1536  bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1537  (void)Inserted;
1538  assert(Inserted && "Should only visit an unlooped block once!");
1539 
1540  // And recurse through to its predecessors.
1541  Worklist.push_back(PredBB);
1542  }
1543  } while (!Worklist.empty());
1544  }
1545 
1546  // Now that the ExitLoopMap gives as mapping for all the non-looping cloned
1547  // blocks to their outer loops, walk the cloned blocks and the cloned exits
1548  // in their original order adding them to the correct loop.
1549 
1550  // We need a stable insertion order. We use the order of the original loop
1551  // order and map into the correct parent loop.
1552  for (auto *BB : llvm::concat<BasicBlock *const>(
1553  makeArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1554  if (Loop *OuterL = ExitLoopMap.lookup(BB))
1555  OuterL->addBasicBlockToLoop(BB, LI);
1556 
1557 #ifndef NDEBUG
1558  for (auto &BBAndL : ExitLoopMap) {
1559  auto *BB = BBAndL.first;
1560  auto *OuterL = BBAndL.second;
1561  assert(LI.getLoopFor(BB) == OuterL &&
1562  "Failed to put all blocks into outer loops!");
1563  }
1564 #endif
1565 
1566  // Now that all the blocks are placed into the correct containing loop in the
1567  // absence of child loops, find all the potentially cloned child loops and
1568  // clone them into whatever outer loop we placed their header into.
1569  for (Loop *ChildL : OrigL) {
1570  auto *ClonedChildHeader =
1571  cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1572  if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1573  continue;
1574 
1575 #ifndef NDEBUG
1576  for (auto *ChildLoopBB : ChildL->blocks())
1577  assert(VMap.count(ChildLoopBB) &&
1578  "Cloned a child loop header but not all of that loops blocks!");
1579 #endif
1580 
1581  NonChildClonedLoops.push_back(cloneLoopNest(
1582  *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1583  }
1584 }
1585 
1586 static void
1588  ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1589  DominatorTree &DT, MemorySSAUpdater *MSSAU) {
1590  // Find all the dead clones, and remove them from their successors.
1591  SmallVector<BasicBlock *, 16> DeadBlocks;
1592  for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1593  for (auto &VMap : VMaps)
1594  if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1595  if (!DT.isReachableFromEntry(ClonedBB)) {
1596  for (BasicBlock *SuccBB : successors(ClonedBB))
1597  SuccBB->removePredecessor(ClonedBB);
1598  DeadBlocks.push_back(ClonedBB);
1599  }
1600 
1601  // Remove all MemorySSA in the dead blocks
1602  if (MSSAU) {
1603  SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1604  DeadBlocks.end());
1605  MSSAU->removeBlocks(DeadBlockSet);
1606  }
1607 
1608  // Drop any remaining references to break cycles.
1609  for (BasicBlock *BB : DeadBlocks)
1610  BB->dropAllReferences();
1611  // Erase them from the IR.
1612  for (BasicBlock *BB : DeadBlocks)
1613  BB->eraseFromParent();
1614 }
1615 
1616 static void
1618  SmallVectorImpl<BasicBlock *> &ExitBlocks,
1619  DominatorTree &DT, LoopInfo &LI,
1620  MemorySSAUpdater *MSSAU,
1621  function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
1622  // Find all the dead blocks tied to this loop, and remove them from their
1623  // successors.
1624  SmallSetVector<BasicBlock *, 8> DeadBlockSet;
1625 
1626  // Start with loop/exit blocks and get a transitive closure of reachable dead
1627  // blocks.
1628  SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1629  ExitBlocks.end());
1630  DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1631  while (!DeathCandidates.empty()) {
1632  auto *BB = DeathCandidates.pop_back_val();
1633  if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1634  for (BasicBlock *SuccBB : successors(BB)) {
1635  SuccBB->removePredecessor(BB);
1636  DeathCandidates.push_back(SuccBB);
1637  }
1638  DeadBlockSet.insert(BB);
1639  }
1640  }
1641 
1642  // Remove all MemorySSA in the dead blocks
1643  if (MSSAU)
1644  MSSAU->removeBlocks(DeadBlockSet);
1645 
1646  // Filter out the dead blocks from the exit blocks list so that it can be
1647  // used in the caller.
1648  llvm::erase_if(ExitBlocks,
1649  [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1650 
1651  // Walk from this loop up through its parents removing all of the dead blocks.
1652  for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1653  for (auto *BB : DeadBlockSet)
1654  ParentL->getBlocksSet().erase(BB);
1655  llvm::erase_if(ParentL->getBlocksVector(),
1656  [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1657  }
1658 
1659  // Now delete the dead child loops. This raw delete will clear them
1660  // recursively.
1661  llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1662  if (!DeadBlockSet.count(ChildL->getHeader()))
1663  return false;
1664 
1665  assert(llvm::all_of(ChildL->blocks(),
1666  [&](BasicBlock *ChildBB) {
1667  return DeadBlockSet.count(ChildBB);
1668  }) &&
1669  "If the child loop header is dead all blocks in the child loop must "
1670  "be dead as well!");
1671  DestroyLoopCB(*ChildL, ChildL->getName());
1672  LI.destroy(ChildL);
1673  return true;
1674  });
1675 
1676  // Remove the loop mappings for the dead blocks and drop all the references
1677  // from these blocks to others to handle cyclic references as we start
1678  // deleting the blocks themselves.
1679  for (auto *BB : DeadBlockSet) {
1680  // Check that the dominator tree has already been updated.
1681  assert(!DT.getNode(BB) && "Should already have cleared domtree!");
1682  LI.changeLoopFor(BB, nullptr);
1683  // Drop all uses of the instructions to make sure we won't have dangling
1684  // uses in other blocks.
1685  for (auto &I : *BB)
1686  if (!I.use_empty())
1687  I.replaceAllUsesWith(UndefValue::get(I.getType()));
1688  BB->dropAllReferences();
1689  }
1690 
1691  // Actually delete the blocks now that they've been fully unhooked from the
1692  // IR.
1693  for (auto *BB : DeadBlockSet)
1694  BB->eraseFromParent();
1695 }
1696 
1697 /// Recompute the set of blocks in a loop after unswitching.
1698 ///
1699 /// This walks from the original headers predecessors to rebuild the loop. We
1700 /// take advantage of the fact that new blocks can't have been added, and so we
1701 /// filter by the original loop's blocks. This also handles potentially
1702 /// unreachable code that we don't want to explore but might be found examining
1703 /// the predecessors of the header.
1704 ///
1705 /// If the original loop is no longer a loop, this will return an empty set. If
1706 /// it remains a loop, all the blocks within it will be added to the set
1707 /// (including those blocks in inner loops).
1709  LoopInfo &LI) {
1711 
1712  auto *PH = L.getLoopPreheader();
1713  auto *Header = L.getHeader();
1714 
1715  // A worklist to use while walking backwards from the header.
1717 
1718  // First walk the predecessors of the header to find the backedges. This will
1719  // form the basis of our walk.
1720  for (auto *Pred : predecessors(Header)) {
1721  // Skip the preheader.
1722  if (Pred == PH)
1723  continue;
1724 
1725  // Because the loop was in simplified form, the only non-loop predecessor
1726  // is the preheader.
1727  assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1728  "than the preheader that is not part of the "
1729  "loop!");
1730 
1731  // Insert this block into the loop set and on the first visit and, if it
1732  // isn't the header we're currently walking, put it into the worklist to
1733  // recurse through.
1734  if (LoopBlockSet.insert(Pred).second && Pred != Header)
1735  Worklist.push_back(Pred);
1736  }
1737 
1738  // If no backedges were found, we're done.
1739  if (LoopBlockSet.empty())
1740  return LoopBlockSet;
1741 
1742  // We found backedges, recurse through them to identify the loop blocks.
1743  while (!Worklist.empty()) {
1744  BasicBlock *BB = Worklist.pop_back_val();
1745  assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1746 
1747  // No need to walk past the header.
1748  if (BB == Header)
1749  continue;
1750 
1751  // Because we know the inner loop structure remains valid we can use the
1752  // loop structure to jump immediately across the entire nested loop.
1753  // Further, because it is in loop simplified form, we can directly jump
1754  // to its preheader afterward.
1755  if (Loop *InnerL = LI.getLoopFor(BB))
1756  if (InnerL != &L) {
1757  assert(L.contains(InnerL) &&
1758  "Should not reach a loop *outside* this loop!");
1759  // The preheader is the only possible predecessor of the loop so
1760  // insert it into the set and check whether it was already handled.
1761  auto *InnerPH = InnerL->getLoopPreheader();
1762  assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1763  "but not contain the inner loop "
1764  "preheader!");
1765  if (!LoopBlockSet.insert(InnerPH).second)
1766  // The only way to reach the preheader is through the loop body
1767  // itself so if it has been visited the loop is already handled.
1768  continue;
1769 
1770  // Insert all of the blocks (other than those already present) into
1771  // the loop set. We expect at least the block that led us to find the
1772  // inner loop to be in the block set, but we may also have other loop
1773  // blocks if they were already enqueued as predecessors of some other
1774  // outer loop block.
1775  for (auto *InnerBB : InnerL->blocks()) {
1776  if (InnerBB == BB) {
1777  assert(LoopBlockSet.count(InnerBB) &&
1778  "Block should already be in the set!");
1779  continue;
1780  }
1781 
1782  LoopBlockSet.insert(InnerBB);
1783  }
1784 
1785  // Add the preheader to the worklist so we will continue past the
1786  // loop body.
1787  Worklist.push_back(InnerPH);
1788  continue;
1789  }
1790 
1791  // Insert any predecessors that were in the original loop into the new
1792  // set, and if the insert is successful, add them to the worklist.
1793  for (auto *Pred : predecessors(BB))
1794  if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1795  Worklist.push_back(Pred);
1796  }
1797 
1798  assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
1799 
1800  // We've found all the blocks participating in the loop, return our completed
1801  // set.
1802  return LoopBlockSet;
1803 }
1804 
1805 /// Rebuild a loop after unswitching removes some subset of blocks and edges.
1806 ///
1807 /// The removal may have removed some child loops entirely but cannot have
1808 /// disturbed any remaining child loops. However, they may need to be hoisted
1809 /// to the parent loop (or to be top-level loops). The original loop may be
1810 /// completely removed.
1811 ///
1812 /// The sibling loops resulting from this update are returned. If the original
1813 /// loop remains a valid loop, it will be the first entry in this list with all
1814 /// of the newly sibling loops following it.
1815 ///
1816 /// Returns true if the loop remains a loop after unswitching, and false if it
1817 /// is no longer a loop after unswitching (and should not continue to be
1818 /// referenced).
1820  LoopInfo &LI,
1821  SmallVectorImpl<Loop *> &HoistedLoops) {
1822  auto *PH = L.getLoopPreheader();
1823 
1824  // Compute the actual parent loop from the exit blocks. Because we may have
1825  // pruned some exits the loop may be different from the original parent.
1826  Loop *ParentL = nullptr;
1827  SmallVector<Loop *, 4> ExitLoops;
1828  SmallVector<BasicBlock *, 4> ExitsInLoops;
1829  ExitsInLoops.reserve(ExitBlocks.size());
1830  for (auto *ExitBB : ExitBlocks)
1831  if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1832  ExitLoops.push_back(ExitL);
1833  ExitsInLoops.push_back(ExitBB);
1834  if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1835  ParentL = ExitL;
1836  }
1837 
1838  // Recompute the blocks participating in this loop. This may be empty if it
1839  // is no longer a loop.
1840  auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1841 
1842  // If we still have a loop, we need to re-set the loop's parent as the exit
1843  // block set changing may have moved it within the loop nest. Note that this
1844  // can only happen when this loop has a parent as it can only hoist the loop
1845  // *up* the nest.
1846  if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1847  // Remove this loop's (original) blocks from all of the intervening loops.
1848  for (Loop *IL = L.getParentLoop(); IL != ParentL;
1849  IL = IL->getParentLoop()) {
1850  IL->getBlocksSet().erase(PH);
1851  for (auto *BB : L.blocks())
1852  IL->getBlocksSet().erase(BB);
1853  llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1854  return BB == PH || L.contains(BB);
1855  });
1856  }
1857 
1858  LI.changeLoopFor(PH, ParentL);
1859  L.getParentLoop()->removeChildLoop(&L);
1860  if (ParentL)
1861  ParentL->addChildLoop(&L);
1862  else
1863  LI.addTopLevelLoop(&L);
1864  }
1865 
1866  // Now we update all the blocks which are no longer within the loop.
1867  auto &Blocks = L.getBlocksVector();
1868  auto BlocksSplitI =
1869  LoopBlockSet.empty()
1870  ? Blocks.begin()
1871  : std::stable_partition(
1872  Blocks.begin(), Blocks.end(),
1873  [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
1874 
1875  // Before we erase the list of unlooped blocks, build a set of them.
1876  SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
1877  if (LoopBlockSet.empty())
1878  UnloopedBlocks.insert(PH);
1879 
1880  // Now erase these blocks from the loop.
1881  for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
1882  L.getBlocksSet().erase(BB);
1883  Blocks.erase(BlocksSplitI, Blocks.end());
1884 
1885  // Sort the exits in ascending loop depth, we'll work backwards across these
1886  // to process them inside out.
1887  llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1888  return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
1889  });
1890 
1891  // We'll build up a set for each exit loop.
1892  SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
1893  Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
1894 
1895  auto RemoveUnloopedBlocksFromLoop =
1896  [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
1897  for (auto *BB : UnloopedBlocks)
1898  L.getBlocksSet().erase(BB);
1900  return UnloopedBlocks.count(BB);
1901  });
1902  };
1903 
1905  while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
1906  assert(Worklist.empty() && "Didn't clear worklist!");
1907  assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
1908 
1909  // Grab the next exit block, in decreasing loop depth order.
1910  BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
1911  Loop &ExitL = *LI.getLoopFor(ExitBB);
1912  assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
1913 
1914  // Erase all of the unlooped blocks from the loops between the previous
1915  // exit loop and this exit loop. This works because the ExitInLoops list is
1916  // sorted in increasing order of loop depth and thus we visit loops in
1917  // decreasing order of loop depth.
1918  for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
1919  RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1920 
1921  // Walk the CFG back until we hit the cloned PH adding everything reachable
1922  // and in the unlooped set to this exit block's loop.
1923  Worklist.push_back(ExitBB);
1924  do {
1925  BasicBlock *BB = Worklist.pop_back_val();
1926  // We can stop recursing at the cloned preheader (if we get there).
1927  if (BB == PH)
1928  continue;
1929 
1930  for (BasicBlock *PredBB : predecessors(BB)) {
1931  // If this pred has already been moved to our set or is part of some
1932  // (inner) loop, no update needed.
1933  if (!UnloopedBlocks.erase(PredBB)) {
1934  assert((NewExitLoopBlocks.count(PredBB) ||
1935  ExitL.contains(LI.getLoopFor(PredBB))) &&
1936  "Predecessor not in a nested loop (or already visited)!");
1937  continue;
1938  }
1939 
1940  // We just insert into the loop set here. We'll add these blocks to the
1941  // exit loop after we build up the set in a deterministic order rather
1942  // than the predecessor-influenced visit order.
1943  bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
1944  (void)Inserted;
1945  assert(Inserted && "Should only visit an unlooped block once!");
1946 
1947  // And recurse through to its predecessors.
1948  Worklist.push_back(PredBB);
1949  }
1950  } while (!Worklist.empty());
1951 
1952  // If blocks in this exit loop were directly part of the original loop (as
1953  // opposed to a child loop) update the map to point to this exit loop. This
1954  // just updates a map and so the fact that the order is unstable is fine.
1955  for (auto *BB : NewExitLoopBlocks)
1956  if (Loop *BBL = LI.getLoopFor(BB))
1957  if (BBL == &L || !L.contains(BBL))
1958  LI.changeLoopFor(BB, &ExitL);
1959 
1960  // We will remove the remaining unlooped blocks from this loop in the next
1961  // iteration or below.
1962  NewExitLoopBlocks.clear();
1963  }
1964 
1965  // Any remaining unlooped blocks are no longer part of any loop unless they
1966  // are part of some child loop.
1967  for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
1968  RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1969  for (auto *BB : UnloopedBlocks)
1970  if (Loop *BBL = LI.getLoopFor(BB))
1971  if (BBL == &L || !L.contains(BBL))
1972  LI.changeLoopFor(BB, nullptr);
1973 
1974  // Sink all the child loops whose headers are no longer in the loop set to
1975  // the parent (or to be top level loops). We reach into the loop and directly
1976  // update its subloop vector to make this batch update efficient.
1977  auto &SubLoops = L.getSubLoopsVector();
1978  auto SubLoopsSplitI =
1979  LoopBlockSet.empty()
1980  ? SubLoops.begin()
1981  : std::stable_partition(
1982  SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
1983  return LoopBlockSet.count(SubL->getHeader());
1984  });
1985  for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
1986  HoistedLoops.push_back(HoistedL);
1987  HoistedL->setParentLoop(nullptr);
1988 
1989  // To compute the new parent of this hoisted loop we look at where we
1990  // placed the preheader above. We can't lookup the header itself because we
1991  // retained the mapping from the header to the hoisted loop. But the
1992  // preheader and header should have the exact same new parent computed
1993  // based on the set of exit blocks from the original loop as the preheader
1994  // is a predecessor of the header and so reached in the reverse walk. And
1995  // because the loops were all in simplified form the preheader of the
1996  // hoisted loop can't be part of some *other* loop.
1997  if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
1998  NewParentL->addChildLoop(HoistedL);
1999  else
2000  LI.addTopLevelLoop(HoistedL);
2001  }
2002  SubLoops.erase(SubLoopsSplitI, SubLoops.end());
2003 
2004  // Actually delete the loop if nothing remained within it.
2005  if (Blocks.empty()) {
2006  assert(SubLoops.empty() &&
2007  "Failed to remove all subloops from the original loop!");
2008  if (Loop *ParentL = L.getParentLoop())
2009  ParentL->removeChildLoop(llvm::find(*ParentL, &L));
2010  else
2011  LI.removeLoop(llvm::find(LI, &L));
2012  // markLoopAsDeleted for L should be triggered by the caller (it is typically
2013  // done by using the UnswitchCB callback).
2014  LI.destroy(&L);
2015  return false;
2016  }
2017 
2018  return true;
2019 }
2020 
2021 /// Helper to visit a dominator subtree, invoking a callable on each node.
2022 ///
2023 /// Returning false at any point will stop walking past that node of the tree.
2024 template <typename CallableT>
2025 void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
2026  SmallVector<DomTreeNode *, 4> DomWorklist;
2027  DomWorklist.push_back(DT[BB]);
2028 #ifndef NDEBUG
2030  Visited.insert(DT[BB]);
2031 #endif
2032  do {
2033  DomTreeNode *N = DomWorklist.pop_back_val();
2034 
2035  // Visit this node.
2036  if (!Callable(N->getBlock()))
2037  continue;
2038 
2039  // Accumulate the child nodes.
2040  for (DomTreeNode *ChildN : *N) {
2041  assert(Visited.insert(ChildN).second &&
2042  "Cannot visit a node twice when walking a tree!");
2043  DomWorklist.push_back(ChildN);
2044  }
2045  } while (!DomWorklist.empty());
2046 }
2047 
2049  Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
2050  SmallVectorImpl<BasicBlock *> &ExitBlocks, IVConditionInfo &PartialIVInfo,
2051  DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
2052  function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2053  ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
2054  function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
2055  auto *ParentBB = TI.getParent();
2056  BranchInst *BI = dyn_cast<BranchInst>(&TI);
2057  SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2058 
2059  // We can only unswitch switches, conditional branches with an invariant
2060  // condition, or combining invariant conditions with an instruction or
2061  // partially invariant instructions.
2062  assert((SI || (BI && BI->isConditional())) &&
2063  "Can only unswitch switches and conditional branch!");
2064  bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
2065  bool FullUnswitch =
2066  SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] &&
2067  !PartiallyInvariant);
2068  if (FullUnswitch)
2069  assert(Invariants.size() == 1 &&
2070  "Cannot have other invariants with full unswitching!");
2071  else
2072  assert(isa<Instruction>(skipTrivialSelect(BI->getCondition())) &&
2073  "Partial unswitching requires an instruction as the condition!");
2074 
2075  if (MSSAU && VerifyMemorySSA)
2076  MSSAU->getMemorySSA()->verifyMemorySSA();
2077 
2078  // Constant and BBs tracking the cloned and continuing successor. When we are
2079  // unswitching the entire condition, this can just be trivially chosen to
2080  // unswitch towards `true`. However, when we are unswitching a set of
2081  // invariants combined with `and` or `or` or partially invariant instructions,
2082  // the combining operation determines the best direction to unswitch: we want
2083  // to unswitch the direction that will collapse the branch.
2084  bool Direction = true;
2085  int ClonedSucc = 0;
2086  if (!FullUnswitch) {
2088  (void)Cond;
2090  PartiallyInvariant) &&
2091  "Only `or`, `and`, an `select`, partially invariant instructions "
2092  "can combine invariants being unswitched.");
2093  if (!match(Cond, m_LogicalOr())) {
2094  if (match(Cond, m_LogicalAnd()) ||
2095  (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
2096  Direction = false;
2097  ClonedSucc = 1;
2098  }
2099  }
2100  }
2101 
2102  BasicBlock *RetainedSuccBB =
2103  BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2104  SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2105  if (BI)
2106  UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2107  else
2108  for (auto Case : SI->cases())
2109  if (Case.getCaseSuccessor() != RetainedSuccBB)
2110  UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2111 
2112  assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
2113  "Should not unswitch the same successor we are retaining!");
2114 
2115  // The branch should be in this exact loop. Any inner loop's invariant branch
2116  // should be handled by unswitching that inner loop. The caller of this
2117  // routine should filter out any candidates that remain (but were skipped for
2118  // whatever reason).
2119  assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
2120 
2121  // Compute the parent loop now before we start hacking on things.
2122  Loop *ParentL = L.getParentLoop();
2123  // Get blocks in RPO order for MSSA update, before changing the CFG.
2124  LoopBlocksRPO LBRPO(&L);
2125  if (MSSAU)
2126  LBRPO.perform(&LI);
2127 
2128  // Compute the outer-most loop containing one of our exit blocks. This is the
2129  // furthest up our loopnest which can be mutated, which we will use below to
2130  // update things.
2131  Loop *OuterExitL = &L;
2132  for (auto *ExitBB : ExitBlocks) {
2133  Loop *NewOuterExitL = LI.getLoopFor(ExitBB);
2134  if (!NewOuterExitL) {
2135  // We exited the entire nest with this block, so we're done.
2136  OuterExitL = nullptr;
2137  break;
2138  }
2139  if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2140  OuterExitL = NewOuterExitL;
2141  }
2142 
2143  // At this point, we're definitely going to unswitch something so invalidate
2144  // any cached information in ScalarEvolution for the outer most loop
2145  // containing an exit block and all nested loops.
2146  if (SE) {
2147  if (OuterExitL)
2148  SE->forgetLoop(OuterExitL);
2149  else
2150  SE->forgetTopmostLoop(&L);
2151  }
2152 
2153  bool InsertFreeze = false;
2154  if (FreezeLoopUnswitchCond) {
2155  ICFLoopSafetyInfo SafetyInfo;
2156  SafetyInfo.computeLoopSafetyInfo(&L);
2157  InsertFreeze = !SafetyInfo.isGuaranteedToExecute(TI, &DT, &L);
2158  }
2159 
2160  // If the edge from this terminator to a successor dominates that successor,
2161  // store a map from each block in its dominator subtree to it. This lets us
2162  // tell when cloning for a particular successor if a block is dominated by
2163  // some *other* successor with a single data structure. We use this to
2164  // significantly reduce cloning.
2166  for (auto *SuccBB : llvm::concat<BasicBlock *const>(
2167  makeArrayRef(RetainedSuccBB), UnswitchedSuccBBs))
2168  if (SuccBB->getUniquePredecessor() ||
2169  llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2170  return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2171  }))
2172  visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2173  DominatingSucc[BB] = SuccBB;
2174  return true;
2175  });
2176 
2177  // Split the preheader, so that we know that there is a safe place to insert
2178  // the conditional branch. We will change the preheader to have a conditional
2179  // branch on LoopCond. The original preheader will become the split point
2180  // between the unswitched versions, and we will have a new preheader for the
2181  // original loop.
2182  BasicBlock *SplitBB = L.getLoopPreheader();
2183  BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2184 
2185  // Keep track of the dominator tree updates needed.
2187 
2188  // Clone the loop for each unswitched successor.
2190  VMaps.reserve(UnswitchedSuccBBs.size());
2192  for (auto *SuccBB : UnswitchedSuccBBs) {
2193  VMaps.emplace_back(new ValueToValueMapTy());
2194  ClonedPHs[SuccBB] = buildClonedLoopBlocks(
2195  L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2196  DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU);
2197  }
2198 
2199  // Drop metadata if we may break its semantics by moving this instr into the
2200  // split block.
2201  if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2203  // Do not spend time trying to understand if we can keep it, just drop it
2204  // to save compile time.
2205  TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2206  else {
2207  // It is only legal to preserve make.implicit metadata if we are
2208  // guaranteed no reach implicit null check after following this branch.
2209  ICFLoopSafetyInfo SafetyInfo;
2210  SafetyInfo.computeLoopSafetyInfo(&L);
2211  if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2212  TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2213  }
2214  }
2215 
2216  // The stitching of the branched code back together depends on whether we're
2217  // doing full unswitching or not with the exception that we always want to
2218  // nuke the initial terminator placed in the split block.
2219  SplitBB->getTerminator()->eraseFromParent();
2220  if (FullUnswitch) {
2221  // Splice the terminator from the original loop and rewrite its
2222  // successors.
2223  SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI);
2224 
2225  // Keep a clone of the terminator for MSSA updates.
2226  Instruction *NewTI = TI.clone();
2227  ParentBB->getInstList().push_back(NewTI);
2228 
2229  // First wire up the moved terminator to the preheaders.
2230  if (BI) {
2231  BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2232  BI->setSuccessor(ClonedSucc, ClonedPH);
2233  BI->setSuccessor(1 - ClonedSucc, LoopPH);
2235  if (InsertFreeze) {
2236  if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, BI, &DT))
2237  Cond = new FreezeInst(Cond, Cond->getName() + ".fr", BI);
2238  }
2239  BI->setCondition(Cond);
2240  DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2241  } else {
2242  assert(SI && "Must either be a branch or switch!");
2243 
2244  // Walk the cases and directly update their successors.
2245  assert(SI->getDefaultDest() == RetainedSuccBB &&
2246  "Not retaining default successor!");
2247  SI->setDefaultDest(LoopPH);
2248  for (auto &Case : SI->cases())
2249  if (Case.getCaseSuccessor() == RetainedSuccBB)
2250  Case.setSuccessor(LoopPH);
2251  else
2252  Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2253 
2254  if (InsertFreeze) {
2255  auto Cond = SI->getCondition();
2256  if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, SI, &DT))
2257  SI->setCondition(new FreezeInst(Cond, Cond->getName() + ".fr", SI));
2258  }
2259  // We need to use the set to populate domtree updates as even when there
2260  // are multiple cases pointing at the same successor we only want to
2261  // remove and insert one edge in the domtree.
2262  for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2263  DTUpdates.push_back(
2264  {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2265  }
2266 
2267  if (MSSAU) {
2268  DT.applyUpdates(DTUpdates);
2269  DTUpdates.clear();
2270 
2271  // Remove all but one edge to the retained block and all unswitched
2272  // blocks. This is to avoid having duplicate entries in the cloned Phis,
2273  // when we know we only keep a single edge for each case.
2274  MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2275  for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2276  MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2277 
2278  for (auto &VMap : VMaps)
2279  MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2280  /*IgnoreIncomingWithNoClones=*/true);
2281  MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2282 
2283  // Remove all edges to unswitched blocks.
2284  for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2285  MSSAU->removeEdge(ParentBB, SuccBB);
2286  }
2287 
2288  // Now unhook the successor relationship as we'll be replacing
2289  // the terminator with a direct branch. This is much simpler for branches
2290  // than switches so we handle those first.
2291  if (BI) {
2292  // Remove the parent as a predecessor of the unswitched successor.
2293  assert(UnswitchedSuccBBs.size() == 1 &&
2294  "Only one possible unswitched block for a branch!");
2295  BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2296  UnswitchedSuccBB->removePredecessor(ParentBB,
2297  /*KeepOneInputPHIs*/ true);
2298  DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2299  } else {
2300  // Note that we actually want to remove the parent block as a predecessor
2301  // of *every* case successor. The case successor is either unswitched,
2302  // completely eliminating an edge from the parent to that successor, or it
2303  // is a duplicate edge to the retained successor as the retained successor
2304  // is always the default successor and as we'll replace this with a direct
2305  // branch we no longer need the duplicate entries in the PHI nodes.
2306  SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2307  assert(NewSI->getDefaultDest() == RetainedSuccBB &&
2308  "Not retaining default successor!");
2309  for (auto &Case : NewSI->cases())
2310  Case.getCaseSuccessor()->removePredecessor(
2311  ParentBB,
2312  /*KeepOneInputPHIs*/ true);
2313 
2314  // We need to use the set to populate domtree updates as even when there
2315  // are multiple cases pointing at the same successor we only want to
2316  // remove and insert one edge in the domtree.
2317  for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2318  DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2319  }
2320 
2321  // After MSSAU update, remove the cloned terminator instruction NewTI.
2322  ParentBB->getTerminator()->eraseFromParent();
2323 
2324  // Create a new unconditional branch to the continuing block (as opposed to
2325  // the one cloned).
2326  BranchInst::Create(RetainedSuccBB, ParentBB);
2327  } else {
2328  assert(BI && "Only branches have partial unswitching.");
2329  assert(UnswitchedSuccBBs.size() == 1 &&
2330  "Only one possible unswitched block for a branch!");
2331  BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2332  // When doing a partial unswitch, we have to do a bit more work to build up
2333  // the branch in the split block.
2334  if (PartiallyInvariant)
2336  *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
2337  else {
2339  *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
2340  FreezeLoopUnswitchCond, BI, &AC, DT);
2341  }
2342  DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2343 
2344  if (MSSAU) {
2345  DT.applyUpdates(DTUpdates);
2346  DTUpdates.clear();
2347 
2348  // Perform MSSA cloning updates.
2349  for (auto &VMap : VMaps)
2350  MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2351  /*IgnoreIncomingWithNoClones=*/true);
2352  MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2353  }
2354  }
2355 
2356  // Apply the updates accumulated above to get an up-to-date dominator tree.
2357  DT.applyUpdates(DTUpdates);
2358 
2359  // Now that we have an accurate dominator tree, first delete the dead cloned
2360  // blocks so that we can accurately build any cloned loops. It is important to
2361  // not delete the blocks from the original loop yet because we still want to
2362  // reference the original loop to understand the cloned loop's structure.
2363  deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2364 
2365  // Build the cloned loop structure itself. This may be substantially
2366  // different from the original structure due to the simplified CFG. This also
2367  // handles inserting all the cloned blocks into the correct loops.
2368  SmallVector<Loop *, 4> NonChildClonedLoops;
2369  for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2370  buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2371 
2372  // Now that our cloned loops have been built, we can update the original loop.
2373  // First we delete the dead blocks from it and then we rebuild the loop
2374  // structure taking these deletions into account.
2375  deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, DestroyLoopCB);
2376 
2377  if (MSSAU && VerifyMemorySSA)
2378  MSSAU->getMemorySSA()->verifyMemorySSA();
2379 
2380  SmallVector<Loop *, 4> HoistedLoops;
2381  bool IsStillLoop = rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops);
2382 
2383  if (MSSAU && VerifyMemorySSA)
2384  MSSAU->getMemorySSA()->verifyMemorySSA();
2385 
2386  // This transformation has a high risk of corrupting the dominator tree, and
2387  // the below steps to rebuild loop structures will result in hard to debug
2388  // errors in that case so verify that the dominator tree is sane first.
2389  // FIXME: Remove this when the bugs stop showing up and rely on existing
2390  // verification steps.
2392 
2393  if (BI && !PartiallyInvariant) {
2394  // If we unswitched a branch which collapses the condition to a known
2395  // constant we want to replace all the uses of the invariants within both
2396  // the original and cloned blocks. We do this here so that we can use the
2397  // now updated dominator tree to identify which side the users are on.
2398  assert(UnswitchedSuccBBs.size() == 1 &&
2399  "Only one possible unswitched block for a branch!");
2400  BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2401 
2402  // When considering multiple partially-unswitched invariants
2403  // we cant just go replace them with constants in both branches.
2404  //
2405  // For 'AND' we infer that true branch ("continue") means true
2406  // for each invariant operand.
2407  // For 'OR' we can infer that false branch ("continue") means false
2408  // for each invariant operand.
2409  // So it happens that for multiple-partial case we dont replace
2410  // in the unswitched branch.
2411  bool ReplaceUnswitched =
2412  FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
2413 
2414  ConstantInt *UnswitchedReplacement =
2417  ConstantInt *ContinueReplacement =
2420  for (Value *Invariant : Invariants) {
2421  assert(!isa<Constant>(Invariant) &&
2422  "Should not be replacing constant values!");
2423  // Use make_early_inc_range here as set invalidates the iterator.
2424  for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
2425  Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2426  if (!UserI)
2427  continue;
2428 
2429  // Replace it with the 'continue' side if in the main loop body, and the
2430  // unswitched if in the cloned blocks.
2431  if (DT.dominates(LoopPH, UserI->getParent()))
2432  U.set(ContinueReplacement);
2433  else if (ReplaceUnswitched &&
2434  DT.dominates(ClonedPH, UserI->getParent()))
2435  U.set(UnswitchedReplacement);
2436  }
2437  }
2438  }
2439 
2440  // We can change which blocks are exit blocks of all the cloned sibling
2441  // loops, the current loop, and any parent loops which shared exit blocks
2442  // with the current loop. As a consequence, we need to re-form LCSSA for
2443  // them. But we shouldn't need to re-form LCSSA for any child loops.
2444  // FIXME: This could be made more efficient by tracking which exit blocks are
2445  // new, and focusing on them, but that isn't likely to be necessary.
2446  //
2447  // In order to reasonably rebuild LCSSA we need to walk inside-out across the
2448  // loop nest and update every loop that could have had its exits changed. We
2449  // also need to cover any intervening loops. We add all of these loops to
2450  // a list and sort them by loop depth to achieve this without updating
2451  // unnecessary loops.
2452  auto UpdateLoop = [&](Loop &UpdateL) {
2453 #ifndef NDEBUG
2454  UpdateL.verifyLoop();
2455  for (Loop *ChildL : UpdateL) {
2456  ChildL->verifyLoop();
2457  assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2458  "Perturbed a child loop's LCSSA form!");
2459  }
2460 #endif
2461  // First build LCSSA for this loop so that we can preserve it when
2462  // forming dedicated exits. We don't want to perturb some other loop's
2463  // LCSSA while doing that CFG edit.
2464  formLCSSA(UpdateL, DT, &LI, SE);
2465 
2466  // For loops reached by this loop's original exit blocks we may
2467  // introduced new, non-dedicated exits. At least try to re-form dedicated
2468  // exits for these loops. This may fail if they couldn't have dedicated
2469  // exits to start with.
2470  formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
2471  };
2472 
2473  // For non-child cloned loops and hoisted loops, we just need to update LCSSA
2474  // and we can do it in any order as they don't nest relative to each other.
2475  //
2476  // Also check if any of the loops we have updated have become top-level loops
2477  // as that will necessitate widening the outer loop scope.
2478  for (Loop *UpdatedL :
2479  llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2480  UpdateLoop(*UpdatedL);
2481  if (UpdatedL->isOutermost())
2482  OuterExitL = nullptr;
2483  }
2484  if (IsStillLoop) {
2485  UpdateLoop(L);
2486  if (L.isOutermost())
2487  OuterExitL = nullptr;
2488  }
2489 
2490  // If the original loop had exit blocks, walk up through the outer most loop
2491  // of those exit blocks to update LCSSA and form updated dedicated exits.
2492  if (OuterExitL != &L)
2493  for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2494  OuterL = OuterL->getParentLoop())
2495  UpdateLoop(*OuterL);
2496 
2497 #ifndef NDEBUG
2498  // Verify the entire loop structure to catch any incorrect updates before we
2499  // progress in the pass pipeline.
2500  LI.verify(DT);
2501 #endif
2502 
2503  // Now that we've unswitched something, make callbacks to report the changes.
2504  // For that we need to merge together the updated loops and the cloned loops
2505  // and check whether the original loop survived.
2506  SmallVector<Loop *, 4> SibLoops;
2507  for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2508  if (UpdatedL->getParentLoop() == ParentL)
2509  SibLoops.push_back(UpdatedL);
2510  UnswitchCB(IsStillLoop, PartiallyInvariant, SibLoops);
2511 
2512  if (MSSAU && VerifyMemorySSA)
2513  MSSAU->getMemorySSA()->verifyMemorySSA();
2514 
2515  if (BI)
2516  ++NumBranches;
2517  else
2518  ++NumSwitches;
2519 }
2520 
2521 /// Recursively compute the cost of a dominator subtree based on the per-block
2522 /// cost map provided.
2523 ///
2524 /// The recursive computation is memozied into the provided DT-indexed cost map
2525 /// to allow querying it for most nodes in the domtree without it becoming
2526 /// quadratic.
2528  DomTreeNode &N,
2531  // Don't accumulate cost (or recurse through) blocks not in our block cost
2532  // map and thus not part of the duplication cost being considered.
2533  auto BBCostIt = BBCostMap.find(N.getBlock());
2534  if (BBCostIt == BBCostMap.end())
2535  return 0;
2536 
2537  // Lookup this node to see if we already computed its cost.
2538  auto DTCostIt = DTCostMap.find(&N);
2539  if (DTCostIt != DTCostMap.end())
2540  return DTCostIt->second;
2541 
2542  // If not, we have to compute it. We can't use insert above and update
2543  // because computing the cost may insert more things into the map.
2544  InstructionCost Cost = std::accumulate(
2545  N.begin(), N.end(), BBCostIt->second,
2546  [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
2547  return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2548  });
2549  bool Inserted = DTCostMap.insert({&N, Cost}).second;
2550  (void)Inserted;
2551  assert(Inserted && "Should not insert a node while visiting children!");
2552  return Cost;
2553 }
2554 
2555 /// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
2556 /// making the following replacement:
2557 ///
2558 /// --code before guard--
2559 /// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2560 /// --code after guard--
2561 ///
2562 /// into
2563 ///
2564 /// --code before guard--
2565 /// br i1 %cond, label %guarded, label %deopt
2566 ///
2567 /// guarded:
2568 /// --code after guard--
2569 ///
2570 /// deopt:
2571 /// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2572 /// unreachable
2573 ///
2574 /// It also makes all relevant DT and LI updates, so that all structures are in
2575 /// valid state after this transform.
2576 static BranchInst *
2578  SmallVectorImpl<BasicBlock *> &ExitBlocks,
2579  DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
2581  LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
2582  BasicBlock *CheckBB = GI->getParent();
2583 
2584  if (MSSAU && VerifyMemorySSA)
2585  MSSAU->getMemorySSA()->verifyMemorySSA();
2586 
2587  // Remove all CheckBB's successors from DomTree. A block can be seen among
2588  // successors more than once, but for DomTree it should be added only once.
2589  SmallPtrSet<BasicBlock *, 4> Successors;
2590  for (auto *Succ : successors(CheckBB))
2591  if (Successors.insert(Succ).second)
2592  DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ});
2593 
2594  Instruction *DeoptBlockTerm =
2595  SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true);
2596  BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2597  // SplitBlockAndInsertIfThen inserts control flow that branches to
2598  // DeoptBlockTerm if the condition is true. We want the opposite.
2599  CheckBI->swapSuccessors();
2600 
2601  BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2602  GuardedBlock->setName("guarded");
2603  CheckBI->getSuccessor(1)->setName("deopt");
2604  BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2605 
2606  // We now have a new exit block.
2607  ExitBlocks.push_back(CheckBI->getSuccessor(1));
2608 
2609  if (MSSAU)
2610  MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2611 
2612  GI->moveBefore(DeoptBlockTerm);
2614 
2615  // Add new successors of CheckBB into DomTree.
2616  for (auto *Succ : successors(CheckBB))
2617  DTUpdates.push_back({DominatorTree::Insert, CheckBB, Succ});
2618 
2619  // Now the blocks that used to be CheckBB's successors are GuardedBlock's
2620  // successors.
2621  for (auto *Succ : Successors)
2622  DTUpdates.push_back({DominatorTree::Insert, GuardedBlock, Succ});
2623 
2624  // Make proper changes to DT.
2625  DT.applyUpdates(DTUpdates);
2626  // Inform LI of a new loop block.
2627  L.addBasicBlockToLoop(GuardedBlock, LI);
2628 
2629  if (MSSAU) {
2630  MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
2631  MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2632  if (VerifyMemorySSA)
2633  MSSAU->getMemorySSA()->verifyMemorySSA();
2634  }
2635 
2636  ++NumGuards;
2637  return CheckBI;
2638 }
2639 
2640 /// Cost multiplier is a way to limit potentially exponential behavior
2641 /// of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch
2642 /// candidates available. Also accounting for the number of "sibling" loops with
2643 /// the idea to account for previous unswitches that already happened on this
2644 /// cluster of loops. There was an attempt to keep this formula simple,
2645 /// just enough to limit the worst case behavior. Even if it is not that simple
2646 /// now it is still not an attempt to provide a detailed heuristic size
2647 /// prediction.
2648 ///
2649 /// TODO: Make a proper accounting of "explosion" effect for all kinds of
2650 /// unswitch candidates, making adequate predictions instead of wild guesses.
2651 /// That requires knowing not just the number of "remaining" candidates but
2652 /// also costs of unswitching for each of these candidates.
2654  Instruction &TI, Loop &L, LoopInfo &LI, DominatorTree &DT,
2656  UnswitchCandidates) {
2657 
2658  // Guards and other exiting conditions do not contribute to exponential
2659  // explosion as soon as they dominate the latch (otherwise there might be
2660  // another path to the latch remaining that does not allow to eliminate the
2661  // loop copy on unswitch).
2662  BasicBlock *Latch = L.getLoopLatch();
2663  BasicBlock *CondBlock = TI.getParent();
2664  if (DT.dominates(CondBlock, Latch) &&
2665  (isGuard(&TI) ||
2666  llvm::count_if(successors(&TI), [&L](BasicBlock *SuccBB) {
2667  return L.contains(SuccBB);
2668  }) <= 1)) {
2669  NumCostMultiplierSkipped++;
2670  return 1;
2671  }
2672 
2673  auto *ParentL = L.getParentLoop();
2674  int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2675  : std::distance(LI.begin(), LI.end()));
2676  // Count amount of clones that all the candidates might cause during
2677  // unswitching. Branch/guard counts as 1, switch counts as log2 of its cases.
2678  int UnswitchedClones = 0;
2679  for (auto Candidate : UnswitchCandidates) {
2680  Instruction *CI = Candidate.first;
2681  BasicBlock *CondBlock = CI->getParent();
2682  bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2683  if (isGuard(CI)) {
2684  if (!SkipExitingSuccessors)
2685  UnswitchedClones++;
2686  continue;
2687  }
2688  int NonExitingSuccessors = llvm::count_if(
2689  successors(CondBlock), [SkipExitingSuccessors, &L](BasicBlock *SuccBB) {
2690  return !SkipExitingSuccessors || L.contains(SuccBB);
2691  });
2692  UnswitchedClones += Log2_32(NonExitingSuccessors);
2693  }
2694 
2695  // Ignore up to the "unscaled candidates" number of unswitch candidates
2696  // when calculating the power-of-two scaling of the cost. The main idea
2697  // with this control is to allow a small number of unswitches to happen
2698  // and rely more on siblings multiplier (see below) when the number
2699  // of candidates is small.
2700  unsigned ClonesPower =
2701  std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2702 
2703  // Allowing top-level loops to spread a bit more than nested ones.
2704  int SiblingsMultiplier =
2705  std::max((ParentL ? SiblingsCount
2706  : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2707  1);
2708  // Compute the cost multiplier in a way that won't overflow by saturating
2709  // at an upper bound.
2710  int CostMultiplier;
2711  if (ClonesPower > Log2_32(UnswitchThreshold) ||
2712  SiblingsMultiplier > UnswitchThreshold)
2713  CostMultiplier = UnswitchThreshold;
2714  else
2715  CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2716  (int)UnswitchThreshold);
2717 
2718  LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier
2719  << " (siblings " << SiblingsMultiplier << " * clones "
2720  << (1 << ClonesPower) << ")"
2721  << " for unswitch candidate: " << TI << "\n");
2722  return CostMultiplier;
2723 }
2724 
2726  Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
2728  function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2729  ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
2730  function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
2731  // Collect all invariant conditions within this loop (as opposed to an inner
2732  // loop which would be handled when visiting that inner loop).
2734  UnswitchCandidates;
2735 
2736  // Whether or not we should also collect guards in the loop.
2737  bool CollectGuards = false;
2738  if (UnswitchGuards) {
2739  auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
2740  Intrinsic::getName(Intrinsic::experimental_guard));
2741  if (GuardDecl && !GuardDecl->use_empty())
2742  CollectGuards = true;
2743  }
2744 
2745  IVConditionInfo PartialIVInfo;
2746  for (auto *BB : L.blocks()) {
2747  if (LI.getLoopFor(BB) != &L)
2748  continue;
2749 
2750  if (CollectGuards)
2751  for (auto &I : *BB)
2752  if (isGuard(&I)) {
2753  auto *Cond = cast<IntrinsicInst>(&I)->getArgOperand(0);
2754  // TODO: Support AND, OR conditions and partial unswitching.
2755  if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2756  UnswitchCandidates.push_back({&I, {Cond}});
2757  }
2758 
2759  if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2760  // We can only consider fully loop-invariant switch conditions as we need
2761  // to completely eliminate the switch after unswitching.
2762  if (!isa<Constant>(SI->getCondition()) &&
2763  L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
2764  UnswitchCandidates.push_back({SI, {SI->getCondition()}});
2765  continue;
2766  }
2767 
2768  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2769  if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) ||
2770  BI->getSuccessor(0) == BI->getSuccessor(1))
2771  continue;
2772 
2773  Value *Cond = skipTrivialSelect(BI->getCondition());
2774  if (isa<Constant>(Cond))
2775  continue;
2776 
2777  if (L.isLoopInvariant(Cond)) {
2778  UnswitchCandidates.push_back({BI, {Cond}});
2779  continue;
2780  }
2781 
2782  Instruction &CondI = *cast<Instruction>(Cond);
2783  if (match(&CondI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
2784  TinyPtrVector<Value *> Invariants =
2786  if (Invariants.empty())
2787  continue;
2788 
2789  UnswitchCandidates.push_back({BI, std::move(Invariants)});
2790  continue;
2791  }
2792  }
2793 
2794  Instruction *PartialIVCondBranch = nullptr;
2795  if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
2796  !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
2797  return TerminatorAndInvariants.first == L.getHeader()->getTerminator();
2798  })) {
2799  MemorySSA *MSSA = MSSAU->getMemorySSA();
2800  if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
2801  LLVM_DEBUG(
2802  dbgs() << "simple-loop-unswitch: Found partially invariant condition "
2803  << *Info->InstToDuplicate[0] << "\n");
2804  PartialIVInfo = *Info;
2805  PartialIVCondBranch = L.getHeader()->getTerminator();
2806  TinyPtrVector<Value *> ValsToDuplicate;
2807  llvm::append_range(ValsToDuplicate, Info->InstToDuplicate);
2808  UnswitchCandidates.push_back(
2809  {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
2810  }
2811  }
2812 
2813  // If we didn't find any candidates, we're done.
2814  if (UnswitchCandidates.empty())
2815  return false;
2816 
2817  // Check if there are irreducible CFG cycles in this loop. If so, we cannot
2818  // easily unswitch non-trivial edges out of the loop. Doing so might turn the
2819  // irreducible control flow into reducible control flow and introduce new
2820  // loops "out of thin air". If we ever discover important use cases for doing
2821  // this, we can add support to loop unswitch, but it is a lot of complexity
2822  // for what seems little or no real world benefit.
2823  LoopBlocksRPO RPOT(&L);
2824  RPOT.perform(&LI);
2825  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
2826  return false;
2827 
2828  SmallVector<BasicBlock *, 4> ExitBlocks;
2829  L.getUniqueExitBlocks(ExitBlocks);
2830 
2831  // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
2832  // instruction as we don't know how to split those exit blocks.
2833  // FIXME: We should teach SplitBlock to handle this and remove this
2834  // restriction.
2835  for (auto *ExitBB : ExitBlocks) {
2836  auto *I = ExitBB->getFirstNonPHI();
2837  if (isa<CleanupPadInst>(I) || isa<CatchSwitchInst>(I)) {
2838  LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "
2839  "in exit block\n");
2840  return false;
2841  }
2842  }
2843 
2844  LLVM_DEBUG(
2845  dbgs() << "Considering " << UnswitchCandidates.size()
2846  << " non-trivial loop invariant conditions for unswitching.\n");
2847 
2848  // Given that unswitching these terminators will require duplicating parts of
2849  // the loop, so we need to be able to model that cost. Compute the ephemeral
2850  // values and set up a data structure to hold per-BB costs. We cache each
2851  // block's cost so that we don't recompute this when considering different
2852  // subsets of the loop for duplication during unswitching.
2854  CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
2856 
2857  // Compute the cost of each block, as well as the total loop cost. Also, bail
2858  // out if we see instructions which are incompatible with loop unswitching
2859  // (convergent, noduplicate, or cross-basic-block tokens).
2860  // FIXME: We might be able to safely handle some of these in non-duplicated
2861  // regions.
2863  L.getHeader()->getParent()->hasMinSize()
2866  InstructionCost LoopCost = 0;
2867  for (auto *BB : L.blocks()) {
2868  InstructionCost Cost = 0;
2869  for (auto &I : *BB) {
2870  if (EphValues.count(&I))
2871  continue;
2872 
2873  if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
2874  return false;
2875  if (auto *CB = dyn_cast<CallBase>(&I))
2876  if (CB->isConvergent() || CB->cannotDuplicate())
2877  return false;
2878 
2879  Cost += TTI.getUserCost(&I, CostKind);
2880  }
2881  assert(Cost >= 0 && "Must not have negative costs!");
2882  LoopCost += Cost;
2883  assert(LoopCost >= 0 && "Must not have negative loop costs!");
2884  BBCostMap[BB] = Cost;
2885  }
2886  LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");
2887 
2888  // Now we find the best candidate by searching for the one with the following
2889  // properties in order:
2890  //
2891  // 1) An unswitching cost below the threshold
2892  // 2) The smallest number of duplicated unswitch candidates (to avoid
2893  // creating redundant subsequent unswitching)
2894  // 3) The smallest cost after unswitching.
2895  //
2896  // We prioritize reducing fanout of unswitch candidates provided the cost
2897  // remains below the threshold because this has a multiplicative effect.
2898  //
2899  // This requires memoizing each dominator subtree to avoid redundant work.
2900  //
2901  // FIXME: Need to actually do the number of candidates part above.
2903  // Given a terminator which might be unswitched, computes the non-duplicated
2904  // cost for that terminator.
2905  auto ComputeUnswitchedCost = [&](Instruction &TI,
2906  bool FullUnswitch) -> InstructionCost {
2907  BasicBlock &BB = *TI.getParent();
2909 
2910  InstructionCost Cost = 0;
2911  for (BasicBlock *SuccBB : successors(&BB)) {
2912  // Don't count successors more than once.
2913  if (!Visited.insert(SuccBB).second)
2914  continue;
2915 
2916  // If this is a partial unswitch candidate, then it must be a conditional
2917  // branch with a condition of either `or`, `and`, their corresponding
2918  // select forms or partially invariant instructions. In that case, one of
2919  // the successors is necessarily duplicated, so don't even try to remove
2920  // its cost.
2921  if (!FullUnswitch) {
2922  auto &BI = cast<BranchInst>(TI);
2923  Value *Cond = skipTrivialSelect(BI.getCondition());
2924  if (match(Cond, m_LogicalAnd())) {
2925  if (SuccBB == BI.getSuccessor(1))
2926  continue;
2927  } else if (match(Cond, m_LogicalOr())) {
2928  if (SuccBB == BI.getSuccessor(0))
2929  continue;
2930  } else if ((PartialIVInfo.KnownValue->isOneValue() &&
2931  SuccBB == BI.getSuccessor(0)) ||
2932  (!PartialIVInfo.KnownValue->isOneValue() &&
2933  SuccBB == BI.getSuccessor(1)))
2934  continue;
2935  }
2936 
2937  // This successor's domtree will not need to be duplicated after
2938  // unswitching if the edge to the successor dominates it (and thus the
2939  // entire tree). This essentially means there is no other path into this
2940  // subtree and so it will end up live in only one clone of the loop.
2941  if (SuccBB->getUniquePredecessor() ||
2942  llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2943  return PredBB == &BB || DT.dominates(SuccBB, PredBB);
2944  })) {
2945  Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
2946  assert(Cost <= LoopCost &&
2947  "Non-duplicated cost should never exceed total loop cost!");
2948  }
2949  }
2950 
2951  // Now scale the cost by the number of unique successors minus one. We
2952  // subtract one because there is already at least one copy of the entire
2953  // loop. This is computing the new cost of unswitching a condition.
2954  // Note that guards always have 2 unique successors that are implicit and
2955  // will be materialized if we decide to unswitch it.
2956  int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
2957  assert(SuccessorsCount > 1 &&
2958  "Cannot unswitch a condition without multiple distinct successors!");
2959  return (LoopCost - Cost) * (SuccessorsCount - 1);
2960  };
2961  Instruction *BestUnswitchTI = nullptr;
2962  InstructionCost BestUnswitchCost = 0;
2963  ArrayRef<Value *> BestUnswitchInvariants;
2964  for (auto &TerminatorAndInvariants : UnswitchCandidates) {
2965  Instruction &TI = *TerminatorAndInvariants.first;
2966  ArrayRef<Value *> Invariants = TerminatorAndInvariants.second;
2967  BranchInst *BI = dyn_cast<BranchInst>(&TI);
2968  InstructionCost CandidateCost = ComputeUnswitchedCost(
2969  TI, /*FullUnswitch*/ !BI ||
2970  (Invariants.size() == 1 &&
2971  Invariants[0] == skipTrivialSelect(BI->getCondition())));
2972  // Calculate cost multiplier which is a tool to limit potentially
2973  // exponential behavior of loop-unswitch.
2975  int CostMultiplier =
2976  CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
2977  assert(
2978  (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
2979  "cost multiplier needs to be in the range of 1..UnswitchThreshold");
2980  CandidateCost *= CostMultiplier;
2981  LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
2982  << " (multiplier: " << CostMultiplier << ")"
2983  << " for unswitch candidate: " << TI << "\n");
2984  } else {
2985  LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
2986  << " for unswitch candidate: " << TI << "\n");
2987  }
2988 
2989  if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
2990  BestUnswitchTI = &TI;
2991  BestUnswitchCost = CandidateCost;
2992  BestUnswitchInvariants = Invariants;
2993  }
2994  }
2995  assert(BestUnswitchTI && "Failed to find loop unswitch candidate");
2996 
2997  if (BestUnswitchCost >= UnswitchThreshold) {
2998  LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: "
2999  << BestUnswitchCost << "\n");
3000  return false;
3001  }
3002 
3003  if (BestUnswitchTI != PartialIVCondBranch)
3004  PartialIVInfo.InstToDuplicate.clear();
3005 
3006  // If the best candidate is a guard, turn it into a branch.
3007  if (isGuard(BestUnswitchTI))
3008  BestUnswitchTI = turnGuardIntoBranch(cast<IntrinsicInst>(BestUnswitchTI), L,
3009  ExitBlocks, DT, LI, MSSAU);
3010 
3011  LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = "
3012  << BestUnswitchCost << ") terminator: " << *BestUnswitchTI
3013  << "\n");
3014  unswitchNontrivialInvariants(L, *BestUnswitchTI, BestUnswitchInvariants,
3015  ExitBlocks, PartialIVInfo, DT, LI, AC,
3016  UnswitchCB, SE, MSSAU, DestroyLoopCB);
3017  return true;
3018 }
3019 
3020 /// Unswitch control flow predicated on loop invariant conditions.
3021 ///
3022 /// This first hoists all branches or switches which are trivial (IE, do not
3023 /// require duplicating any part of the loop) out of the loop body. It then
3024 /// looks at other loop invariant control flows and tries to unswitch those as
3025 /// well by cloning the loop if the result is small enough.
3026 ///
3027 /// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
3028 /// also updated based on the unswitch. The `MSSA` analysis is also updated if
3029 /// valid (i.e. its use is enabled).
3030 ///
3031 /// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
3032 /// true, we will attempt to do non-trivial unswitching as well as trivial
3033 /// unswitching.
3034 ///
3035 /// The `UnswitchCB` callback provided will be run after unswitching is
3036 /// complete, with the first parameter set to `true` if the provided loop
3037 /// remains a loop, and a list of new sibling loops created.
3038 ///
3039 /// If `SE` is non-null, we will update that analysis based on the unswitching
3040 /// done.
3041 static bool
3043  AAResults &AA, TargetTransformInfo &TTI, bool Trivial,
3044  bool NonTrivial,
3045  function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
3046  ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
3047  function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
3048  assert(L.isRecursivelyLCSSAForm(DT, LI) &&
3049  "Loops must be in LCSSA form before unswitching.");
3050 
3051  // Must be in loop simplified form: we need a preheader and dedicated exits.
3052  if (!L.isLoopSimplifyForm())
3053  return false;
3054 
3055  // Try trivial unswitch first before loop over other basic blocks in the loop.
3056  if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
3057  // If we unswitched successfully we will want to clean up the loop before
3058  // processing it further so just mark it as unswitched and return.
3059  UnswitchCB(/*CurrentLoopValid*/ true, false, {});
3060  return true;
3061  }
3062 
3063  // Check whether we should continue with non-trivial conditions.
3064  // EnableNonTrivialUnswitch: Global variable that forces non-trivial
3065  // unswitching for testing and debugging.
3066  // NonTrivial: Parameter that enables non-trivial unswitching for this
3067  // invocation of the transform. But this should be allowed only
3068  // for targets without branch divergence.
3069  //
3070  // FIXME: If divergence analysis becomes available to a loop
3071  // transform, we should allow unswitching for non-trivial uniform
3072  // branches even on targets that have divergence.
3073  // https://bugs.llvm.org/show_bug.cgi?id=48819
3074  bool ContinueWithNonTrivial =
3075  EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence());
3076  if (!ContinueWithNonTrivial)
3077  return false;
3078 
3079  // Skip non-trivial unswitching for optsize functions.
3080  if (L.getHeader()->getParent()->hasOptSize())
3081  return false;
3082 
3083  // Skip non-trivial unswitching for loops that cannot be cloned.
3084  if (!L.isSafeToClone())
3085  return false;
3086 
3087  // For non-trivial unswitching, because it often creates new loops, we rely on
3088  // the pass manager to iterate on the loops rather than trying to immediately
3089  // reach a fixed point. There is no substantial advantage to iterating
3090  // internally, and if any of the new loops are simplified enough to contain
3091  // trivial unswitching we want to prefer those.
3092 
3093  // Try to unswitch the best invariant condition. We prefer this full unswitch to
3094  // a partial unswitch when possible below the threshold.
3095  if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU,
3096  DestroyLoopCB))
3097  return true;
3098 
3099  // No other opportunities to unswitch.
3100  return false;
3101 }
3102 
3105  LPMUpdater &U) {
3106  Function &F = *L.getHeader()->getParent();
3107  (void)F;
3108 
3109  LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
3110  << "\n");
3111 
3112  // Save the current loop name in a variable so that we can report it even
3113  // after it has been deleted.
3114  std::string LoopName = std::string(L.getName());
3115 
3116  auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
3117  bool PartiallyInvariant,
3118  ArrayRef<Loop *> NewLoops) {
3119  // If we did a non-trivial unswitch, we have added new (cloned) loops.
3120  if (!NewLoops.empty())
3121  U.addSiblingLoops(NewLoops);
3122 
3123  // If the current loop remains valid, we should revisit it to catch any
3124  // other unswitch opportunities. Otherwise, we need to mark it as deleted.
3125  if (CurrentLoopValid) {
3126  if (PartiallyInvariant) {
3127  // Mark the new loop as partially unswitched, to avoid unswitching on
3128  // the same condition again.
3129  auto &Context = L.getHeader()->getContext();
3130  MDNode *DisableUnswitchMD = MDNode::get(
3131  Context,
3132  MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
3134  Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
3135  {DisableUnswitchMD});
3136  L.setLoopID(NewLoopID);
3137  } else
3138  U.revisitCurrentLoop();
3139  } else
3140  U.markLoopAsDeleted(L, LoopName);
3141  };
3142 
3143  auto DestroyLoopCB = [&U](Loop &L, StringRef Name) {
3144  U.markLoopAsDeleted(L, Name);
3145  };
3146 
3148  if (AR.MSSA) {
3149  MSSAU = MemorySSAUpdater(AR.MSSA);
3150  if (VerifyMemorySSA)
3151  AR.MSSA->verifyMemorySSA();
3152  }
3153  if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
3154  UnswitchCB, &AR.SE, MSSAU ? MSSAU.getPointer() : nullptr,
3155  DestroyLoopCB))
3156  return PreservedAnalyses::all();
3157 
3158  if (AR.MSSA && VerifyMemorySSA)
3159  AR.MSSA->verifyMemorySSA();
3160 
3161  // Historically this pass has had issues with the dominator tree so verify it
3162  // in asserts builds.
3164 
3165  auto PA = getLoopPassPreservedAnalyses();
3166  if (AR.MSSA)
3167  PA.preserve<MemorySSAAnalysis>();
3168  return PA;
3169 }
3170 
3172  raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
3174  OS, MapClassName2PassName);
3175 
3176  OS << "<";
3177  OS << (NonTrivial ? "" : "no-") << "nontrivial;";
3178  OS << (Trivial ? "" : "no-") << "trivial";
3179  OS << ">";
3180 }
3181 
3182 namespace {
3183 
3184 class SimpleLoopUnswitchLegacyPass : public LoopPass {
3185  bool NonTrivial;
3186 
3187 public:
3188  static char ID; // Pass ID, replacement for typeid
3189 
3190  explicit SimpleLoopUnswitchLegacyPass(bool NonTrivial = false)
3191  : LoopPass(ID), NonTrivial(NonTrivial) {
3194  }
3195 
3196  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
3197 
3198  void getAnalysisUsage(AnalysisUsage &AU) const override {
3204  }
3205 };
3206 
3207 } // end anonymous namespace
3208 
3209 bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
3210  if (skipLoop(L))
3211  return false;
3212 
3213  Function &F = *L->getHeader()->getParent();
3214 
3215  LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L
3216  << "\n");
3217 
3218  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3219  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
3220  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
3221  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
3222  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
3223  MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
3224  MemorySSAUpdater MSSAU(MSSA);
3225 
3226  auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
3227  auto *SE = SEWP ? &SEWP->getSE() : nullptr;
3228 
3229  auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid, bool PartiallyInvariant,
3230  ArrayRef<Loop *> NewLoops) {
3231  // If we did a non-trivial unswitch, we have added new (cloned) loops.
3232  for (auto *NewL : NewLoops)
3233  LPM.addLoop(*NewL);
3234 
3235  // If the current loop remains valid, re-add it to the queue. This is
3236  // a little wasteful as we'll finish processing the current loop as well,
3237  // but it is the best we can do in the old PM.
3238  if (CurrentLoopValid) {
3239  // If the current loop has been unswitched using a partially invariant
3240  // condition, we should not re-add the current loop to avoid unswitching
3241  // on the same condition again.
3242  if (!PartiallyInvariant)
3243  LPM.addLoop(*L);
3244  } else
3245  LPM.markLoopAsDeleted(*L);
3246  };
3247 
3248  auto DestroyLoopCB = [&LPM](Loop &L, StringRef /* Name */) {
3249  LPM.markLoopAsDeleted(L);
3250  };
3251 
3252  if (VerifyMemorySSA)
3253  MSSA->verifyMemorySSA();
3254 
3255  bool Changed = unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial,
3256  UnswitchCB, SE, &MSSAU, DestroyLoopCB);
3257 
3258  if (VerifyMemorySSA)
3259  MSSA->verifyMemorySSA();
3260 
3261  // Historically this pass has had issues with the dominator tree so verify it
3262  // in asserts builds.
3264 
3265  return Changed;
3266 }
3267 
3269 INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
3270  "Simple unswitch loops", false, false)
3277 INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
3279 
3281  return new SimpleLoopUnswitchLegacyPass(NonTrivial);
3282 }
i
i
Definition: README.txt:29
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::formLCSSA
bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition: LCSSA.cpp:335
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:517
cloneLoopNest
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
Definition: SimpleLoopUnswitch.cpp:1278
AssumptionCache.h
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:299
llvm::createSimpleLoopUnswitchLegacyPass
Pass * createSimpleLoopUnswitchLegacyPass(bool NonTrivial=false)
Create the legacy pass object for the simple loop unswitcher.
Definition: SimpleLoopUnswitch.cpp:3280
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:211
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
UnswitchNumInitialUnscaledCandidates
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::MemorySSA::verifyMemorySSA
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1906
llvm::LoopStandardAnalysisResults::AC
AssumptionCache & AC
Definition: LoopAnalysisManager.h:53
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:280
llvm::LoopBase::getExitBlocks
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
Definition: LoopInfoImpl.h:61
llvm::AssumptionCache::registerAssumption
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
Definition: AssumptionCache.cpp:224
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:379
ValueMapper.h
llvm::SwitchInstProfUpdateWrapper
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
Definition: Instructions.h:3572
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
IntrinsicInst.h
llvm::PassInfoMixin< SimpleLoopUnswitchPass >
llvm::SwitchInstProfUpdateWrapper::getSuccessorWeight
CaseWeightOpt getSuccessorWeight(unsigned idx)
Definition: Instructions.cpp:4497
MemorySSAUpdater.h
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:546
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:199
llvm::cfg::UpdateKind::Insert
@ Insert
unswitchTrivialBranch
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
Definition: SimpleLoopUnswitch.cpp:449
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:138
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::isGuaranteedNotToBeUndefOrPoison
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
Definition: ValueTracking.cpp:5404
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::LoopInfoBase::verify
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:687
Statistic.h
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:241
llvm::SwitchInst::Create
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3419
recomputeLoopBlockSet
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
Definition: SimpleLoopUnswitch.cpp:1708
ErrorHandling.h
llvm::PatternMatch::m_CombineOr
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:212
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Intrinsic::getName
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:879
llvm::IRBuilder<>
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1807
llvm::SmallDenseMap
Definition: DenseMap.h:882
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::LoopInfoBase::changeLoopFor
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
Definition: LoopInfo.h:1024
ValueTracking.h
Local.h
llvm::IRBuilderBase::CreateOr
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1378
llvm::MemorySSAUpdater::removeBlocks
void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.
Definition: MemorySSAUpdater.cpp:1372
unswitchNontrivialInvariants
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, SmallVectorImpl< BasicBlock * > &ExitBlocks, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, function_ref< void(bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
Definition: SimpleLoopUnswitch.cpp:2048
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:144
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:214
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::begin
iterator begin()
Definition: DenseMap.h:75
CalculateUnswitchCostMultiplier
static int CalculateUnswitchCostMultiplier(Instruction &TI, Loop &L, LoopInfo &LI, DominatorTree &DT, ArrayRef< std::pair< Instruction *, TinyPtrVector< Value * >>> UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
Definition: SimpleLoopUnswitch.cpp:2653
ScalarEvolution.h
unswitchBestCondition
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, function_ref< void(bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
Definition: SimpleLoopUnswitch.cpp:2725
DenseMap.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:380
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1287
areLoopExitPHIsLoopInvariant
static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB, BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
Definition: SimpleLoopUnswitch.cpp:195
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:51
llvm::DominatorTreeBase::Insert
static constexpr UpdateKind Insert
Definition: GenericDomTree.h:242
llvm::MemorySSAUpdater::moveAllAfterSpliceBlocks
void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
Definition: MemorySSAUpdater.cpp:1247
llvm::DominatorTreeBase::getNode
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition: GenericDomTree.h:351
turnGuardIntoBranch
static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...
Definition: SimpleLoopUnswitch.cpp:2577
unswitch
simple loop unswitch
Definition: SimpleLoopUnswitch.cpp:3277
llvm::Optional< uint32_t >
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:147
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::successors
auto successors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:29
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::SwitchInstProfUpdateWrapper::removeCase
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Definition: Instructions.cpp:4455
STLExtras.h
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:654
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::SwitchInst::cases
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
Definition: Instructions.h:3476
llvm::SwitchInst::getDefaultDest
BasicBlock * getDefaultDest() const
Definition: Instructions.h:3437
llvm::SimpleLoopUnswitchPass::printPipeline
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition: SimpleLoopUnswitch.cpp:3171
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:54
llvm::Sched::Fast
@ Fast
Definition: TargetLowering.h:104
Sequence.h
llvm::Constant::isOneValue
bool isOneValue() const
Returns true if the value is one.
Definition: Constants.cpp:110
llvm::Module::getFunction
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:175
llvm::zip_first
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&... args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
Definition: STLExtras.h:785
llvm::count_if
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1716
computeDomSubtreeCost
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
Definition: SimpleLoopUnswitch.cpp:2527
Use.h
MustExecute.h
llvm::LoopBase::getNumBlocks
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:201
llvm::MemorySSAUpdater::applyUpdates
void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
Definition: MemorySSAUpdater.cpp:809
llvm::MemorySSAUpdater::createMemoryAccessInBB
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point)
Create a MemoryAccess in MemorySSA at a specified point in a block, with a specified clobbering defin...
Definition: MemorySSAUpdater.cpp:1429
llvm::DominatorTreeBase::applyUpdates
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
Definition: GenericDomTree.h:544
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::BranchInst::swapSuccessors
void swapSuccessors()
Swap the successors of this branch instruction.
Definition: Instructions.cpp:1329
llvm::LPPassManager::addLoop
void addLoop(Loop &L)
Definition: LoopPass.cpp:76
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1300
buildPartialInvariantUnswitchConditionalBranch
static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU)
Copy a set of loop invariant values, and conditionally branch on them.
Definition: SimpleLoopUnswitch.cpp:234
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BranchInst::setSuccessor
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Definition: Instructions.h:3184
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1366
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::Optional::getPointer
constexpr const T * getPointer() const
Definition: Optional.h:303
LoopAnalysisManager.h
llvm::makePostTransformationMetadata
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
Definition: LoopInfo.cpp:1125
llvm::LoopBlocksRPO
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
llvm::isGuard
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
Definition: GuardUtils.cpp:18
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
DropNonTrivialImplicitNullChecks
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
Instruction.h
CommandLine.h
CodeMetrics.h
replaceLoopInvariantUses
static void replaceLoopInvariantUses(Loop &L, Value *Invariant, Constant &Replacement)
Definition: SimpleLoopUnswitch.cpp:178
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::SwitchInst::CaseHandleImpl::getCaseSuccessor
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
Definition: Instructions.h:3303
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1617
llvm::RemapInstruction
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:254
hoistLoopToNewParent
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
Definition: SimpleLoopUnswitch.cpp:350
llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:998
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition: PatternMatch.h:1456
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::AAResults
Definition: AliasAnalysis.h:511
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:667
llvm::DominatorTreeBase::insertEdge
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
Definition: GenericDomTree.h:584
FreezeLoopUnswitchCond
static cl::opt< bool > FreezeLoopUnswitchCond("freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation."))
Twine.h
InstrTypes.h
llvm::LoopBase::addChildLoop
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition: LoopInfo.h:411
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:297
llvm::TargetTransformInfo::hasBranchDivergence
bool hasBranchDivergence() const
Return true if branch divergence exists.
Definition: TargetTransformInfo.cpp:235
llvm::SwitchInst::CaseHandleImpl::getSuccessorIndex
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
Definition: Instructions.h:3314
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::TinyPtrVector::push_back
void push_back(EltTy NewVal)
Definition: TinyPtrVector.h:244
rewritePHINodesForExitAndUnswitchedBlocks
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
Definition: SimpleLoopUnswitch.cpp:305
llvm::LoopInfoBase::addTopLevelLoop
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
Definition: LoopInfo.h:1043
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:194
llvm::LoopInfoBase::destroy
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
Definition: LoopInfo.h:1090
UnswitchThreshold
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
false
Definition: StackSlotColoring.cpp:141
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:623
llvm::MemorySSAUpdater::moveToPlace
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
Definition: MemorySSAUpdater.cpp:1200
llvm::Instruction
Definition: Instruction.h:42
llvm::BranchInst::setCondition
void setCondition(Value *V)
Definition: Instructions.h:3172
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
unswitchLoop
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, function_ref< void(bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
Unswitch control flow predicated on loop invariant conditions.
Definition: SimpleLoopUnswitch.cpp:3042
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:355
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:30
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:54
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:372
llvm::SwitchInstProfUpdateWrapper::setSuccessorWeight
void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
Definition: Instructions.cpp:4503
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1777
LoopUtils.h
llvm::AArch64CC::PL
@ PL
Definition: AArch64BaseInfo.h:260
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
llvm::LPPassManager
Definition: LoopPass.h:76
SmallPtrSet.h
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
PatternMatch.h
visitDomSubTree
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
Definition: SimpleLoopUnswitch.cpp:2025
llvm::ValueMap::count
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: ValueMap.h:152
llvm::Loop::getName
StringRef getName() const
Definition: LoopInfo.h:885
llvm::RF_NoModuleLevelChanges
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:71
LoopIterator.h
deleteDeadClonedBlocks
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy >> VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
Definition: SimpleLoopUnswitch.cpp:1587
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::IRBuilderBase::CreateAnd
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1356
llvm::Loop::LoopBounds::Direction
Direction
An enum for the direction of the loop.
Definition: LoopInfo.h:720
llvm::MemorySSAUpdater::removeDuplicatePhiEdgesBetween
void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
Definition: MemorySSAUpdater.cpp:538
llvm::PatternMatch::m_One
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:511
llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:279
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
llvm::LoopInfoBase::AllocateLoop
LoopT * AllocateLoop(ArgsTy &&... Args)
Definition: LoopInfo.h:950
collectHomogenousInstGraphLoopInvariants
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root, LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
Definition: SimpleLoopUnswitch.cpp:136
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3167
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLFunctionalExtras.h:36
llvm::MemorySSAUpdater::applyInsertUpdates
void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
Definition: MemorySSAUpdater.cpp:860
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:137
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:475
llvm::CloneBasicBlock
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
Definition: CloneFunction.cpp:41
BasicBlock.h
llvm::cl::opt< bool >
skipTrivialSelect
static Value * skipTrivialSelect(Value *Cond)
Definition: SimpleLoopUnswitch.cpp:121
llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:531
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:77
llvm::MemorySSAUpdater::removeEdge
void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
Definition: MemorySSAUpdater.cpp:531
uint64_t
llvm::SwitchInst::setDefaultDest
void setDefaultDest(BasicBlock *DefaultCase)
Definition: Instructions.h:3441
llvm::LoopPass
Definition: LoopPass.h:28
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2535
llvm::PatternMatch::m_LogicalOr
LogicalOp_match< LHS, RHS, Instruction::Or > m_LogicalOr(const LHS &L, const RHS &R)
Matches L || R either in the form of L | R or L ? true : R.
Definition: PatternMatch.h:2549
llvm::MemorySSAUpdater
Definition: MemorySSAUpdater.h:54
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:620
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:262
llvm::MemorySSAUpdater::getMemorySSA
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
Definition: MemorySSAUpdater.h:242
llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1637
llvm::LoopBase::getUniqueExitBlocks
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:121
llvm::MemorySSAUpdater::updateForClonedLoop
void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
Definition: MemorySSAUpdater.cpp:676
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3142
llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:714
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:986
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::MemorySSA::getMemoryAccess
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:731
Cloning.h
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:618
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:166
llvm::MemoryDef
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition: MemorySSA.h:386
SimpleLoopUnswitch.h
UnswitchGuards
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
llvm::MDString::get
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:497
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:215
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:152
llvm::ValueToValueMapTy
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
Definition: MemorySSAUpdater.h:50
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", "Simple unswitch loops", false, false) INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass
llvm::LoopBase::getLoopDepth
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition: LoopInfo.h:96
llvm::IVConditionInfo::KnownValue
Constant * KnownValue
Constant to indicate for which value the condition is invariant.
Definition: LoopUtils.h:518
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::DominatorTree::isReachableFromEntry
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:335
llvm::LoopBase::getBlocksVector
std::vector< BlockT * > & getBlocksVector()
Return a direct, mutable handle to the blocks vector so that we can mutate it efficiently with techni...
Definition: LoopInfo.h:208
llvm::IVConditionInfo::InstToDuplicate
SmallVector< Instruction * > InstToDuplicate
Instructions that need to be duplicated and checked for the unswitching condition.
Definition: LoopUtils.h:515
llvm::LoopInfoBase::begin
iterator begin() const
Definition: LoopInfo.h:961
llvm::ScalarEvolution::forgetTopmostLoop
void forgetTopmostLoop(const Loop *L)
Definition: ScalarEvolution.cpp:8087
llvm::MDNode
Metadata node.
Definition: Metadata.h:937
LoopPassManager.h
llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:948
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
InstructionCost.h
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
getTopMostExitingLoop
static Loop * getTopMostExitingLoop(BasicBlock *ExitBB, LoopInfo &LI)
Definition: SimpleLoopUnswitch.cpp:421
buildPartialUnswitchConditionalBranch
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, Instruction *I, AssumptionCache *AC, DominatorTree &DT)
Copy a set of loop invariant values ToDuplicate and insert them at the end of BB and conditionally br...
Definition: SimpleLoopUnswitch.cpp:214
CFG.h
llvm::Instruction::clone
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
Definition: Instruction.cpp:862
llvm::LoopInfoBase::getLoopDepth
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
Definition: LoopInfo.h:993
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:95
llvm::LoopBase::removeChildLoop
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Definition: LoopInfo.h:420
rebuildLoopAfterUnswitch
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops)
Rebuild a loop after unswitching removes some subset of blocks and edges.
Definition: SimpleLoopUnswitch.cpp:1819
llvm::LoopBase::getSubLoopsVector
std::vector< LoopT * > & getSubLoopsVector()
Definition: LoopInfo.h:163
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1102
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1624
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:137
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:215
LoopPass.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::MemorySSA::getBlockDefs
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition: MemorySSA.h:779
llvm::initializeSimpleLoopUnswitchLegacyPassPass
void initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry &)
llvm::SwitchInst::CaseHandleImpl::getCaseValue
ConstantIntT * getCaseValue() const
Resolves case value for current case.
Definition: Instructions.h:3296
llvm::LoopBase::isOutermost
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
Definition: LoopInfo.h:184
llvm::LoopBase::reserveBlocks
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
Definition: LoopInfo.h:452
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:222
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:529
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1823
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:991
loops
simple loop Simple unswitch loops
Definition: SimpleLoopUnswitch.cpp:3278
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:55
unswitchTrivialSwitch
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
Definition: SimpleLoopUnswitch.cpp:680
llvm::pred_empty
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
llvm::findOptionMDForLoop
MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
Definition: LoopInfo.cpp:1042
llvm::IRBuilderBase::CreateCondBr
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:990
llvm::DomTreeNodeBase< BasicBlock >
llvm::ValueMap< const Value *, WeakTrackingVH >
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:209
llvm::CallBase::setArgOperand
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1346
llvm::LoopInfoBase::removeLoop
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
Definition: LoopInfo.h:1013
llvm::ConstantInt::getFalse
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:883
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:93
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:664
llvm::MemorySSAUpdater::updateExitBlocksForClonedLoop
void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
Definition: MemorySSAUpdater.cpp:787
llvm::BasicBlock::front
const Instruction & front() const
Definition: BasicBlock.h:309
llvm::RF_IgnoreMissingLocals
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:89
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
llvm::SimpleLoopUnswitchPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: SimpleLoopUnswitch.cpp:3103
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:181
Constant.h
llvm::SplitEdge
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
Definition: BasicBlockUtils.cpp:516
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::formDedicatedExitBlocks
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:57
llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1761
deleteDeadBlocksFromLoop
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
Definition: SimpleLoopUnswitch.cpp:1617
llvm::ScalarEvolution::forgetLoop
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
Definition: ScalarEvolution.cpp:8031
llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:876
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
llvm::Loop::setLoopID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:524
llvm::SwitchInstProfUpdateWrapper::addCase
void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
Definition: Instructions.cpp:4469
MSSAThreshold
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
llvm::LPPassManager::markLoopAsDeleted
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:110
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:58
llvm::IRBuilderBase::CreateFreeze
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2361
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:84
llvm::ICFLoopSafetyInfo
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
Definition: MustExecute.h:132
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2693
GenericDomTree.h
buildClonedLoops
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
Definition: SimpleLoopUnswitch.cpp:1337
llvm::LoopBase::getBlocksSet
SmallPtrSetImpl< const BlockT * > & getBlocksSet()
Return a direct, mutable handle to the blocks set so that we can mutate it efficiently.
Definition: LoopInfo.h:214
llvm::Loop::getLoopID
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:500
llvm::MemorySSA::BeforeTerminator
@ BeforeTerminator
Definition: MemorySSA.h:802
Casting.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1562
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::count
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:215
llvm::IVConditionInfo
Struct to hold information about a partially invariant condition.
Definition: LoopUtils.h:512
llvm::Loop::isRecursivelyLCSSAForm
bool isRecursivelyLCSSAForm(const DominatorTree &DT, const LoopInfo &LI) const
Return true if this Loop and all inner subloops are in LCSSA form.
Definition: LoopInfo.cpp:465
llvm::hasPartialIVCondition
Optional< IVConditionInfo > hasPartialIVCondition(Loop &L, unsigned MSSAThreshold, MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
Definition: LoopUtils.cpp:1650
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:591
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:56
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:52
llvm::SwitchInstProfUpdateWrapper::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
Definition: Instructions.cpp:4488
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
GuardUtils.h
llvm::makeArrayRef
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:475
AA
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:364
llvm::TinyPtrVector::empty
bool empty() const
Definition: TinyPtrVector.h:163
llvm::LoopInfoBase::end
iterator end() const
Definition: LoopInfo.h:962
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
MemorySSA.h
Instructions.h
llvm::BasicBlock::moveBefore
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.cpp:136
SmallVector.h
llvm::LoopBase::addBasicBlockToLoop
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:241
Dominators.h
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1341
llvm::FreezeInst
This class represents a freeze function that returns random concrete value if an operand is either a ...
Definition: Instructions.h:5394
llvm::SmallPtrSetImplBase::empty
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:92
N
#define N
rewritePHINodesForUnswitchedExitBlock
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
Definition: SimpleLoopUnswitch.cpp:283
llvm::SwitchInst::CaseHandle
Definition: Instructions.h:3330
llvm::BasicBlock::size
size_t size() const
Definition: BasicBlock.h:307
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::Loop::isSafeToClone
bool isSafeToClone() const
Return true if the loop body is safe to clone in practice.
Definition: LoopInfo.cpp:482
llvm::ICFLoopSafetyInfo::computeLoopSafetyInfo
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:78
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:164
llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition: GenericDomTree.h:802
TargetTransformInfo.h
llvm::LoopStandardAnalysisResults::MSSA
MemorySSA * MSSA
Definition: LoopAnalysisManager.h:61
llvm::PHINode
Definition: Instructions.h:2651
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::BasicBlock::removePredecessor
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:318
llvm::SmallVectorImpl< DominatorTree::UpdateType >
llvm::Function::hasMinSize
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:661
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::LoopBase::isLoopExiting
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Definition: LoopInfo.h:241
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::ICFLoopSafetyInfo::isGuaranteedToExecute
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
Definition: MustExecute.cpp:267
llvm::ValueMap::lookup
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: ValueMap.h:165
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::VerifyMemorySSA
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:91
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3230
llvm::SplitBlockAndInsertIfThen
Instruction * SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, bool Unreachable, MDNode *BranchWeights, DominatorTree *DT, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
Definition: BasicBlockUtils.cpp:1446
llvm::cl::desc
Definition: CommandLine.h:405
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3086
raw_ostream.h
llvm::LoopBlocksRPO::perform
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:180
llvm::TinyPtrVector
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:30
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:644
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:837
Value.h
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::DominatorTreeBase::deleteEdge
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
Definition: GenericDomTree.h:602
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3165
llvm::PatternMatch::m_LogicalAnd
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
Definition: PatternMatch.h:2531
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3179
SetVector.h
llvm::DominatorTreeBase::Delete
static constexpr UpdateKind Delete
Definition: GenericDomTree.h:243
llvm::Instruction::moveBefore
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:96
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:927
unswitchAllTrivialConditions
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
Definition: SimpleLoopUnswitch.cpp:975
UnswitchSiblingsToplevelDiv
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
EnableNonTrivialUnswitch
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
EnableUnswitchCostMultiplier
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
buildClonedLoopBlocks
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Build the cloned blocks for an unswitched copy of the given loop.
Definition: SimpleLoopUnswitch.cpp:1094