LLVM  14.0.0git
SimpleLoopUnswitch.cpp
Go to the documentation of this file.
1 ///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "llvm/ADT/DenseMap.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/ADT/Sequence.h"
13 #include "llvm/ADT/SetVector.h"
14 #include "llvm/ADT/SmallPtrSet.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/ADT/Twine.h"
19 #include "llvm/Analysis/CFG.h"
24 #include "llvm/Analysis/LoopInfo.h"
26 #include "llvm/Analysis/LoopPass.h"
31 #include "llvm/IR/BasicBlock.h"
32 #include "llvm/IR/Constant.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/Dominators.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/IR/IRBuilder.h"
37 #include "llvm/IR/InstrTypes.h"
38 #include "llvm/IR/Instruction.h"
39 #include "llvm/IR/Instructions.h"
40 #include "llvm/IR/IntrinsicInst.h"
41 #include "llvm/IR/PatternMatch.h"
42 #include "llvm/IR/Use.h"
43 #include "llvm/IR/Value.h"
44 #include "llvm/InitializePasses.h"
45 #include "llvm/Pass.h"
46 #include "llvm/Support/Casting.h"
48 #include "llvm/Support/Debug.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <iterator>
61 #include <numeric>
62 #include <utility>
63 
64 #define DEBUG_TYPE "simple-loop-unswitch"
65 
66 using namespace llvm;
67 using namespace llvm::PatternMatch;
68 
69 STATISTIC(NumBranches, "Number of branches unswitched");
70 STATISTIC(NumSwitches, "Number of switches unswitched");
71 STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
72 STATISTIC(NumTrivial, "Number of unswitches that are trivial");
73 STATISTIC(
74  NumCostMultiplierSkipped,
75  "Number of unswitch candidates that had their cost multiplier skipped");
76 
78  "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
79  cl::desc("Forcibly enables non-trivial loop unswitching rather than "
80  "following the configuration passed into the pass."));
81 
82 static cl::opt<int>
83  UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
84  cl::desc("The cost threshold for unswitching a loop."));
85 
87  "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
88  cl::desc("Enable unswitch cost multiplier that prohibits exponential "
89  "explosion in nontrivial unswitch."));
91  "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
92  cl::desc("Toplevel siblings divisor for cost multiplier."));
94  "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
95  cl::desc("Number of unswitch candidates that are ignored when calculating "
96  "cost multiplier."));
98  "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
99  cl::desc("If enabled, simple loop unswitching will also consider "
100  "llvm.experimental.guard intrinsics as unswitch candidates."));
102  "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
103  cl::init(false), cl::Hidden,
104  cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
105  "null checks to save time analyzing if we can keep it."));
106 static cl::opt<unsigned>
107  MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
108  cl::desc("Max number of memory uses to explore during "
109  "partial unswitching analysis"),
110  cl::init(100), cl::Hidden);
111 
112 /// Collect all of the loop invariant input values transitively used by the
113 /// homogeneous instruction graph from a given root.
114 ///
115 /// This essentially walks from a root recursively through loop variant operands
116 /// which have the exact same opcode and finds all inputs which are loop
117 /// invariant. For some operations these can be re-associated and unswitched out
118 /// of the loop entirely.
121  LoopInfo &LI) {
122  assert(!L.isLoopInvariant(&Root) &&
123  "Only need to walk the graph if root itself is not invariant.");
124  TinyPtrVector<Value *> Invariants;
125 
126  bool IsRootAnd = match(&Root, m_LogicalAnd());
127  bool IsRootOr = match(&Root, m_LogicalOr());
128 
129  // Build a worklist and recurse through operators collecting invariants.
132  Worklist.push_back(&Root);
133  Visited.insert(&Root);
134  do {
135  Instruction &I = *Worklist.pop_back_val();
136  for (Value *OpV : I.operand_values()) {
137  // Skip constants as unswitching isn't interesting for them.
138  if (isa<Constant>(OpV))
139  continue;
140 
141  // Add it to our result if loop invariant.
142  if (L.isLoopInvariant(OpV)) {
143  Invariants.push_back(OpV);
144  continue;
145  }
146 
147  // If not an instruction with the same opcode, nothing we can do.
148  Instruction *OpI = dyn_cast<Instruction>(OpV);
149 
150  if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
151  (IsRootOr && match(OpI, m_LogicalOr())))) {
152  // Visit this operand.
153  if (Visited.insert(OpI).second)
154  Worklist.push_back(OpI);
155  }
156  }
157  } while (!Worklist.empty());
158 
159  return Invariants;
160 }
161 
162 static void replaceLoopInvariantUses(Loop &L, Value *Invariant,
163  Constant &Replacement) {
164  assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");
165 
166  // Replace uses of LIC in the loop with the given constant.
167  // We use make_early_inc_range as set invalidates the iterator.
168  for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
169  Instruction *UserI = dyn_cast<Instruction>(U.getUser());
170 
171  // Replace this use within the loop body.
172  if (UserI && L.contains(UserI))
173  U.set(&Replacement);
174  }
175 }
176 
177 /// Check that all the LCSSA PHI nodes in the loop exit block have trivial
178 /// incoming values along this edge.
179 static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
180  BasicBlock &ExitBB) {
181  for (Instruction &I : ExitBB) {
182  auto *PN = dyn_cast<PHINode>(&I);
183  if (!PN)
184  // No more PHIs to check.
185  return true;
186 
187  // If the incoming value for this edge isn't loop invariant the unswitch
188  // won't be trivial.
189  if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
190  return false;
191  }
192  llvm_unreachable("Basic blocks should never be empty!");
193 }
194 
195 /// Copy a set of loop invariant values \p ToDuplicate and insert them at the
196 /// end of \p BB and conditionally branch on the copied condition. We only
197 /// branch on a single value.
199  ArrayRef<Value *> Invariants,
200  bool Direction,
201  BasicBlock &UnswitchedSucc,
202  BasicBlock &NormalSucc) {
203  IRBuilder<> IRB(&BB);
204 
205  Value *Cond = Direction ? IRB.CreateOr(Invariants) :
206  IRB.CreateAnd(Invariants);
207  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
208  Direction ? &NormalSucc : &UnswitchedSucc);
209 }
210 
211 /// Copy a set of loop invariant values, and conditionally branch on them.
213  BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
214  BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
215  MemorySSAUpdater *MSSAU) {
216  ValueToValueMapTy VMap;
217  for (auto *Val : reverse(ToDuplicate)) {
218  Instruction *Inst = cast<Instruction>(Val);
219  Instruction *NewInst = Inst->clone();
220  BB.getInstList().insert(BB.end(), NewInst);
221  RemapInstruction(NewInst, VMap,
223  VMap[Val] = NewInst;
224 
225  if (!MSSAU)
226  continue;
227 
228  MemorySSA *MSSA = MSSAU->getMemorySSA();
229  if (auto *MemUse =
230  dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) {
231  auto *DefiningAccess = MemUse->getDefiningAccess();
232  // Get the first defining access before the loop.
233  while (L.contains(DefiningAccess->getBlock())) {
234  // If the defining access is a MemoryPhi, get the incoming
235  // value for the pre-header as defining access.
236  if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
237  DefiningAccess =
238  MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
239  else
240  DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
241  }
242  MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
243  NewInst->getParent(),
245  }
246  }
247 
248  IRBuilder<> IRB(&BB);
249  Value *Cond = VMap[ToDuplicate[0]];
250  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
251  Direction ? &NormalSucc : &UnswitchedSucc);
252 }
253 
254 /// Rewrite the PHI nodes in an unswitched loop exit basic block.
255 ///
256 /// Requires that the loop exit and unswitched basic block are the same, and
257 /// that the exiting block was a unique predecessor of that block. Rewrites the
258 /// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
259 /// PHI nodes from the old preheader that now contains the unswitched
260 /// terminator.
262  BasicBlock &OldExitingBB,
263  BasicBlock &OldPH) {
264  for (PHINode &PN : UnswitchedBB.phis()) {
265  // When the loop exit is directly unswitched we just need to update the
266  // incoming basic block. We loop to handle weird cases with repeated
267  // incoming blocks, but expect to typically only have one operand here.
268  for (auto i : seq<int>(0, PN.getNumOperands())) {
269  assert(PN.getIncomingBlock(i) == &OldExitingBB &&
270  "Found incoming block different from unique predecessor!");
271  PN.setIncomingBlock(i, &OldPH);
272  }
273  }
274 }
275 
276 /// Rewrite the PHI nodes in the loop exit basic block and the split off
277 /// unswitched block.
278 ///
279 /// Because the exit block remains an exit from the loop, this rewrites the
280 /// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
281 /// nodes into the unswitched basic block to select between the value in the
282 /// old preheader and the loop exit.
284  BasicBlock &UnswitchedBB,
285  BasicBlock &OldExitingBB,
286  BasicBlock &OldPH,
287  bool FullUnswitch) {
288  assert(&ExitBB != &UnswitchedBB &&
289  "Must have different loop exit and unswitched blocks!");
290  Instruction *InsertPt = &*UnswitchedBB.begin();
291  for (PHINode &PN : ExitBB.phis()) {
292  auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
293  PN.getName() + ".split", InsertPt);
294 
295  // Walk backwards over the old PHI node's inputs to minimize the cost of
296  // removing each one. We have to do this weird loop manually so that we
297  // create the same number of new incoming edges in the new PHI as we expect
298  // each case-based edge to be included in the unswitched switch in some
299  // cases.
300  // FIXME: This is really, really gross. It would be much cleaner if LLVM
301  // allowed us to create a single entry for a predecessor block without
302  // having separate entries for each "edge" even though these edges are
303  // required to produce identical results.
304  for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
305  if (PN.getIncomingBlock(i) != &OldExitingBB)
306  continue;
307 
308  Value *Incoming = PN.getIncomingValue(i);
309  if (FullUnswitch)
310  // No more edge from the old exiting block to the exit block.
311  PN.removeIncomingValue(i);
312 
313  NewPN->addIncoming(Incoming, &OldPH);
314  }
315 
316  // Now replace the old PHI with the new one and wire the old one in as an
317  // input to the new one.
318  PN.replaceAllUsesWith(NewPN);
319  NewPN->addIncoming(&PN, &ExitBB);
320  }
321 }
322 
323 /// Hoist the current loop up to the innermost loop containing a remaining exit.
324 ///
325 /// Because we've removed an exit from the loop, we may have changed the set of
326 /// loops reachable and need to move the current loop up the loop nest or even
327 /// to an entirely separate nest.
328 static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
329  DominatorTree &DT, LoopInfo &LI,
330  MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
331  // If the loop is already at the top level, we can't hoist it anywhere.
332  Loop *OldParentL = L.getParentLoop();
333  if (!OldParentL)
334  return;
335 
337  L.getExitBlocks(Exits);
338  Loop *NewParentL = nullptr;
339  for (auto *ExitBB : Exits)
340  if (Loop *ExitL = LI.getLoopFor(ExitBB))
341  if (!NewParentL || NewParentL->contains(ExitL))
342  NewParentL = ExitL;
343 
344  if (NewParentL == OldParentL)
345  return;
346 
347  // The new parent loop (if different) should always contain the old one.
348  if (NewParentL)
349  assert(NewParentL->contains(OldParentL) &&
350  "Can only hoist this loop up the nest!");
351 
352  // The preheader will need to move with the body of this loop. However,
353  // because it isn't in this loop we also need to update the primary loop map.
354  assert(OldParentL == LI.getLoopFor(&Preheader) &&
355  "Parent loop of this loop should contain this loop's preheader!");
356  LI.changeLoopFor(&Preheader, NewParentL);
357 
358  // Remove this loop from its old parent.
359  OldParentL->removeChildLoop(&L);
360 
361  // Add the loop either to the new parent or as a top-level loop.
362  if (NewParentL)
363  NewParentL->addChildLoop(&L);
364  else
365  LI.addTopLevelLoop(&L);
366 
367  // Remove this loops blocks from the old parent and every other loop up the
368  // nest until reaching the new parent. Also update all of these
369  // no-longer-containing loops to reflect the nesting change.
370  for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
371  OldContainingL = OldContainingL->getParentLoop()) {
372  llvm::erase_if(OldContainingL->getBlocksVector(),
373  [&](const BasicBlock *BB) {
374  return BB == &Preheader || L.contains(BB);
375  });
376 
377  OldContainingL->getBlocksSet().erase(&Preheader);
378  for (BasicBlock *BB : L.blocks())
379  OldContainingL->getBlocksSet().erase(BB);
380 
381  // Because we just hoisted a loop out of this one, we have essentially
382  // created new exit paths from it. That means we need to form LCSSA PHI
383  // nodes for values used in the no-longer-nested loop.
384  formLCSSA(*OldContainingL, DT, &LI, SE);
385 
386  // We shouldn't need to form dedicated exits because the exit introduced
387  // here is the (just split by unswitching) preheader. However, after trivial
388  // unswitching it is possible to get new non-dedicated exits out of parent
389  // loop so let's conservatively form dedicated exit blocks and figure out
390  // if we can optimize later.
391  formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
392  /*PreserveLCSSA*/ true);
393  }
394 }
395 
396 // Return the top-most loop containing ExitBB and having ExitBB as exiting block
397 // or the loop containing ExitBB, if there is no parent loop containing ExitBB
398 // as exiting block.
400  Loop *TopMost = LI.getLoopFor(ExitBB);
401  Loop *Current = TopMost;
402  while (Current) {
403  if (Current->isLoopExiting(ExitBB))
404  TopMost = Current;
405  Current = Current->getParentLoop();
406  }
407  return TopMost;
408 }
409 
410 /// Unswitch a trivial branch if the condition is loop invariant.
411 ///
412 /// This routine should only be called when loop code leading to the branch has
413 /// been validated as trivial (no side effects). This routine checks if the
414 /// condition is invariant and one of the successors is a loop exit. This
415 /// allows us to unswitch without duplicating the loop, making it trivial.
416 ///
417 /// If this routine fails to unswitch the branch it returns false.
418 ///
419 /// If the branch can be unswitched, this routine splits the preheader and
420 /// hoists the branch above that split. Preserves loop simplified form
421 /// (splitting the exit block as necessary). It simplifies the branch within
422 /// the loop to an unconditional branch but doesn't remove it entirely. Further
423 /// cleanup can be done with some simplifycfg like pass.
424 ///
425 /// If `SE` is not null, it will be updated based on the potential loop SCEVs
426 /// invalidated by this.
428  LoopInfo &LI, ScalarEvolution *SE,
429  MemorySSAUpdater *MSSAU) {
430  assert(BI.isConditional() && "Can only unswitch a conditional branch!");
431  LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
432 
433  // The loop invariant values that we want to unswitch.
434  TinyPtrVector<Value *> Invariants;
435 
436  // When true, we're fully unswitching the branch rather than just unswitching
437  // some input conditions to the branch.
438  bool FullUnswitch = false;
439 
440  if (L.isLoopInvariant(BI.getCondition())) {
441  Invariants.push_back(BI.getCondition());
442  FullUnswitch = true;
443  } else {
444  if (auto *CondInst = dyn_cast<Instruction>(BI.getCondition()))
445  Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
446  if (Invariants.empty()) {
447  LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n");
448  return false;
449  }
450  }
451 
452  // Check that one of the branch's successors exits, and which one.
453  bool ExitDirection = true;
454  int LoopExitSuccIdx = 0;
455  auto *LoopExitBB = BI.getSuccessor(0);
456  if (L.contains(LoopExitBB)) {
457  ExitDirection = false;
458  LoopExitSuccIdx = 1;
459  LoopExitBB = BI.getSuccessor(1);
460  if (L.contains(LoopExitBB)) {
461  LLVM_DEBUG(dbgs() << " Branch doesn't exit the loop!\n");
462  return false;
463  }
464  }
465  auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
466  auto *ParentBB = BI.getParent();
467  if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {
468  LLVM_DEBUG(dbgs() << " Loop exit PHI's aren't loop-invariant!\n");
469  return false;
470  }
471 
472  // When unswitching only part of the branch's condition, we need the exit
473  // block to be reached directly from the partially unswitched input. This can
474  // be done when the exit block is along the true edge and the branch condition
475  // is a graph of `or` operations, or the exit block is along the false edge
476  // and the condition is a graph of `and` operations.
477  if (!FullUnswitch) {
478  if (ExitDirection ? !match(BI.getCondition(), m_LogicalOr())
479  : !match(BI.getCondition(), m_LogicalAnd())) {
480  LLVM_DEBUG(dbgs() << " Branch condition is in improper form for "
481  "non-full unswitch!\n");
482  return false;
483  }
484  }
485 
486  LLVM_DEBUG({
487  dbgs() << " unswitching trivial invariant conditions for: " << BI
488  << "\n";
489  for (Value *Invariant : Invariants) {
490  dbgs() << " " << *Invariant << " == true";
491  if (Invariant != Invariants.back())
492  dbgs() << " ||";
493  dbgs() << "\n";
494  }
495  });
496 
497  // If we have scalar evolutions, we need to invalidate them including this
498  // loop, the loop containing the exit block and the topmost parent loop
499  // exiting via LoopExitBB.
500  if (SE) {
501  if (Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
502  SE->forgetLoop(ExitL);
503  else
504  // Forget the entire nest as this exits the entire nest.
505  SE->forgetTopmostLoop(&L);
506  }
507 
508  if (MSSAU && VerifyMemorySSA)
509  MSSAU->getMemorySSA()->verifyMemorySSA();
510 
511  // Split the preheader, so that we know that there is a safe place to insert
512  // the conditional branch. We will change the preheader to have a conditional
513  // branch on LoopCond.
514  BasicBlock *OldPH = L.getLoopPreheader();
515  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
516 
517  // Now that we have a place to insert the conditional branch, create a place
518  // to branch to: this is the exit block out of the loop that we are
519  // unswitching. We need to split this if there are other loop predecessors.
520  // Because the loop is in simplified form, *any* other predecessor is enough.
521  BasicBlock *UnswitchedBB;
522  if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
523  assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
524  "A branch's parent isn't a predecessor!");
525  UnswitchedBB = LoopExitBB;
526  } else {
527  UnswitchedBB =
528  SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI, MSSAU);
529  }
530 
531  if (MSSAU && VerifyMemorySSA)
532  MSSAU->getMemorySSA()->verifyMemorySSA();
533 
534  // Actually move the invariant uses into the unswitched position. If possible,
535  // we do this by moving the instructions, but when doing partial unswitching
536  // we do it by building a new merge of the values in the unswitched position.
537  OldPH->getTerminator()->eraseFromParent();
538  if (FullUnswitch) {
539  // If fully unswitching, we can use the existing branch instruction.
540  // Splice it into the old PH to gate reaching the new preheader and re-point
541  // its successors.
542  OldPH->getInstList().splice(OldPH->end(), BI.getParent()->getInstList(),
543  BI);
544  if (MSSAU) {
545  // Temporarily clone the terminator, to make MSSA update cheaper by
546  // separating "insert edge" updates from "remove edge" ones.
547  ParentBB->getInstList().push_back(BI.clone());
548  } else {
549  // Create a new unconditional branch that will continue the loop as a new
550  // terminator.
551  BranchInst::Create(ContinueBB, ParentBB);
552  }
553  BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
554  BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
555  } else {
556  // Only unswitching a subset of inputs to the condition, so we will need to
557  // build a new branch that merges the invariant inputs.
558  if (ExitDirection)
560  "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
561  "condition!");
562  else
564  "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
565  " condition!");
566  buildPartialUnswitchConditionalBranch(*OldPH, Invariants, ExitDirection,
567  *UnswitchedBB, *NewPH);
568  }
569 
570  // Update the dominator tree with the added edge.
571  DT.insertEdge(OldPH, UnswitchedBB);
572 
573  // After the dominator tree was updated with the added edge, update MemorySSA
574  // if available.
575  if (MSSAU) {
577  Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
578  MSSAU->applyInsertUpdates(Updates, DT);
579  }
580 
581  // Finish updating dominator tree and memory ssa for full unswitch.
582  if (FullUnswitch) {
583  if (MSSAU) {
584  // Remove the cloned branch instruction.
585  ParentBB->getTerminator()->eraseFromParent();
586  // Create unconditional branch now.
587  BranchInst::Create(ContinueBB, ParentBB);
588  MSSAU->removeEdge(ParentBB, LoopExitBB);
589  }
590  DT.deleteEdge(ParentBB, LoopExitBB);
591  }
592 
593  if (MSSAU && VerifyMemorySSA)
594  MSSAU->getMemorySSA()->verifyMemorySSA();
595 
596  // Rewrite the relevant PHI nodes.
597  if (UnswitchedBB == LoopExitBB)
598  rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
599  else
600  rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
601  *ParentBB, *OldPH, FullUnswitch);
602 
603  // The constant we can replace all of our invariants with inside the loop
604  // body. If any of the invariants have a value other than this the loop won't
605  // be entered.
606  ConstantInt *Replacement = ExitDirection
609 
610  // Since this is an i1 condition we can also trivially replace uses of it
611  // within the loop with a constant.
612  for (Value *Invariant : Invariants)
613  replaceLoopInvariantUses(L, Invariant, *Replacement);
614 
615  // If this was full unswitching, we may have changed the nesting relationship
616  // for this loop so hoist it to its correct parent if needed.
617  if (FullUnswitch)
618  hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
619 
620  if (MSSAU && VerifyMemorySSA)
621  MSSAU->getMemorySSA()->verifyMemorySSA();
622 
623  LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
624  ++NumTrivial;
625  ++NumBranches;
626  return true;
627 }
628 
629 /// Unswitch a trivial switch if the condition is loop invariant.
630 ///
631 /// This routine should only be called when loop code leading to the switch has
632 /// been validated as trivial (no side effects). This routine checks if the
633 /// condition is invariant and that at least one of the successors is a loop
634 /// exit. This allows us to unswitch without duplicating the loop, making it
635 /// trivial.
636 ///
637 /// If this routine fails to unswitch the switch it returns false.
638 ///
639 /// If the switch can be unswitched, this routine splits the preheader and
640 /// copies the switch above that split. If the default case is one of the
641 /// exiting cases, it copies the non-exiting cases and points them at the new
642 /// preheader. If the default case is not exiting, it copies the exiting cases
643 /// and points the default at the preheader. It preserves loop simplified form
644 /// (splitting the exit blocks as necessary). It simplifies the switch within
645 /// the loop by removing now-dead cases. If the default case is one of those
646 /// unswitched, it replaces its destination with a new basic block containing
647 /// only unreachable. Such basic blocks, while technically loop exits, are not
648 /// considered for unswitching so this is a stable transform and the same
649 /// switch will not be revisited. If after unswitching there is only a single
650 /// in-loop successor, the switch is further simplified to an unconditional
651 /// branch. Still more cleanup can be done with some simplifycfg like pass.
652 ///
653 /// If `SE` is not null, it will be updated based on the potential loop SCEVs
654 /// invalidated by this.
656  LoopInfo &LI, ScalarEvolution *SE,
657  MemorySSAUpdater *MSSAU) {
658  LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
659  Value *LoopCond = SI.getCondition();
660 
661  // If this isn't switching on an invariant condition, we can't unswitch it.
662  if (!L.isLoopInvariant(LoopCond))
663  return false;
664 
665  auto *ParentBB = SI.getParent();
666 
667  // The same check must be used both for the default and the exit cases. We
668  // should never leave edges from the switch instruction to a basic block that
669  // we are unswitching, hence the condition used to determine the default case
670  // needs to also be used to populate ExitCaseIndices, which is then used to
671  // remove cases from the switch.
672  auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
673  // BBToCheck is not an exit block if it is inside loop L.
674  if (L.contains(&BBToCheck))
675  return false;
676  // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
677  if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
678  return false;
679  // We do not unswitch a block that only has an unreachable statement, as
680  // it's possible this is a previously unswitched block. Only unswitch if
681  // either the terminator is not unreachable, or, if it is, it's not the only
682  // instruction in the block.
683  auto *TI = BBToCheck.getTerminator();
684  bool isUnreachable = isa<UnreachableInst>(TI);
685  return !isUnreachable ||
686  (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI));
687  };
688 
689  SmallVector<int, 4> ExitCaseIndices;
690  for (auto Case : SI.cases())
691  if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
692  ExitCaseIndices.push_back(Case.getCaseIndex());
693  BasicBlock *DefaultExitBB = nullptr;
696  if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
697  DefaultExitBB = SI.getDefaultDest();
698  } else if (ExitCaseIndices.empty())
699  return false;
700 
701  LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
702 
703  if (MSSAU && VerifyMemorySSA)
704  MSSAU->getMemorySSA()->verifyMemorySSA();
705 
706  // We may need to invalidate SCEVs for the outermost loop reached by any of
707  // the exits.
708  Loop *OuterL = &L;
709 
710  if (DefaultExitBB) {
711  // Clear out the default destination temporarily to allow accurate
712  // predecessor lists to be examined below.
713  SI.setDefaultDest(nullptr);
714  // Check the loop containing this exit.
715  Loop *ExitL = LI.getLoopFor(DefaultExitBB);
716  if (!ExitL || ExitL->contains(OuterL))
717  OuterL = ExitL;
718  }
719 
720  // Store the exit cases into a separate data structure and remove them from
721  // the switch.
722  SmallVector<std::tuple<ConstantInt *, BasicBlock *,
724  4> ExitCases;
725  ExitCases.reserve(ExitCaseIndices.size());
727  // We walk the case indices backwards so that we remove the last case first
728  // and don't disrupt the earlier indices.
729  for (unsigned Index : reverse(ExitCaseIndices)) {
730  auto CaseI = SI.case_begin() + Index;
731  // Compute the outer loop from this exit.
732  Loop *ExitL = LI.getLoopFor(CaseI->getCaseSuccessor());
733  if (!ExitL || ExitL->contains(OuterL))
734  OuterL = ExitL;
735  // Save the value of this case.
736  auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
737  ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
738  // Delete the unswitched cases.
739  SIW.removeCase(CaseI);
740  }
741 
742  if (SE) {
743  if (OuterL)
744  SE->forgetLoop(OuterL);
745  else
746  SE->forgetTopmostLoop(&L);
747  }
748 
749  // Check if after this all of the remaining cases point at the same
750  // successor.
751  BasicBlock *CommonSuccBB = nullptr;
752  if (SI.getNumCases() > 0 &&
753  all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
754  return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
755  }))
756  CommonSuccBB = SI.case_begin()->getCaseSuccessor();
757  if (!DefaultExitBB) {
758  // If we're not unswitching the default, we need it to match any cases to
759  // have a common successor or if we have no cases it is the common
760  // successor.
761  if (SI.getNumCases() == 0)
762  CommonSuccBB = SI.getDefaultDest();
763  else if (SI.getDefaultDest() != CommonSuccBB)
764  CommonSuccBB = nullptr;
765  }
766 
767  // Split the preheader, so that we know that there is a safe place to insert
768  // the switch.
769  BasicBlock *OldPH = L.getLoopPreheader();
770  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
771  OldPH->getTerminator()->eraseFromParent();
772 
773  // Now add the unswitched switch.
774  auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
775  SwitchInstProfUpdateWrapper NewSIW(*NewSI);
776 
777  // Rewrite the IR for the unswitched basic blocks. This requires two steps.
778  // First, we split any exit blocks with remaining in-loop predecessors. Then
779  // we update the PHIs in one of two ways depending on if there was a split.
780  // We walk in reverse so that we split in the same order as the cases
781  // appeared. This is purely for convenience of reading the resulting IR, but
782  // it doesn't cost anything really.
783  SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
785  // Handle the default exit if necessary.
786  // FIXME: It'd be great if we could merge this with the loop below but LLVM's
787  // ranges aren't quite powerful enough yet.
788  if (DefaultExitBB) {
789  if (pred_empty(DefaultExitBB)) {
790  UnswitchedExitBBs.insert(DefaultExitBB);
791  rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
792  } else {
793  auto *SplitBB =
794  SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI, MSSAU);
795  rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
796  *ParentBB, *OldPH,
797  /*FullUnswitch*/ true);
798  DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
799  }
800  }
801  // Note that we must use a reference in the for loop so that we update the
802  // container.
803  for (auto &ExitCase : reverse(ExitCases)) {
804  // Grab a reference to the exit block in the pair so that we can update it.
805  BasicBlock *ExitBB = std::get<1>(ExitCase);
806 
807  // If this case is the last edge into the exit block, we can simply reuse it
808  // as it will no longer be a loop exit. No mapping necessary.
809  if (pred_empty(ExitBB)) {
810  // Only rewrite once.
811  if (UnswitchedExitBBs.insert(ExitBB).second)
812  rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
813  continue;
814  }
815 
816  // Otherwise we need to split the exit block so that we retain an exit
817  // block from the loop and a target for the unswitched condition.
818  BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
819  if (!SplitExitBB) {
820  // If this is the first time we see this, do the split and remember it.
821  SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
822  rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
823  *ParentBB, *OldPH,
824  /*FullUnswitch*/ true);
825  }
826  // Update the case pair to point to the split block.
827  std::get<1>(ExitCase) = SplitExitBB;
828  }
829 
830  // Now add the unswitched cases. We do this in reverse order as we built them
831  // in reverse order.
832  for (auto &ExitCase : reverse(ExitCases)) {
833  ConstantInt *CaseVal = std::get<0>(ExitCase);
834  BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
835 
836  NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
837  }
838 
839  // If the default was unswitched, re-point it and add explicit cases for
840  // entering the loop.
841  if (DefaultExitBB) {
842  NewSIW->setDefaultDest(DefaultExitBB);
843  NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
844 
845  // We removed all the exit cases, so we just copy the cases to the
846  // unswitched switch.
847  for (const auto &Case : SI.cases())
848  NewSIW.addCase(Case.getCaseValue(), NewPH,
850  } else if (DefaultCaseWeight) {
851  // We have to set branch weight of the default case.
852  uint64_t SW = *DefaultCaseWeight;
853  for (const auto &Case : SI.cases()) {
854  auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
855  assert(W &&
856  "case weight must be defined as default case weight is defined");
857  SW += *W;
858  }
859  NewSIW.setSuccessorWeight(0, SW);
860  }
861 
862  // If we ended up with a common successor for every path through the switch
863  // after unswitching, rewrite it to an unconditional branch to make it easy
864  // to recognize. Otherwise we potentially have to recognize the default case
865  // pointing at unreachable and other complexity.
866  if (CommonSuccBB) {
867  BasicBlock *BB = SI.getParent();
868  // We may have had multiple edges to this common successor block, so remove
869  // them as predecessors. We skip the first one, either the default or the
870  // actual first case.
871  bool SkippedFirst = DefaultExitBB == nullptr;
872  for (auto Case : SI.cases()) {
873  assert(Case.getCaseSuccessor() == CommonSuccBB &&
874  "Non-common successor!");
875  (void)Case;
876  if (!SkippedFirst) {
877  SkippedFirst = true;
878  continue;
879  }
880  CommonSuccBB->removePredecessor(BB,
881  /*KeepOneInputPHIs*/ true);
882  }
883  // Now nuke the switch and replace it with a direct branch.
884  SIW.eraseFromParent();
885  BranchInst::Create(CommonSuccBB, BB);
886  } else if (DefaultExitBB) {
887  assert(SI.getNumCases() > 0 &&
888  "If we had no cases we'd have a common successor!");
889  // Move the last case to the default successor. This is valid as if the
890  // default got unswitched it cannot be reached. This has the advantage of
891  // being simple and keeping the number of edges from this switch to
892  // successors the same, and avoiding any PHI update complexity.
893  auto LastCaseI = std::prev(SI.case_end());
894 
895  SI.setDefaultDest(LastCaseI->getCaseSuccessor());
896  SIW.setSuccessorWeight(
897  0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
898  SIW.removeCase(LastCaseI);
899  }
900 
901  // Walk the unswitched exit blocks and the unswitched split blocks and update
902  // the dominator tree based on the CFG edits. While we are walking unordered
903  // containers here, the API for applyUpdates takes an unordered list of
904  // updates and requires them to not contain duplicates.
906  for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
907  DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
908  DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
909  }
910  for (auto SplitUnswitchedPair : SplitExitBBMap) {
911  DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
912  DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
913  }
914 
915  if (MSSAU) {
916  MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
917  if (VerifyMemorySSA)
918  MSSAU->getMemorySSA()->verifyMemorySSA();
919  } else {
920  DT.applyUpdates(DTUpdates);
921  }
922 
924 
925  // We may have changed the nesting relationship for this loop so hoist it to
926  // its correct parent if needed.
927  hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
928 
929  if (MSSAU && VerifyMemorySSA)
930  MSSAU->getMemorySSA()->verifyMemorySSA();
931 
932  ++NumTrivial;
933  ++NumSwitches;
934  LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
935  return true;
936 }
937 
938 /// This routine scans the loop to find a branch or switch which occurs before
939 /// any side effects occur. These can potentially be unswitched without
940 /// duplicating the loop. If a branch or switch is successfully unswitched the
941 /// scanning continues to see if subsequent branches or switches have become
942 /// trivial. Once all trivial candidates have been unswitched, this routine
943 /// returns.
944 ///
945 /// The return value indicates whether anything was unswitched (and therefore
946 /// changed).
947 ///
948 /// If `SE` is not null, it will be updated based on the potential loop SCEVs
949 /// invalidated by this.
951  LoopInfo &LI, ScalarEvolution *SE,
952  MemorySSAUpdater *MSSAU) {
953  bool Changed = false;
954 
955  // If loop header has only one reachable successor we should keep looking for
956  // trivial condition candidates in the successor as well. An alternative is
957  // to constant fold conditions and merge successors into loop header (then we
958  // only need to check header's terminator). The reason for not doing this in
959  // LoopUnswitch pass is that it could potentially break LoopPassManager's
960  // invariants. Folding dead branches could either eliminate the current loop
961  // or make other loops unreachable. LCSSA form might also not be preserved
962  // after deleting branches. The following code keeps traversing loop header's
963  // successors until it finds the trivial condition candidate (condition that
964  // is not a constant). Since unswitching generates branches with constant
965  // conditions, this scenario could be very common in practice.
966  BasicBlock *CurrentBB = L.getHeader();
968  Visited.insert(CurrentBB);
969  do {
970  // Check if there are any side-effecting instructions (e.g. stores, calls,
971  // volatile loads) in the part of the loop that the code *would* execute
972  // without unswitching.
973  if (MSSAU) // Possible early exit with MSSA
974  if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
975  if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
976  return Changed;
977  if (llvm::any_of(*CurrentBB,
978  [](Instruction &I) { return I.mayHaveSideEffects(); }))
979  return Changed;
980 
981  Instruction *CurrentTerm = CurrentBB->getTerminator();
982 
983  if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
984  // Don't bother trying to unswitch past a switch with a constant
985  // condition. This should be removed prior to running this pass by
986  // simplifycfg.
987  if (isa<Constant>(SI->getCondition()))
988  return Changed;
989 
990  if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
991  // Couldn't unswitch this one so we're done.
992  return Changed;
993 
994  // Mark that we managed to unswitch something.
995  Changed = true;
996 
997  // If unswitching turned the terminator into an unconditional branch then
998  // we can continue. The unswitching logic specifically works to fold any
999  // cases it can into an unconditional branch to make it easier to
1000  // recognize here.
1001  auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
1002  if (!BI || BI->isConditional())
1003  return Changed;
1004 
1005  CurrentBB = BI->getSuccessor(0);
1006  continue;
1007  }
1008 
1009  auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1010  if (!BI)
1011  // We do not understand other terminator instructions.
1012  return Changed;
1013 
1014  // Don't bother trying to unswitch past an unconditional branch or a branch
1015  // with a constant value. These should be removed by simplifycfg prior to
1016  // running this pass.
1017  if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
1018  return Changed;
1019 
1020  // Found a trivial condition candidate: non-foldable conditional branch. If
1021  // we fail to unswitch this, we can't do anything else that is trivial.
1022  if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
1023  return Changed;
1024 
1025  // Mark that we managed to unswitch something.
1026  Changed = true;
1027 
1028  // If we only unswitched some of the conditions feeding the branch, we won't
1029  // have collapsed it to a single successor.
1030  BI = cast<BranchInst>(CurrentBB->getTerminator());
1031  if (BI->isConditional())
1032  return Changed;
1033 
1034  // Follow the newly unconditional branch into its successor.
1035  CurrentBB = BI->getSuccessor(0);
1036 
1037  // When continuing, if we exit the loop or reach a previous visited block,
1038  // then we can not reach any trivial condition candidates (unfoldable
1039  // branch instructions or switch instructions) and no unswitch can happen.
1040  } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
1041 
1042  return Changed;
1043 }
1044 
1045 /// Build the cloned blocks for an unswitched copy of the given loop.
1046 ///
1047 /// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
1048 /// after the split block (`SplitBB`) that will be used to select between the
1049 /// cloned and original loop.
1050 ///
1051 /// This routine handles cloning all of the necessary loop blocks and exit
1052 /// blocks including rewriting their instructions and the relevant PHI nodes.
1053 /// Any loop blocks or exit blocks which are dominated by a different successor
1054 /// than the one for this clone of the loop blocks can be trivially skipped. We
1055 /// use the `DominatingSucc` map to determine whether a block satisfies that
1056 /// property with a simple map lookup.
1057 ///
1058 /// It also correctly creates the unconditional branch in the cloned
1059 /// unswitched parent block to only point at the unswitched successor.
1060 ///
1061 /// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1062 /// block splitting is correctly reflected in `LoopInfo`, essentially all of
1063 /// the cloned blocks (and their loops) are left without full `LoopInfo`
1064 /// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1065 /// blocks to them but doesn't create the cloned `DominatorTree` structure and
1066 /// instead the caller must recompute an accurate DT. It *does* correctly
1067 /// update the `AssumptionCache` provided in `AC`.
1069  Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
1070  ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
1071  BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
1072  const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
1073  ValueToValueMapTy &VMap,
1075  DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
1076  SmallVector<BasicBlock *, 4> NewBlocks;
1077  NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
1078 
1079  // We will need to clone a bunch of blocks, wrap up the clone operation in
1080  // a helper.
1081  auto CloneBlock = [&](BasicBlock *OldBB) {
1082  // Clone the basic block and insert it before the new preheader.
1083  BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
1084  NewBB->moveBefore(LoopPH);
1085 
1086  // Record this block and the mapping.
1087  NewBlocks.push_back(NewBB);
1088  VMap[OldBB] = NewBB;
1089 
1090  return NewBB;
1091  };
1092 
1093  // We skip cloning blocks when they have a dominating succ that is not the
1094  // succ we are cloning for.
1095  auto SkipBlock = [&](BasicBlock *BB) {
1096  auto It = DominatingSucc.find(BB);
1097  return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
1098  };
1099 
1100  // First, clone the preheader.
1101  auto *ClonedPH = CloneBlock(LoopPH);
1102 
1103  // Then clone all the loop blocks, skipping the ones that aren't necessary.
1104  for (auto *LoopBB : L.blocks())
1105  if (!SkipBlock(LoopBB))
1106  CloneBlock(LoopBB);
1107 
1108  // Split all the loop exit edges so that when we clone the exit blocks, if
1109  // any of the exit blocks are *also* a preheader for some other loop, we
1110  // don't create multiple predecessors entering the loop header.
1111  for (auto *ExitBB : ExitBlocks) {
1112  if (SkipBlock(ExitBB))
1113  continue;
1114 
1115  // When we are going to clone an exit, we don't need to clone all the
1116  // instructions in the exit block and we want to ensure we have an easy
1117  // place to merge the CFG, so split the exit first. This is always safe to
1118  // do because there cannot be any non-loop predecessors of a loop exit in
1119  // loop simplified form.
1120  auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
1121 
1122  // Rearrange the names to make it easier to write test cases by having the
1123  // exit block carry the suffix rather than the merge block carrying the
1124  // suffix.
1125  MergeBB->takeName(ExitBB);
1126  ExitBB->setName(Twine(MergeBB->getName()) + ".split");
1127 
1128  // Now clone the original exit block.
1129  auto *ClonedExitBB = CloneBlock(ExitBB);
1130  assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1131  "Exit block should have been split to have one successor!");
1132  assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1133  "Cloned exit block has the wrong successor!");
1134 
1135  // Remap any cloned instructions and create a merge phi node for them.
1136  for (auto ZippedInsts : llvm::zip_first(
1137  llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
1138  llvm::make_range(ClonedExitBB->begin(),
1139  std::prev(ClonedExitBB->end())))) {
1140  Instruction &I = std::get<0>(ZippedInsts);
1141  Instruction &ClonedI = std::get<1>(ZippedInsts);
1142 
1143  // The only instructions in the exit block should be PHI nodes and
1144  // potentially a landing pad.
1145  assert(
1146  (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&
1147  "Bad instruction in exit block!");
1148  // We should have a value map between the instruction and its clone.
1149  assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
1150 
1151  auto *MergePN =
1152  PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi",
1153  &*MergeBB->getFirstInsertionPt());
1154  I.replaceAllUsesWith(MergePN);
1155  MergePN->addIncoming(&I, ExitBB);
1156  MergePN->addIncoming(&ClonedI, ClonedExitBB);
1157  }
1158  }
1159 
1160  // Rewrite the instructions in the cloned blocks to refer to the instructions
1161  // in the cloned blocks. We have to do this as a second pass so that we have
1162  // everything available. Also, we have inserted new instructions which may
1163  // include assume intrinsics, so we update the assumption cache while
1164  // processing this.
1165  for (auto *ClonedBB : NewBlocks)
1166  for (Instruction &I : *ClonedBB) {
1167  RemapInstruction(&I, VMap,
1169  if (auto *II = dyn_cast<AssumeInst>(&I))
1170  AC.registerAssumption(II);
1171  }
1172 
1173  // Update any PHI nodes in the cloned successors of the skipped blocks to not
1174  // have spurious incoming values.
1175  for (auto *LoopBB : L.blocks())
1176  if (SkipBlock(LoopBB))
1177  for (auto *SuccBB : successors(LoopBB))
1178  if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
1179  for (PHINode &PN : ClonedSuccBB->phis())
1180  PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
1181 
1182  // Remove the cloned parent as a predecessor of any successor we ended up
1183  // cloning other than the unswitched one.
1184  auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
1185  for (auto *SuccBB : successors(ParentBB)) {
1186  if (SuccBB == UnswitchedSuccBB)
1187  continue;
1188 
1189  auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
1190  if (!ClonedSuccBB)
1191  continue;
1192 
1193  ClonedSuccBB->removePredecessor(ClonedParentBB,
1194  /*KeepOneInputPHIs*/ true);
1195  }
1196 
1197  // Replace the cloned branch with an unconditional branch to the cloned
1198  // unswitched successor.
1199  auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
1200  Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1201  // Trivial Simplification. If Terminator is a conditional branch and
1202  // condition becomes dead - erase it.
1203  Value *ClonedConditionToErase = nullptr;
1204  if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1205  ClonedConditionToErase = BI->getCondition();
1206  else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1207  ClonedConditionToErase = SI->getCondition();
1208 
1209  ClonedTerminator->eraseFromParent();
1210  BranchInst::Create(ClonedSuccBB, ClonedParentBB);
1211 
1212  if (ClonedConditionToErase)
1213  RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
1214  MSSAU);
1215 
1216  // If there are duplicate entries in the PHI nodes because of multiple edges
1217  // to the unswitched successor, we need to nuke all but one as we replaced it
1218  // with a direct branch.
1219  for (PHINode &PN : ClonedSuccBB->phis()) {
1220  bool Found = false;
1221  // Loop over the incoming operands backwards so we can easily delete as we
1222  // go without invalidating the index.
1223  for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
1224  if (PN.getIncomingBlock(i) != ClonedParentBB)
1225  continue;
1226  if (!Found) {
1227  Found = true;
1228  continue;
1229  }
1230  PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
1231  }
1232  }
1233 
1234  // Record the domtree updates for the new blocks.
1236  for (auto *ClonedBB : NewBlocks) {
1237  for (auto *SuccBB : successors(ClonedBB))
1238  if (SuccSet.insert(SuccBB).second)
1239  DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1240  SuccSet.clear();
1241  }
1242 
1243  return ClonedPH;
1244 }
1245 
1246 /// Recursively clone the specified loop and all of its children.
1247 ///
1248 /// The target parent loop for the clone should be provided, or can be null if
1249 /// the clone is a top-level loop. While cloning, all the blocks are mapped
1250 /// with the provided value map. The entire original loop must be present in
1251 /// the value map. The cloned loop is returned.
1252 static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
1253  const ValueToValueMapTy &VMap, LoopInfo &LI) {
1254  auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
1255  assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
1256  ClonedL.reserveBlocks(OrigL.getNumBlocks());
1257  for (auto *BB : OrigL.blocks()) {
1258  auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
1259  ClonedL.addBlockEntry(ClonedBB);
1260  if (LI.getLoopFor(BB) == &OrigL)
1261  LI.changeLoopFor(ClonedBB, &ClonedL);
1262  }
1263  };
1264 
1265  // We specially handle the first loop because it may get cloned into
1266  // a different parent and because we most commonly are cloning leaf loops.
1267  Loop *ClonedRootL = LI.AllocateLoop();
1268  if (RootParentL)
1269  RootParentL->addChildLoop(ClonedRootL);
1270  else
1271  LI.addTopLevelLoop(ClonedRootL);
1272  AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1273 
1274  if (OrigRootL.isInnermost())
1275  return ClonedRootL;
1276 
1277  // If we have a nest, we can quickly clone the entire loop nest using an
1278  // iterative approach because it is a tree. We keep the cloned parent in the
1279  // data structure to avoid repeatedly querying through a map to find it.
1280  SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
1281  // Build up the loops to clone in reverse order as we'll clone them from the
1282  // back.
1283  for (Loop *ChildL : llvm::reverse(OrigRootL))
1284  LoopsToClone.push_back({ClonedRootL, ChildL});
1285  do {
1286  Loop *ClonedParentL, *L;
1287  std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
1288  Loop *ClonedL = LI.AllocateLoop();
1289  ClonedParentL->addChildLoop(ClonedL);
1290  AddClonedBlocksToLoop(*L, *ClonedL);
1291  for (Loop *ChildL : llvm::reverse(*L))
1292  LoopsToClone.push_back({ClonedL, ChildL});
1293  } while (!LoopsToClone.empty());
1294 
1295  return ClonedRootL;
1296 }
1297 
1298 /// Build the cloned loops of an original loop from unswitching.
1299 ///
1300 /// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1301 /// operation. We need to re-verify that there even is a loop (as the backedge
1302 /// may not have been cloned), and even if there are remaining backedges the
1303 /// backedge set may be different. However, we know that each child loop is
1304 /// undisturbed, we only need to find where to place each child loop within
1305 /// either any parent loop or within a cloned version of the original loop.
1306 ///
1307 /// Because child loops may end up cloned outside of any cloned version of the
1308 /// original loop, multiple cloned sibling loops may be created. All of them
1309 /// are returned so that the newly introduced loop nest roots can be
1310 /// identified.
1311 static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1312  const ValueToValueMapTy &VMap, LoopInfo &LI,
1313  SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1314  Loop *ClonedL = nullptr;
1315 
1316  auto *OrigPH = OrigL.getLoopPreheader();
1317  auto *OrigHeader = OrigL.getHeader();
1318 
1319  auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1320  auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1321 
1322  // We need to know the loops of the cloned exit blocks to even compute the
1323  // accurate parent loop. If we only clone exits to some parent of the
1324  // original parent, we want to clone into that outer loop. We also keep track
1325  // of the loops that our cloned exit blocks participate in.
1326  Loop *ParentL = nullptr;
1327  SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1329  ClonedExitsInLoops.reserve(ExitBlocks.size());
1330  for (auto *ExitBB : ExitBlocks)
1331  if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1332  if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1333  ExitLoopMap[ClonedExitBB] = ExitL;
1334  ClonedExitsInLoops.push_back(ClonedExitBB);
1335  if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1336  ParentL = ExitL;
1337  }
1338  assert((!ParentL || ParentL == OrigL.getParentLoop() ||
1339  ParentL->contains(OrigL.getParentLoop())) &&
1340  "The computed parent loop should always contain (or be) the parent of "
1341  "the original loop.");
1342 
1343  // We build the set of blocks dominated by the cloned header from the set of
1344  // cloned blocks out of the original loop. While not all of these will
1345  // necessarily be in the cloned loop, it is enough to establish that they
1346  // aren't in unreachable cycles, etc.
1347  SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1348  for (auto *BB : OrigL.blocks())
1349  if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1350  ClonedLoopBlocks.insert(ClonedBB);
1351 
1352  // Rebuild the set of blocks that will end up in the cloned loop. We may have
1353  // skipped cloning some region of this loop which can in turn skip some of
1354  // the backedges so we have to rebuild the blocks in the loop based on the
1355  // backedges that remain after cloning.
1357  SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1358  for (auto *Pred : predecessors(ClonedHeader)) {
1359  // The only possible non-loop header predecessor is the preheader because
1360  // we know we cloned the loop in simplified form.
1361  if (Pred == ClonedPH)
1362  continue;
1363 
1364  // Because the loop was in simplified form, the only non-loop predecessor
1365  // should be the preheader.
1366  assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
1367  "header other than the preheader "
1368  "that is not part of the loop!");
1369 
1370  // Insert this block into the loop set and on the first visit (and if it
1371  // isn't the header we're currently walking) put it into the worklist to
1372  // recurse through.
1373  if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1374  Worklist.push_back(Pred);
1375  }
1376 
1377  // If we had any backedges then there *is* a cloned loop. Put the header into
1378  // the loop set and then walk the worklist backwards to find all the blocks
1379  // that remain within the loop after cloning.
1380  if (!BlocksInClonedLoop.empty()) {
1381  BlocksInClonedLoop.insert(ClonedHeader);
1382 
1383  while (!Worklist.empty()) {
1384  BasicBlock *BB = Worklist.pop_back_val();
1385  assert(BlocksInClonedLoop.count(BB) &&
1386  "Didn't put block into the loop set!");
1387 
1388  // Insert any predecessors that are in the possible set into the cloned
1389  // set, and if the insert is successful, add them to the worklist. Note
1390  // that we filter on the blocks that are definitely reachable via the
1391  // backedge to the loop header so we may prune out dead code within the
1392  // cloned loop.
1393  for (auto *Pred : predecessors(BB))
1394  if (ClonedLoopBlocks.count(Pred) &&
1395  BlocksInClonedLoop.insert(Pred).second)
1396  Worklist.push_back(Pred);
1397  }
1398 
1399  ClonedL = LI.AllocateLoop();
1400  if (ParentL) {
1401  ParentL->addBasicBlockToLoop(ClonedPH, LI);
1402  ParentL->addChildLoop(ClonedL);
1403  } else {
1404  LI.addTopLevelLoop(ClonedL);
1405  }
1406  NonChildClonedLoops.push_back(ClonedL);
1407 
1408  ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1409  // We don't want to just add the cloned loop blocks based on how we
1410  // discovered them. The original order of blocks was carefully built in
1411  // a way that doesn't rely on predecessor ordering. Rather than re-invent
1412  // that logic, we just re-walk the original blocks (and those of the child
1413  // loops) and filter them as we add them into the cloned loop.
1414  for (auto *BB : OrigL.blocks()) {
1415  auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1416  if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1417  continue;
1418 
1419  // Directly add the blocks that are only in this loop.
1420  if (LI.getLoopFor(BB) == &OrigL) {
1421  ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1422  continue;
1423  }
1424 
1425  // We want to manually add it to this loop and parents.
1426  // Registering it with LoopInfo will happen when we clone the top
1427  // loop for this block.
1428  for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1429  PL->addBlockEntry(ClonedBB);
1430  }
1431 
1432  // Now add each child loop whose header remains within the cloned loop. All
1433  // of the blocks within the loop must satisfy the same constraints as the
1434  // header so once we pass the header checks we can just clone the entire
1435  // child loop nest.
1436  for (Loop *ChildL : OrigL) {
1437  auto *ClonedChildHeader =
1438  cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1439  if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1440  continue;
1441 
1442 #ifndef NDEBUG
1443  // We should never have a cloned child loop header but fail to have
1444  // all of the blocks for that child loop.
1445  for (auto *ChildLoopBB : ChildL->blocks())
1446  assert(BlocksInClonedLoop.count(
1447  cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
1448  "Child cloned loop has a header within the cloned outer "
1449  "loop but not all of its blocks!");
1450 #endif
1451 
1452  cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1453  }
1454  }
1455 
1456  // Now that we've handled all the components of the original loop that were
1457  // cloned into a new loop, we still need to handle anything from the original
1458  // loop that wasn't in a cloned loop.
1459 
1460  // Figure out what blocks are left to place within any loop nest containing
1461  // the unswitched loop. If we never formed a loop, the cloned PH is one of
1462  // them.
1463  SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1464  if (BlocksInClonedLoop.empty())
1465  UnloopedBlockSet.insert(ClonedPH);
1466  for (auto *ClonedBB : ClonedLoopBlocks)
1467  if (!BlocksInClonedLoop.count(ClonedBB))
1468  UnloopedBlockSet.insert(ClonedBB);
1469 
1470  // Copy the cloned exits and sort them in ascending loop depth, we'll work
1471  // backwards across these to process them inside out. The order shouldn't
1472  // matter as we're just trying to build up the map from inside-out; we use
1473  // the map in a more stably ordered way below.
1474  auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1475  llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1476  return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1477  ExitLoopMap.lookup(RHS)->getLoopDepth();
1478  });
1479 
1480  // Populate the existing ExitLoopMap with everything reachable from each
1481  // exit, starting from the inner most exit.
1482  while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1483  assert(Worklist.empty() && "Didn't clear worklist!");
1484 
1485  BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1486  Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1487 
1488  // Walk the CFG back until we hit the cloned PH adding everything reachable
1489  // and in the unlooped set to this exit block's loop.
1490  Worklist.push_back(ExitBB);
1491  do {
1492  BasicBlock *BB = Worklist.pop_back_val();
1493  // We can stop recursing at the cloned preheader (if we get there).
1494  if (BB == ClonedPH)
1495  continue;
1496 
1497  for (BasicBlock *PredBB : predecessors(BB)) {
1498  // If this pred has already been moved to our set or is part of some
1499  // (inner) loop, no update needed.
1500  if (!UnloopedBlockSet.erase(PredBB)) {
1501  assert(
1502  (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
1503  "Predecessor not mapped to a loop!");
1504  continue;
1505  }
1506 
1507  // We just insert into the loop set here. We'll add these blocks to the
1508  // exit loop after we build up the set in an order that doesn't rely on
1509  // predecessor order (which in turn relies on use list order).
1510  bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1511  (void)Inserted;
1512  assert(Inserted && "Should only visit an unlooped block once!");
1513 
1514  // And recurse through to its predecessors.
1515  Worklist.push_back(PredBB);
1516  }
1517  } while (!Worklist.empty());
1518  }
1519 
1520  // Now that the ExitLoopMap gives as mapping for all the non-looping cloned
1521  // blocks to their outer loops, walk the cloned blocks and the cloned exits
1522  // in their original order adding them to the correct loop.
1523 
1524  // We need a stable insertion order. We use the order of the original loop
1525  // order and map into the correct parent loop.
1526  for (auto *BB : llvm::concat<BasicBlock *const>(
1527  makeArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1528  if (Loop *OuterL = ExitLoopMap.lookup(BB))
1529  OuterL->addBasicBlockToLoop(BB, LI);
1530 
1531 #ifndef NDEBUG
1532  for (auto &BBAndL : ExitLoopMap) {
1533  auto *BB = BBAndL.first;
1534  auto *OuterL = BBAndL.second;
1535  assert(LI.getLoopFor(BB) == OuterL &&
1536  "Failed to put all blocks into outer loops!");
1537  }
1538 #endif
1539 
1540  // Now that all the blocks are placed into the correct containing loop in the
1541  // absence of child loops, find all the potentially cloned child loops and
1542  // clone them into whatever outer loop we placed their header into.
1543  for (Loop *ChildL : OrigL) {
1544  auto *ClonedChildHeader =
1545  cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1546  if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1547  continue;
1548 
1549 #ifndef NDEBUG
1550  for (auto *ChildLoopBB : ChildL->blocks())
1551  assert(VMap.count(ChildLoopBB) &&
1552  "Cloned a child loop header but not all of that loops blocks!");
1553 #endif
1554 
1555  NonChildClonedLoops.push_back(cloneLoopNest(
1556  *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1557  }
1558 }
1559 
1560 static void
1562  ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1563  DominatorTree &DT, MemorySSAUpdater *MSSAU) {
1564  // Find all the dead clones, and remove them from their successors.
1565  SmallVector<BasicBlock *, 16> DeadBlocks;
1566  for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1567  for (auto &VMap : VMaps)
1568  if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1569  if (!DT.isReachableFromEntry(ClonedBB)) {
1570  for (BasicBlock *SuccBB : successors(ClonedBB))
1571  SuccBB->removePredecessor(ClonedBB);
1572  DeadBlocks.push_back(ClonedBB);
1573  }
1574 
1575  // Remove all MemorySSA in the dead blocks
1576  if (MSSAU) {
1577  SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1578  DeadBlocks.end());
1579  MSSAU->removeBlocks(DeadBlockSet);
1580  }
1581 
1582  // Drop any remaining references to break cycles.
1583  for (BasicBlock *BB : DeadBlocks)
1584  BB->dropAllReferences();
1585  // Erase them from the IR.
1586  for (BasicBlock *BB : DeadBlocks)
1587  BB->eraseFromParent();
1588 }
1589 
1590 static void
1592  SmallVectorImpl<BasicBlock *> &ExitBlocks,
1593  DominatorTree &DT, LoopInfo &LI,
1594  MemorySSAUpdater *MSSAU,
1595  function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
1596  // Find all the dead blocks tied to this loop, and remove them from their
1597  // successors.
1598  SmallSetVector<BasicBlock *, 8> DeadBlockSet;
1599 
1600  // Start with loop/exit blocks and get a transitive closure of reachable dead
1601  // blocks.
1602  SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1603  ExitBlocks.end());
1604  DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1605  while (!DeathCandidates.empty()) {
1606  auto *BB = DeathCandidates.pop_back_val();
1607  if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1608  for (BasicBlock *SuccBB : successors(BB)) {
1609  SuccBB->removePredecessor(BB);
1610  DeathCandidates.push_back(SuccBB);
1611  }
1612  DeadBlockSet.insert(BB);
1613  }
1614  }
1615 
1616  // Remove all MemorySSA in the dead blocks
1617  if (MSSAU)
1618  MSSAU->removeBlocks(DeadBlockSet);
1619 
1620  // Filter out the dead blocks from the exit blocks list so that it can be
1621  // used in the caller.
1622  llvm::erase_if(ExitBlocks,
1623  [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1624 
1625  // Walk from this loop up through its parents removing all of the dead blocks.
1626  for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1627  for (auto *BB : DeadBlockSet)
1628  ParentL->getBlocksSet().erase(BB);
1629  llvm::erase_if(ParentL->getBlocksVector(),
1630  [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1631  }
1632 
1633  // Now delete the dead child loops. This raw delete will clear them
1634  // recursively.
1635  llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1636  if (!DeadBlockSet.count(ChildL->getHeader()))
1637  return false;
1638 
1639  assert(llvm::all_of(ChildL->blocks(),
1640  [&](BasicBlock *ChildBB) {
1641  return DeadBlockSet.count(ChildBB);
1642  }) &&
1643  "If the child loop header is dead all blocks in the child loop must "
1644  "be dead as well!");
1645  DestroyLoopCB(*ChildL, ChildL->getName());
1646  LI.destroy(ChildL);
1647  return true;
1648  });
1649 
1650  // Remove the loop mappings for the dead blocks and drop all the references
1651  // from these blocks to others to handle cyclic references as we start
1652  // deleting the blocks themselves.
1653  for (auto *BB : DeadBlockSet) {
1654  // Check that the dominator tree has already been updated.
1655  assert(!DT.getNode(BB) && "Should already have cleared domtree!");
1656  LI.changeLoopFor(BB, nullptr);
1657  // Drop all uses of the instructions to make sure we won't have dangling
1658  // uses in other blocks.
1659  for (auto &I : *BB)
1660  if (!I.use_empty())
1661  I.replaceAllUsesWith(UndefValue::get(I.getType()));
1662  BB->dropAllReferences();
1663  }
1664 
1665  // Actually delete the blocks now that they've been fully unhooked from the
1666  // IR.
1667  for (auto *BB : DeadBlockSet)
1668  BB->eraseFromParent();
1669 }
1670 
1671 /// Recompute the set of blocks in a loop after unswitching.
1672 ///
1673 /// This walks from the original headers predecessors to rebuild the loop. We
1674 /// take advantage of the fact that new blocks can't have been added, and so we
1675 /// filter by the original loop's blocks. This also handles potentially
1676 /// unreachable code that we don't want to explore but might be found examining
1677 /// the predecessors of the header.
1678 ///
1679 /// If the original loop is no longer a loop, this will return an empty set. If
1680 /// it remains a loop, all the blocks within it will be added to the set
1681 /// (including those blocks in inner loops).
1683  LoopInfo &LI) {
1685 
1686  auto *PH = L.getLoopPreheader();
1687  auto *Header = L.getHeader();
1688 
1689  // A worklist to use while walking backwards from the header.
1691 
1692  // First walk the predecessors of the header to find the backedges. This will
1693  // form the basis of our walk.
1694  for (auto *Pred : predecessors(Header)) {
1695  // Skip the preheader.
1696  if (Pred == PH)
1697  continue;
1698 
1699  // Because the loop was in simplified form, the only non-loop predecessor
1700  // is the preheader.
1701  assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1702  "than the preheader that is not part of the "
1703  "loop!");
1704 
1705  // Insert this block into the loop set and on the first visit and, if it
1706  // isn't the header we're currently walking, put it into the worklist to
1707  // recurse through.
1708  if (LoopBlockSet.insert(Pred).second && Pred != Header)
1709  Worklist.push_back(Pred);
1710  }
1711 
1712  // If no backedges were found, we're done.
1713  if (LoopBlockSet.empty())
1714  return LoopBlockSet;
1715 
1716  // We found backedges, recurse through them to identify the loop blocks.
1717  while (!Worklist.empty()) {
1718  BasicBlock *BB = Worklist.pop_back_val();
1719  assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1720 
1721  // No need to walk past the header.
1722  if (BB == Header)
1723  continue;
1724 
1725  // Because we know the inner loop structure remains valid we can use the
1726  // loop structure to jump immediately across the entire nested loop.
1727  // Further, because it is in loop simplified form, we can directly jump
1728  // to its preheader afterward.
1729  if (Loop *InnerL = LI.getLoopFor(BB))
1730  if (InnerL != &L) {
1731  assert(L.contains(InnerL) &&
1732  "Should not reach a loop *outside* this loop!");
1733  // The preheader is the only possible predecessor of the loop so
1734  // insert it into the set and check whether it was already handled.
1735  auto *InnerPH = InnerL->getLoopPreheader();
1736  assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1737  "but not contain the inner loop "
1738  "preheader!");
1739  if (!LoopBlockSet.insert(InnerPH).second)
1740  // The only way to reach the preheader is through the loop body
1741  // itself so if it has been visited the loop is already handled.
1742  continue;
1743 
1744  // Insert all of the blocks (other than those already present) into
1745  // the loop set. We expect at least the block that led us to find the
1746  // inner loop to be in the block set, but we may also have other loop
1747  // blocks if they were already enqueued as predecessors of some other
1748  // outer loop block.
1749  for (auto *InnerBB : InnerL->blocks()) {
1750  if (InnerBB == BB) {
1751  assert(LoopBlockSet.count(InnerBB) &&
1752  "Block should already be in the set!");
1753  continue;
1754  }
1755 
1756  LoopBlockSet.insert(InnerBB);
1757  }
1758 
1759  // Add the preheader to the worklist so we will continue past the
1760  // loop body.
1761  Worklist.push_back(InnerPH);
1762  continue;
1763  }
1764 
1765  // Insert any predecessors that were in the original loop into the new
1766  // set, and if the insert is successful, add them to the worklist.
1767  for (auto *Pred : predecessors(BB))
1768  if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1769  Worklist.push_back(Pred);
1770  }
1771 
1772  assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
1773 
1774  // We've found all the blocks participating in the loop, return our completed
1775  // set.
1776  return LoopBlockSet;
1777 }
1778 
1779 /// Rebuild a loop after unswitching removes some subset of blocks and edges.
1780 ///
1781 /// The removal may have removed some child loops entirely but cannot have
1782 /// disturbed any remaining child loops. However, they may need to be hoisted
1783 /// to the parent loop (or to be top-level loops). The original loop may be
1784 /// completely removed.
1785 ///
1786 /// The sibling loops resulting from this update are returned. If the original
1787 /// loop remains a valid loop, it will be the first entry in this list with all
1788 /// of the newly sibling loops following it.
1789 ///
1790 /// Returns true if the loop remains a loop after unswitching, and false if it
1791 /// is no longer a loop after unswitching (and should not continue to be
1792 /// referenced).
1794  LoopInfo &LI,
1795  SmallVectorImpl<Loop *> &HoistedLoops) {
1796  auto *PH = L.getLoopPreheader();
1797 
1798  // Compute the actual parent loop from the exit blocks. Because we may have
1799  // pruned some exits the loop may be different from the original parent.
1800  Loop *ParentL = nullptr;
1801  SmallVector<Loop *, 4> ExitLoops;
1802  SmallVector<BasicBlock *, 4> ExitsInLoops;
1803  ExitsInLoops.reserve(ExitBlocks.size());
1804  for (auto *ExitBB : ExitBlocks)
1805  if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1806  ExitLoops.push_back(ExitL);
1807  ExitsInLoops.push_back(ExitBB);
1808  if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1809  ParentL = ExitL;
1810  }
1811 
1812  // Recompute the blocks participating in this loop. This may be empty if it
1813  // is no longer a loop.
1814  auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1815 
1816  // If we still have a loop, we need to re-set the loop's parent as the exit
1817  // block set changing may have moved it within the loop nest. Note that this
1818  // can only happen when this loop has a parent as it can only hoist the loop
1819  // *up* the nest.
1820  if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1821  // Remove this loop's (original) blocks from all of the intervening loops.
1822  for (Loop *IL = L.getParentLoop(); IL != ParentL;
1823  IL = IL->getParentLoop()) {
1824  IL->getBlocksSet().erase(PH);
1825  for (auto *BB : L.blocks())
1826  IL->getBlocksSet().erase(BB);
1827  llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1828  return BB == PH || L.contains(BB);
1829  });
1830  }
1831 
1832  LI.changeLoopFor(PH, ParentL);
1833  L.getParentLoop()->removeChildLoop(&L);
1834  if (ParentL)
1835  ParentL->addChildLoop(&L);
1836  else
1837  LI.addTopLevelLoop(&L);
1838  }
1839 
1840  // Now we update all the blocks which are no longer within the loop.
1841  auto &Blocks = L.getBlocksVector();
1842  auto BlocksSplitI =
1843  LoopBlockSet.empty()
1844  ? Blocks.begin()
1845  : std::stable_partition(
1846  Blocks.begin(), Blocks.end(),
1847  [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
1848 
1849  // Before we erase the list of unlooped blocks, build a set of them.
1850  SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
1851  if (LoopBlockSet.empty())
1852  UnloopedBlocks.insert(PH);
1853 
1854  // Now erase these blocks from the loop.
1855  for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
1856  L.getBlocksSet().erase(BB);
1857  Blocks.erase(BlocksSplitI, Blocks.end());
1858 
1859  // Sort the exits in ascending loop depth, we'll work backwards across these
1860  // to process them inside out.
1861  llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1862  return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
1863  });
1864 
1865  // We'll build up a set for each exit loop.
1866  SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
1867  Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
1868 
1869  auto RemoveUnloopedBlocksFromLoop =
1870  [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
1871  for (auto *BB : UnloopedBlocks)
1872  L.getBlocksSet().erase(BB);
1874  return UnloopedBlocks.count(BB);
1875  });
1876  };
1877 
1879  while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
1880  assert(Worklist.empty() && "Didn't clear worklist!");
1881  assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
1882 
1883  // Grab the next exit block, in decreasing loop depth order.
1884  BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
1885  Loop &ExitL = *LI.getLoopFor(ExitBB);
1886  assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
1887 
1888  // Erase all of the unlooped blocks from the loops between the previous
1889  // exit loop and this exit loop. This works because the ExitInLoops list is
1890  // sorted in increasing order of loop depth and thus we visit loops in
1891  // decreasing order of loop depth.
1892  for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
1893  RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1894 
1895  // Walk the CFG back until we hit the cloned PH adding everything reachable
1896  // and in the unlooped set to this exit block's loop.
1897  Worklist.push_back(ExitBB);
1898  do {
1899  BasicBlock *BB = Worklist.pop_back_val();
1900  // We can stop recursing at the cloned preheader (if we get there).
1901  if (BB == PH)
1902  continue;
1903 
1904  for (BasicBlock *PredBB : predecessors(BB)) {
1905  // If this pred has already been moved to our set or is part of some
1906  // (inner) loop, no update needed.
1907  if (!UnloopedBlocks.erase(PredBB)) {
1908  assert((NewExitLoopBlocks.count(PredBB) ||
1909  ExitL.contains(LI.getLoopFor(PredBB))) &&
1910  "Predecessor not in a nested loop (or already visited)!");
1911  continue;
1912  }
1913 
1914  // We just insert into the loop set here. We'll add these blocks to the
1915  // exit loop after we build up the set in a deterministic order rather
1916  // than the predecessor-influenced visit order.
1917  bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
1918  (void)Inserted;
1919  assert(Inserted && "Should only visit an unlooped block once!");
1920 
1921  // And recurse through to its predecessors.
1922  Worklist.push_back(PredBB);
1923  }
1924  } while (!Worklist.empty());
1925 
1926  // If blocks in this exit loop were directly part of the original loop (as
1927  // opposed to a child loop) update the map to point to this exit loop. This
1928  // just updates a map and so the fact that the order is unstable is fine.
1929  for (auto *BB : NewExitLoopBlocks)
1930  if (Loop *BBL = LI.getLoopFor(BB))
1931  if (BBL == &L || !L.contains(BBL))
1932  LI.changeLoopFor(BB, &ExitL);
1933 
1934  // We will remove the remaining unlooped blocks from this loop in the next
1935  // iteration or below.
1936  NewExitLoopBlocks.clear();
1937  }
1938 
1939  // Any remaining unlooped blocks are no longer part of any loop unless they
1940  // are part of some child loop.
1941  for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
1942  RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1943  for (auto *BB : UnloopedBlocks)
1944  if (Loop *BBL = LI.getLoopFor(BB))
1945  if (BBL == &L || !L.contains(BBL))
1946  LI.changeLoopFor(BB, nullptr);
1947 
1948  // Sink all the child loops whose headers are no longer in the loop set to
1949  // the parent (or to be top level loops). We reach into the loop and directly
1950  // update its subloop vector to make this batch update efficient.
1951  auto &SubLoops = L.getSubLoopsVector();
1952  auto SubLoopsSplitI =
1953  LoopBlockSet.empty()
1954  ? SubLoops.begin()
1955  : std::stable_partition(
1956  SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
1957  return LoopBlockSet.count(SubL->getHeader());
1958  });
1959  for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
1960  HoistedLoops.push_back(HoistedL);
1961  HoistedL->setParentLoop(nullptr);
1962 
1963  // To compute the new parent of this hoisted loop we look at where we
1964  // placed the preheader above. We can't lookup the header itself because we
1965  // retained the mapping from the header to the hoisted loop. But the
1966  // preheader and header should have the exact same new parent computed
1967  // based on the set of exit blocks from the original loop as the preheader
1968  // is a predecessor of the header and so reached in the reverse walk. And
1969  // because the loops were all in simplified form the preheader of the
1970  // hoisted loop can't be part of some *other* loop.
1971  if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
1972  NewParentL->addChildLoop(HoistedL);
1973  else
1974  LI.addTopLevelLoop(HoistedL);
1975  }
1976  SubLoops.erase(SubLoopsSplitI, SubLoops.end());
1977 
1978  // Actually delete the loop if nothing remained within it.
1979  if (Blocks.empty()) {
1980  assert(SubLoops.empty() &&
1981  "Failed to remove all subloops from the original loop!");
1982  if (Loop *ParentL = L.getParentLoop())
1983  ParentL->removeChildLoop(llvm::find(*ParentL, &L));
1984  else
1985  LI.removeLoop(llvm::find(LI, &L));
1986  // markLoopAsDeleted for L should be triggered by the caller (it is typically
1987  // done by using the UnswitchCB callback).
1988  LI.destroy(&L);
1989  return false;
1990  }
1991 
1992  return true;
1993 }
1994 
1995 /// Helper to visit a dominator subtree, invoking a callable on each node.
1996 ///
1997 /// Returning false at any point will stop walking past that node of the tree.
1998 template <typename CallableT>
1999 void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
2000  SmallVector<DomTreeNode *, 4> DomWorklist;
2001  DomWorklist.push_back(DT[BB]);
2002 #ifndef NDEBUG
2004  Visited.insert(DT[BB]);
2005 #endif
2006  do {
2007  DomTreeNode *N = DomWorklist.pop_back_val();
2008 
2009  // Visit this node.
2010  if (!Callable(N->getBlock()))
2011  continue;
2012 
2013  // Accumulate the child nodes.
2014  for (DomTreeNode *ChildN : *N) {
2015  assert(Visited.insert(ChildN).second &&
2016  "Cannot visit a node twice when walking a tree!");
2017  DomWorklist.push_back(ChildN);
2018  }
2019  } while (!DomWorklist.empty());
2020 }
2021 
2023  Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
2024  SmallVectorImpl<BasicBlock *> &ExitBlocks, IVConditionInfo &PartialIVInfo,
2025  DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
2026  function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2027  ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
2028  function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
2029  auto *ParentBB = TI.getParent();
2030  BranchInst *BI = dyn_cast<BranchInst>(&TI);
2031  SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2032 
2033  // We can only unswitch switches, conditional branches with an invariant
2034  // condition, or combining invariant conditions with an instruction or
2035  // partially invariant instructions.
2036  assert((SI || (BI && BI->isConditional())) &&
2037  "Can only unswitch switches and conditional branch!");
2038  bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
2039  bool FullUnswitch =
2040  SI || (BI->getCondition() == Invariants[0] && !PartiallyInvariant);
2041  if (FullUnswitch)
2042  assert(Invariants.size() == 1 &&
2043  "Cannot have other invariants with full unswitching!");
2044  else
2045  assert(isa<Instruction>(BI->getCondition()) &&
2046  "Partial unswitching requires an instruction as the condition!");
2047 
2048  if (MSSAU && VerifyMemorySSA)
2049  MSSAU->getMemorySSA()->verifyMemorySSA();
2050 
2051  // Constant and BBs tracking the cloned and continuing successor. When we are
2052  // unswitching the entire condition, this can just be trivially chosen to
2053  // unswitch towards `true`. However, when we are unswitching a set of
2054  // invariants combined with `and` or `or` or partially invariant instructions,
2055  // the combining operation determines the best direction to unswitch: we want
2056  // to unswitch the direction that will collapse the branch.
2057  bool Direction = true;
2058  int ClonedSucc = 0;
2059  if (!FullUnswitch) {
2060  Value *Cond = BI->getCondition();
2061  (void)Cond;
2063  PartiallyInvariant) &&
2064  "Only `or`, `and`, an `select`, partially invariant instructions "
2065  "can combine invariants being unswitched.");
2066  if (!match(BI->getCondition(), m_LogicalOr())) {
2067  if (match(BI->getCondition(), m_LogicalAnd()) ||
2068  (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
2069  Direction = false;
2070  ClonedSucc = 1;
2071  }
2072  }
2073  }
2074 
2075  BasicBlock *RetainedSuccBB =
2076  BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2077  SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2078  if (BI)
2079  UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2080  else
2081  for (auto Case : SI->cases())
2082  if (Case.getCaseSuccessor() != RetainedSuccBB)
2083  UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2084 
2085  assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
2086  "Should not unswitch the same successor we are retaining!");
2087 
2088  // The branch should be in this exact loop. Any inner loop's invariant branch
2089  // should be handled by unswitching that inner loop. The caller of this
2090  // routine should filter out any candidates that remain (but were skipped for
2091  // whatever reason).
2092  assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
2093 
2094  // Compute the parent loop now before we start hacking on things.
2095  Loop *ParentL = L.getParentLoop();
2096  // Get blocks in RPO order for MSSA update, before changing the CFG.
2097  LoopBlocksRPO LBRPO(&L);
2098  if (MSSAU)
2099  LBRPO.perform(&LI);
2100 
2101  // Compute the outer-most loop containing one of our exit blocks. This is the
2102  // furthest up our loopnest which can be mutated, which we will use below to
2103  // update things.
2104  Loop *OuterExitL = &L;
2105  for (auto *ExitBB : ExitBlocks) {
2106  Loop *NewOuterExitL = LI.getLoopFor(ExitBB);
2107  if (!NewOuterExitL) {
2108  // We exited the entire nest with this block, so we're done.
2109  OuterExitL = nullptr;
2110  break;
2111  }
2112  if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2113  OuterExitL = NewOuterExitL;
2114  }
2115 
2116  // At this point, we're definitely going to unswitch something so invalidate
2117  // any cached information in ScalarEvolution for the outer most loop
2118  // containing an exit block and all nested loops.
2119  if (SE) {
2120  if (OuterExitL)
2121  SE->forgetLoop(OuterExitL);
2122  else
2123  SE->forgetTopmostLoop(&L);
2124  }
2125 
2126  // If the edge from this terminator to a successor dominates that successor,
2127  // store a map from each block in its dominator subtree to it. This lets us
2128  // tell when cloning for a particular successor if a block is dominated by
2129  // some *other* successor with a single data structure. We use this to
2130  // significantly reduce cloning.
2132  for (auto *SuccBB : llvm::concat<BasicBlock *const>(
2133  makeArrayRef(RetainedSuccBB), UnswitchedSuccBBs))
2134  if (SuccBB->getUniquePredecessor() ||
2135  llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2136  return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2137  }))
2138  visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2139  DominatingSucc[BB] = SuccBB;
2140  return true;
2141  });
2142 
2143  // Split the preheader, so that we know that there is a safe place to insert
2144  // the conditional branch. We will change the preheader to have a conditional
2145  // branch on LoopCond. The original preheader will become the split point
2146  // between the unswitched versions, and we will have a new preheader for the
2147  // original loop.
2148  BasicBlock *SplitBB = L.getLoopPreheader();
2149  BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2150 
2151  // Keep track of the dominator tree updates needed.
2153 
2154  // Clone the loop for each unswitched successor.
2156  VMaps.reserve(UnswitchedSuccBBs.size());
2158  for (auto *SuccBB : UnswitchedSuccBBs) {
2159  VMaps.emplace_back(new ValueToValueMapTy());
2160  ClonedPHs[SuccBB] = buildClonedLoopBlocks(
2161  L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2162  DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU);
2163  }
2164 
2165  // Drop metadata if we may break its semantics by moving this instr into the
2166  // split block.
2167  if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2169  // Do not spend time trying to understand if we can keep it, just drop it
2170  // to save compile time.
2171  TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2172  else {
2173  // It is only legal to preserve make.implicit metadata if we are
2174  // guaranteed no reach implicit null check after following this branch.
2175  ICFLoopSafetyInfo SafetyInfo;
2176  SafetyInfo.computeLoopSafetyInfo(&L);
2177  if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2178  TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2179  }
2180  }
2181 
2182  // The stitching of the branched code back together depends on whether we're
2183  // doing full unswitching or not with the exception that we always want to
2184  // nuke the initial terminator placed in the split block.
2185  SplitBB->getTerminator()->eraseFromParent();
2186  if (FullUnswitch) {
2187  // Splice the terminator from the original loop and rewrite its
2188  // successors.
2189  SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI);
2190 
2191  // Keep a clone of the terminator for MSSA updates.
2192  Instruction *NewTI = TI.clone();
2193  ParentBB->getInstList().push_back(NewTI);
2194 
2195  // First wire up the moved terminator to the preheaders.
2196  if (BI) {
2197  BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2198  BI->setSuccessor(ClonedSucc, ClonedPH);
2199  BI->setSuccessor(1 - ClonedSucc, LoopPH);
2200  DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2201  } else {
2202  assert(SI && "Must either be a branch or switch!");
2203 
2204  // Walk the cases and directly update their successors.
2205  assert(SI->getDefaultDest() == RetainedSuccBB &&
2206  "Not retaining default successor!");
2207  SI->setDefaultDest(LoopPH);
2208  for (auto &Case : SI->cases())
2209  if (Case.getCaseSuccessor() == RetainedSuccBB)
2210  Case.setSuccessor(LoopPH);
2211  else
2212  Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2213 
2214  // We need to use the set to populate domtree updates as even when there
2215  // are multiple cases pointing at the same successor we only want to
2216  // remove and insert one edge in the domtree.
2217  for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2218  DTUpdates.push_back(
2219  {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2220  }
2221 
2222  if (MSSAU) {
2223  DT.applyUpdates(DTUpdates);
2224  DTUpdates.clear();
2225 
2226  // Remove all but one edge to the retained block and all unswitched
2227  // blocks. This is to avoid having duplicate entries in the cloned Phis,
2228  // when we know we only keep a single edge for each case.
2229  MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2230  for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2231  MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2232 
2233  for (auto &VMap : VMaps)
2234  MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2235  /*IgnoreIncomingWithNoClones=*/true);
2236  MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2237 
2238  // Remove all edges to unswitched blocks.
2239  for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2240  MSSAU->removeEdge(ParentBB, SuccBB);
2241  }
2242 
2243  // Now unhook the successor relationship as we'll be replacing
2244  // the terminator with a direct branch. This is much simpler for branches
2245  // than switches so we handle those first.
2246  if (BI) {
2247  // Remove the parent as a predecessor of the unswitched successor.
2248  assert(UnswitchedSuccBBs.size() == 1 &&
2249  "Only one possible unswitched block for a branch!");
2250  BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2251  UnswitchedSuccBB->removePredecessor(ParentBB,
2252  /*KeepOneInputPHIs*/ true);
2253  DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2254  } else {
2255  // Note that we actually want to remove the parent block as a predecessor
2256  // of *every* case successor. The case successor is either unswitched,
2257  // completely eliminating an edge from the parent to that successor, or it
2258  // is a duplicate edge to the retained successor as the retained successor
2259  // is always the default successor and as we'll replace this with a direct
2260  // branch we no longer need the duplicate entries in the PHI nodes.
2261  SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2262  assert(NewSI->getDefaultDest() == RetainedSuccBB &&
2263  "Not retaining default successor!");
2264  for (auto &Case : NewSI->cases())
2265  Case.getCaseSuccessor()->removePredecessor(
2266  ParentBB,
2267  /*KeepOneInputPHIs*/ true);
2268 
2269  // We need to use the set to populate domtree updates as even when there
2270  // are multiple cases pointing at the same successor we only want to
2271  // remove and insert one edge in the domtree.
2272  for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2273  DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2274  }
2275 
2276  // After MSSAU update, remove the cloned terminator instruction NewTI.
2277  ParentBB->getTerminator()->eraseFromParent();
2278 
2279  // Create a new unconditional branch to the continuing block (as opposed to
2280  // the one cloned).
2281  BranchInst::Create(RetainedSuccBB, ParentBB);
2282  } else {
2283  assert(BI && "Only branches have partial unswitching.");
2284  assert(UnswitchedSuccBBs.size() == 1 &&
2285  "Only one possible unswitched block for a branch!");
2286  BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2287  // When doing a partial unswitch, we have to do a bit more work to build up
2288  // the branch in the split block.
2289  if (PartiallyInvariant)
2291  *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
2292  else
2293  buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
2294  *ClonedPH, *LoopPH);
2295  DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2296 
2297  if (MSSAU) {
2298  DT.applyUpdates(DTUpdates);
2299  DTUpdates.clear();
2300 
2301  // Perform MSSA cloning updates.
2302  for (auto &VMap : VMaps)
2303  MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2304  /*IgnoreIncomingWithNoClones=*/true);
2305  MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2306  }
2307  }
2308 
2309  // Apply the updates accumulated above to get an up-to-date dominator tree.
2310  DT.applyUpdates(DTUpdates);
2311 
2312  // Now that we have an accurate dominator tree, first delete the dead cloned
2313  // blocks so that we can accurately build any cloned loops. It is important to
2314  // not delete the blocks from the original loop yet because we still want to
2315  // reference the original loop to understand the cloned loop's structure.
2316  deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2317 
2318  // Build the cloned loop structure itself. This may be substantially
2319  // different from the original structure due to the simplified CFG. This also
2320  // handles inserting all the cloned blocks into the correct loops.
2321  SmallVector<Loop *, 4> NonChildClonedLoops;
2322  for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2323  buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2324 
2325  // Now that our cloned loops have been built, we can update the original loop.
2326  // First we delete the dead blocks from it and then we rebuild the loop
2327  // structure taking these deletions into account.
2328  deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, DestroyLoopCB);
2329 
2330  if (MSSAU && VerifyMemorySSA)
2331  MSSAU->getMemorySSA()->verifyMemorySSA();
2332 
2333  SmallVector<Loop *, 4> HoistedLoops;
2334  bool IsStillLoop = rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops);
2335 
2336  if (MSSAU && VerifyMemorySSA)
2337  MSSAU->getMemorySSA()->verifyMemorySSA();
2338 
2339  // This transformation has a high risk of corrupting the dominator tree, and
2340  // the below steps to rebuild loop structures will result in hard to debug
2341  // errors in that case so verify that the dominator tree is sane first.
2342  // FIXME: Remove this when the bugs stop showing up and rely on existing
2343  // verification steps.
2345 
2346  if (BI && !PartiallyInvariant) {
2347  // If we unswitched a branch which collapses the condition to a known
2348  // constant we want to replace all the uses of the invariants within both
2349  // the original and cloned blocks. We do this here so that we can use the
2350  // now updated dominator tree to identify which side the users are on.
2351  assert(UnswitchedSuccBBs.size() == 1 &&
2352  "Only one possible unswitched block for a branch!");
2353  BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2354 
2355  // When considering multiple partially-unswitched invariants
2356  // we cant just go replace them with constants in both branches.
2357  //
2358  // For 'AND' we infer that true branch ("continue") means true
2359  // for each invariant operand.
2360  // For 'OR' we can infer that false branch ("continue") means false
2361  // for each invariant operand.
2362  // So it happens that for multiple-partial case we dont replace
2363  // in the unswitched branch.
2364  bool ReplaceUnswitched =
2365  FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
2366 
2367  ConstantInt *UnswitchedReplacement =
2370  ConstantInt *ContinueReplacement =
2373  for (Value *Invariant : Invariants)
2374  // Use make_early_inc_range here as set invalidates the iterator.
2375  for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
2376  Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2377  if (!UserI)
2378  continue;
2379 
2380  // Replace it with the 'continue' side if in the main loop body, and the
2381  // unswitched if in the cloned blocks.
2382  if (DT.dominates(LoopPH, UserI->getParent()))
2383  U.set(ContinueReplacement);
2384  else if (ReplaceUnswitched &&
2385  DT.dominates(ClonedPH, UserI->getParent()))
2386  U.set(UnswitchedReplacement);
2387  }
2388  }
2389 
2390  // We can change which blocks are exit blocks of all the cloned sibling
2391  // loops, the current loop, and any parent loops which shared exit blocks
2392  // with the current loop. As a consequence, we need to re-form LCSSA for
2393  // them. But we shouldn't need to re-form LCSSA for any child loops.
2394  // FIXME: This could be made more efficient by tracking which exit blocks are
2395  // new, and focusing on them, but that isn't likely to be necessary.
2396  //
2397  // In order to reasonably rebuild LCSSA we need to walk inside-out across the
2398  // loop nest and update every loop that could have had its exits changed. We
2399  // also need to cover any intervening loops. We add all of these loops to
2400  // a list and sort them by loop depth to achieve this without updating
2401  // unnecessary loops.
2402  auto UpdateLoop = [&](Loop &UpdateL) {
2403 #ifndef NDEBUG
2404  UpdateL.verifyLoop();
2405  for (Loop *ChildL : UpdateL) {
2406  ChildL->verifyLoop();
2407  assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2408  "Perturbed a child loop's LCSSA form!");
2409  }
2410 #endif
2411  // First build LCSSA for this loop so that we can preserve it when
2412  // forming dedicated exits. We don't want to perturb some other loop's
2413  // LCSSA while doing that CFG edit.
2414  formLCSSA(UpdateL, DT, &LI, SE);
2415 
2416  // For loops reached by this loop's original exit blocks we may
2417  // introduced new, non-dedicated exits. At least try to re-form dedicated
2418  // exits for these loops. This may fail if they couldn't have dedicated
2419  // exits to start with.
2420  formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
2421  };
2422 
2423  // For non-child cloned loops and hoisted loops, we just need to update LCSSA
2424  // and we can do it in any order as they don't nest relative to each other.
2425  //
2426  // Also check if any of the loops we have updated have become top-level loops
2427  // as that will necessitate widening the outer loop scope.
2428  for (Loop *UpdatedL :
2429  llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2430  UpdateLoop(*UpdatedL);
2431  if (UpdatedL->isOutermost())
2432  OuterExitL = nullptr;
2433  }
2434  if (IsStillLoop) {
2435  UpdateLoop(L);
2436  if (L.isOutermost())
2437  OuterExitL = nullptr;
2438  }
2439 
2440  // If the original loop had exit blocks, walk up through the outer most loop
2441  // of those exit blocks to update LCSSA and form updated dedicated exits.
2442  if (OuterExitL != &L)
2443  for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2444  OuterL = OuterL->getParentLoop())
2445  UpdateLoop(*OuterL);
2446 
2447 #ifndef NDEBUG
2448  // Verify the entire loop structure to catch any incorrect updates before we
2449  // progress in the pass pipeline.
2450  LI.verify(DT);
2451 #endif
2452 
2453  // Now that we've unswitched something, make callbacks to report the changes.
2454  // For that we need to merge together the updated loops and the cloned loops
2455  // and check whether the original loop survived.
2456  SmallVector<Loop *, 4> SibLoops;
2457  for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2458  if (UpdatedL->getParentLoop() == ParentL)
2459  SibLoops.push_back(UpdatedL);
2460  UnswitchCB(IsStillLoop, PartiallyInvariant, SibLoops);
2461 
2462  if (MSSAU && VerifyMemorySSA)
2463  MSSAU->getMemorySSA()->verifyMemorySSA();
2464 
2465  if (BI)
2466  ++NumBranches;
2467  else
2468  ++NumSwitches;
2469 }
2470 
2471 /// Recursively compute the cost of a dominator subtree based on the per-block
2472 /// cost map provided.
2473 ///
2474 /// The recursive computation is memozied into the provided DT-indexed cost map
2475 /// to allow querying it for most nodes in the domtree without it becoming
2476 /// quadratic.
2478  DomTreeNode &N,
2481  // Don't accumulate cost (or recurse through) blocks not in our block cost
2482  // map and thus not part of the duplication cost being considered.
2483  auto BBCostIt = BBCostMap.find(N.getBlock());
2484  if (BBCostIt == BBCostMap.end())
2485  return 0;
2486 
2487  // Lookup this node to see if we already computed its cost.
2488  auto DTCostIt = DTCostMap.find(&N);
2489  if (DTCostIt != DTCostMap.end())
2490  return DTCostIt->second;
2491 
2492  // If not, we have to compute it. We can't use insert above and update
2493  // because computing the cost may insert more things into the map.
2494  InstructionCost Cost = std::accumulate(
2495  N.begin(), N.end(), BBCostIt->second,
2496  [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
2497  return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2498  });
2499  bool Inserted = DTCostMap.insert({&N, Cost}).second;
2500  (void)Inserted;
2501  assert(Inserted && "Should not insert a node while visiting children!");
2502  return Cost;
2503 }
2504 
2505 /// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
2506 /// making the following replacement:
2507 ///
2508 /// --code before guard--
2509 /// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2510 /// --code after guard--
2511 ///
2512 /// into
2513 ///
2514 /// --code before guard--
2515 /// br i1 %cond, label %guarded, label %deopt
2516 ///
2517 /// guarded:
2518 /// --code after guard--
2519 ///
2520 /// deopt:
2521 /// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2522 /// unreachable
2523 ///
2524 /// It also makes all relevant DT and LI updates, so that all structures are in
2525 /// valid state after this transform.
2526 static BranchInst *
2528  SmallVectorImpl<BasicBlock *> &ExitBlocks,
2529  DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
2531  LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
2532  BasicBlock *CheckBB = GI->getParent();
2533 
2534  if (MSSAU && VerifyMemorySSA)
2535  MSSAU->getMemorySSA()->verifyMemorySSA();
2536 
2537  // Remove all CheckBB's successors from DomTree. A block can be seen among
2538  // successors more than once, but for DomTree it should be added only once.
2539  SmallPtrSet<BasicBlock *, 4> Successors;
2540  for (auto *Succ : successors(CheckBB))
2541  if (Successors.insert(Succ).second)
2542  DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ});
2543 
2544  Instruction *DeoptBlockTerm =
2545  SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true);
2546  BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2547  // SplitBlockAndInsertIfThen inserts control flow that branches to
2548  // DeoptBlockTerm if the condition is true. We want the opposite.
2549  CheckBI->swapSuccessors();
2550 
2551  BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2552  GuardedBlock->setName("guarded");
2553  CheckBI->getSuccessor(1)->setName("deopt");
2554  BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2555 
2556  // We now have a new exit block.
2557  ExitBlocks.push_back(CheckBI->getSuccessor(1));
2558 
2559  if (MSSAU)
2560  MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2561 
2562  GI->moveBefore(DeoptBlockTerm);
2564 
2565  // Add new successors of CheckBB into DomTree.
2566  for (auto *Succ : successors(CheckBB))
2567  DTUpdates.push_back({DominatorTree::Insert, CheckBB, Succ});
2568 
2569  // Now the blocks that used to be CheckBB's successors are GuardedBlock's
2570  // successors.
2571  for (auto *Succ : Successors)
2572  DTUpdates.push_back({DominatorTree::Insert, GuardedBlock, Succ});
2573 
2574  // Make proper changes to DT.
2575  DT.applyUpdates(DTUpdates);
2576  // Inform LI of a new loop block.
2577  L.addBasicBlockToLoop(GuardedBlock, LI);
2578 
2579  if (MSSAU) {
2580  MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
2581  MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2582  if (VerifyMemorySSA)
2583  MSSAU->getMemorySSA()->verifyMemorySSA();
2584  }
2585 
2586  ++NumGuards;
2587  return CheckBI;
2588 }
2589 
2590 /// Cost multiplier is a way to limit potentially exponential behavior
2591 /// of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch
2592 /// candidates available. Also accounting for the number of "sibling" loops with
2593 /// the idea to account for previous unswitches that already happened on this
2594 /// cluster of loops. There was an attempt to keep this formula simple,
2595 /// just enough to limit the worst case behavior. Even if it is not that simple
2596 /// now it is still not an attempt to provide a detailed heuristic size
2597 /// prediction.
2598 ///
2599 /// TODO: Make a proper accounting of "explosion" effect for all kinds of
2600 /// unswitch candidates, making adequate predictions instead of wild guesses.
2601 /// That requires knowing not just the number of "remaining" candidates but
2602 /// also costs of unswitching for each of these candidates.
2604  Instruction &TI, Loop &L, LoopInfo &LI, DominatorTree &DT,
2606  UnswitchCandidates) {
2607 
2608  // Guards and other exiting conditions do not contribute to exponential
2609  // explosion as soon as they dominate the latch (otherwise there might be
2610  // another path to the latch remaining that does not allow to eliminate the
2611  // loop copy on unswitch).
2612  BasicBlock *Latch = L.getLoopLatch();
2613  BasicBlock *CondBlock = TI.getParent();
2614  if (DT.dominates(CondBlock, Latch) &&
2615  (isGuard(&TI) ||
2616  llvm::count_if(successors(&TI), [&L](BasicBlock *SuccBB) {
2617  return L.contains(SuccBB);
2618  }) <= 1)) {
2619  NumCostMultiplierSkipped++;
2620  return 1;
2621  }
2622 
2623  auto *ParentL = L.getParentLoop();
2624  int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2625  : std::distance(LI.begin(), LI.end()));
2626  // Count amount of clones that all the candidates might cause during
2627  // unswitching. Branch/guard counts as 1, switch counts as log2 of its cases.
2628  int UnswitchedClones = 0;
2629  for (auto Candidate : UnswitchCandidates) {
2630  Instruction *CI = Candidate.first;
2631  BasicBlock *CondBlock = CI->getParent();
2632  bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2633  if (isGuard(CI)) {
2634  if (!SkipExitingSuccessors)
2635  UnswitchedClones++;
2636  continue;
2637  }
2638  int NonExitingSuccessors = llvm::count_if(
2639  successors(CondBlock), [SkipExitingSuccessors, &L](BasicBlock *SuccBB) {
2640  return !SkipExitingSuccessors || L.contains(SuccBB);
2641  });
2642  UnswitchedClones += Log2_32(NonExitingSuccessors);
2643  }
2644 
2645  // Ignore up to the "unscaled candidates" number of unswitch candidates
2646  // when calculating the power-of-two scaling of the cost. The main idea
2647  // with this control is to allow a small number of unswitches to happen
2648  // and rely more on siblings multiplier (see below) when the number
2649  // of candidates is small.
2650  unsigned ClonesPower =
2651  std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2652 
2653  // Allowing top-level loops to spread a bit more than nested ones.
2654  int SiblingsMultiplier =
2655  std::max((ParentL ? SiblingsCount
2656  : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2657  1);
2658  // Compute the cost multiplier in a way that won't overflow by saturating
2659  // at an upper bound.
2660  int CostMultiplier;
2661  if (ClonesPower > Log2_32(UnswitchThreshold) ||
2662  SiblingsMultiplier > UnswitchThreshold)
2663  CostMultiplier = UnswitchThreshold;
2664  else
2665  CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2666  (int)UnswitchThreshold);
2667 
2668  LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier
2669  << " (siblings " << SiblingsMultiplier << " * clones "
2670  << (1 << ClonesPower) << ")"
2671  << " for unswitch candidate: " << TI << "\n");
2672  return CostMultiplier;
2673 }
2674 
2676  Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
2678  function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2679  ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
2680  function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
2681  // Collect all invariant conditions within this loop (as opposed to an inner
2682  // loop which would be handled when visiting that inner loop).
2684  UnswitchCandidates;
2685 
2686  // Whether or not we should also collect guards in the loop.
2687  bool CollectGuards = false;
2688  if (UnswitchGuards) {
2689  auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
2690  Intrinsic::getName(Intrinsic::experimental_guard));
2691  if (GuardDecl && !GuardDecl->use_empty())
2692  CollectGuards = true;
2693  }
2694 
2695  IVConditionInfo PartialIVInfo;
2696  for (auto *BB : L.blocks()) {
2697  if (LI.getLoopFor(BB) != &L)
2698  continue;
2699 
2700  if (CollectGuards)
2701  for (auto &I : *BB)
2702  if (isGuard(&I)) {
2703  auto *Cond = cast<IntrinsicInst>(&I)->getArgOperand(0);
2704  // TODO: Support AND, OR conditions and partial unswitching.
2705  if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2706  UnswitchCandidates.push_back({&I, {Cond}});
2707  }
2708 
2709  if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2710  // We can only consider fully loop-invariant switch conditions as we need
2711  // to completely eliminate the switch after unswitching.
2712  if (!isa<Constant>(SI->getCondition()) &&
2713  L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
2714  UnswitchCandidates.push_back({SI, {SI->getCondition()}});
2715  continue;
2716  }
2717 
2718  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2719  if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) ||
2720  BI->getSuccessor(0) == BI->getSuccessor(1))
2721  continue;
2722 
2723  // If BI's condition is 'select _, true, false', simplify it to confuse
2724  // matchers
2725  Value *Cond = BI->getCondition(), *CondNext;
2726  while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
2727  Cond = CondNext;
2728  BI->setCondition(Cond);
2729 
2730  if (L.isLoopInvariant(BI->getCondition())) {
2731  UnswitchCandidates.push_back({BI, {BI->getCondition()}});
2732  continue;
2733  }
2734 
2735  Instruction &CondI = *cast<Instruction>(BI->getCondition());
2736  if (match(&CondI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
2737  TinyPtrVector<Value *> Invariants =
2739  if (Invariants.empty())
2740  continue;
2741 
2742  UnswitchCandidates.push_back({BI, std::move(Invariants)});
2743  continue;
2744  }
2745  }
2746 
2747  Instruction *PartialIVCondBranch = nullptr;
2748  if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
2749  !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
2750  return TerminatorAndInvariants.first == L.getHeader()->getTerminator();
2751  })) {
2752  MemorySSA *MSSA = MSSAU->getMemorySSA();
2753  if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
2754  LLVM_DEBUG(
2755  dbgs() << "simple-loop-unswitch: Found partially invariant condition "
2756  << *Info->InstToDuplicate[0] << "\n");
2757  PartialIVInfo = *Info;
2758  PartialIVCondBranch = L.getHeader()->getTerminator();
2759  TinyPtrVector<Value *> ValsToDuplicate;
2760  for (auto *Inst : Info->InstToDuplicate)
2761  ValsToDuplicate.push_back(Inst);
2762  UnswitchCandidates.push_back(
2763  {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
2764  }
2765  }
2766 
2767  // If we didn't find any candidates, we're done.
2768  if (UnswitchCandidates.empty())
2769  return false;
2770 
2771  // Check if there are irreducible CFG cycles in this loop. If so, we cannot
2772  // easily unswitch non-trivial edges out of the loop. Doing so might turn the
2773  // irreducible control flow into reducible control flow and introduce new
2774  // loops "out of thin air". If we ever discover important use cases for doing
2775  // this, we can add support to loop unswitch, but it is a lot of complexity
2776  // for what seems little or no real world benefit.
2777  LoopBlocksRPO RPOT(&L);
2778  RPOT.perform(&LI);
2779  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
2780  return false;
2781 
2782  SmallVector<BasicBlock *, 4> ExitBlocks;
2783  L.getUniqueExitBlocks(ExitBlocks);
2784 
2785  // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
2786  // instruction as we don't know how to split those exit blocks.
2787  // FIXME: We should teach SplitBlock to handle this and remove this
2788  // restriction.
2789  for (auto *ExitBB : ExitBlocks) {
2790  auto *I = ExitBB->getFirstNonPHI();
2791  if (isa<CleanupPadInst>(I) || isa<CatchSwitchInst>(I)) {
2792  LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "
2793  "in exit block\n");
2794  return false;
2795  }
2796  }
2797 
2798  LLVM_DEBUG(
2799  dbgs() << "Considering " << UnswitchCandidates.size()
2800  << " non-trivial loop invariant conditions for unswitching.\n");
2801 
2802  // Given that unswitching these terminators will require duplicating parts of
2803  // the loop, so we need to be able to model that cost. Compute the ephemeral
2804  // values and set up a data structure to hold per-BB costs. We cache each
2805  // block's cost so that we don't recompute this when considering different
2806  // subsets of the loop for duplication during unswitching.
2808  CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
2810 
2811  // Compute the cost of each block, as well as the total loop cost. Also, bail
2812  // out if we see instructions which are incompatible with loop unswitching
2813  // (convergent, noduplicate, or cross-basic-block tokens).
2814  // FIXME: We might be able to safely handle some of these in non-duplicated
2815  // regions.
2817  L.getHeader()->getParent()->hasMinSize()
2820  InstructionCost LoopCost = 0;
2821  for (auto *BB : L.blocks()) {
2822  InstructionCost Cost = 0;
2823  for (auto &I : *BB) {
2824  if (EphValues.count(&I))
2825  continue;
2826 
2827  if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
2828  return false;
2829  if (auto *CB = dyn_cast<CallBase>(&I))
2830  if (CB->isConvergent() || CB->cannotDuplicate())
2831  return false;
2832 
2833  Cost += TTI.getUserCost(&I, CostKind);
2834  }
2835  assert(Cost >= 0 && "Must not have negative costs!");
2836  LoopCost += Cost;
2837  assert(LoopCost >= 0 && "Must not have negative loop costs!");
2838  BBCostMap[BB] = Cost;
2839  }
2840  LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");
2841 
2842  // Now we find the best candidate by searching for the one with the following
2843  // properties in order:
2844  //
2845  // 1) An unswitching cost below the threshold
2846  // 2) The smallest number of duplicated unswitch candidates (to avoid
2847  // creating redundant subsequent unswitching)
2848  // 3) The smallest cost after unswitching.
2849  //
2850  // We prioritize reducing fanout of unswitch candidates provided the cost
2851  // remains below the threshold because this has a multiplicative effect.
2852  //
2853  // This requires memoizing each dominator subtree to avoid redundant work.
2854  //
2855  // FIXME: Need to actually do the number of candidates part above.
2857  // Given a terminator which might be unswitched, computes the non-duplicated
2858  // cost for that terminator.
2859  auto ComputeUnswitchedCost = [&](Instruction &TI,
2860  bool FullUnswitch) -> InstructionCost {
2861  BasicBlock &BB = *TI.getParent();
2863 
2864  InstructionCost Cost = 0;
2865  for (BasicBlock *SuccBB : successors(&BB)) {
2866  // Don't count successors more than once.
2867  if (!Visited.insert(SuccBB).second)
2868  continue;
2869 
2870  // If this is a partial unswitch candidate, then it must be a conditional
2871  // branch with a condition of either `or`, `and`, their corresponding
2872  // select forms or partially invariant instructions. In that case, one of
2873  // the successors is necessarily duplicated, so don't even try to remove
2874  // its cost.
2875  if (!FullUnswitch) {
2876  auto &BI = cast<BranchInst>(TI);
2877  if (match(BI.getCondition(), m_LogicalAnd())) {
2878  if (SuccBB == BI.getSuccessor(1))
2879  continue;
2880  } else if (match(BI.getCondition(), m_LogicalOr())) {
2881  if (SuccBB == BI.getSuccessor(0))
2882  continue;
2883  } else if ((PartialIVInfo.KnownValue->isOneValue() &&
2884  SuccBB == BI.getSuccessor(0)) ||
2885  (!PartialIVInfo.KnownValue->isOneValue() &&
2886  SuccBB == BI.getSuccessor(1)))
2887  continue;
2888  }
2889 
2890  // This successor's domtree will not need to be duplicated after
2891  // unswitching if the edge to the successor dominates it (and thus the
2892  // entire tree). This essentially means there is no other path into this
2893  // subtree and so it will end up live in only one clone of the loop.
2894  if (SuccBB->getUniquePredecessor() ||
2895  llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2896  return PredBB == &BB || DT.dominates(SuccBB, PredBB);
2897  })) {
2898  Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
2899  assert(Cost <= LoopCost &&
2900  "Non-duplicated cost should never exceed total loop cost!");
2901  }
2902  }
2903 
2904  // Now scale the cost by the number of unique successors minus one. We
2905  // subtract one because there is already at least one copy of the entire
2906  // loop. This is computing the new cost of unswitching a condition.
2907  // Note that guards always have 2 unique successors that are implicit and
2908  // will be materialized if we decide to unswitch it.
2909  int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
2910  assert(SuccessorsCount > 1 &&
2911  "Cannot unswitch a condition without multiple distinct successors!");
2912  return (LoopCost - Cost) * (SuccessorsCount - 1);
2913  };
2914  Instruction *BestUnswitchTI = nullptr;
2915  InstructionCost BestUnswitchCost = 0;
2916  ArrayRef<Value *> BestUnswitchInvariants;
2917  for (auto &TerminatorAndInvariants : UnswitchCandidates) {
2918  Instruction &TI = *TerminatorAndInvariants.first;
2919  ArrayRef<Value *> Invariants = TerminatorAndInvariants.second;
2920  BranchInst *BI = dyn_cast<BranchInst>(&TI);
2921  InstructionCost CandidateCost = ComputeUnswitchedCost(
2922  TI, /*FullUnswitch*/ !BI || (Invariants.size() == 1 &&
2923  Invariants[0] == BI->getCondition()));
2924  // Calculate cost multiplier which is a tool to limit potentially
2925  // exponential behavior of loop-unswitch.
2927  int CostMultiplier =
2928  CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
2929  assert(
2930  (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
2931  "cost multiplier needs to be in the range of 1..UnswitchThreshold");
2932  CandidateCost *= CostMultiplier;
2933  LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
2934  << " (multiplier: " << CostMultiplier << ")"
2935  << " for unswitch candidate: " << TI << "\n");
2936  } else {
2937  LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
2938  << " for unswitch candidate: " << TI << "\n");
2939  }
2940 
2941  if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
2942  BestUnswitchTI = &TI;
2943  BestUnswitchCost = CandidateCost;
2944  BestUnswitchInvariants = Invariants;
2945  }
2946  }
2947  assert(BestUnswitchTI && "Failed to find loop unswitch candidate");
2948 
2949  if (BestUnswitchCost >= UnswitchThreshold) {
2950  LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: "
2951  << BestUnswitchCost << "\n");
2952  return false;
2953  }
2954 
2955  if (BestUnswitchTI != PartialIVCondBranch)
2956  PartialIVInfo.InstToDuplicate.clear();
2957 
2958  // If the best candidate is a guard, turn it into a branch.
2959  if (isGuard(BestUnswitchTI))
2960  BestUnswitchTI = turnGuardIntoBranch(cast<IntrinsicInst>(BestUnswitchTI), L,
2961  ExitBlocks, DT, LI, MSSAU);
2962 
2963  LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = "
2964  << BestUnswitchCost << ") terminator: " << *BestUnswitchTI
2965  << "\n");
2966  unswitchNontrivialInvariants(L, *BestUnswitchTI, BestUnswitchInvariants,
2967  ExitBlocks, PartialIVInfo, DT, LI, AC,
2968  UnswitchCB, SE, MSSAU, DestroyLoopCB);
2969  return true;
2970 }
2971 
2972 /// Unswitch control flow predicated on loop invariant conditions.
2973 ///
2974 /// This first hoists all branches or switches which are trivial (IE, do not
2975 /// require duplicating any part of the loop) out of the loop body. It then
2976 /// looks at other loop invariant control flows and tries to unswitch those as
2977 /// well by cloning the loop if the result is small enough.
2978 ///
2979 /// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
2980 /// also updated based on the unswitch. The `MSSA` analysis is also updated if
2981 /// valid (i.e. its use is enabled).
2982 ///
2983 /// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
2984 /// true, we will attempt to do non-trivial unswitching as well as trivial
2985 /// unswitching.
2986 ///
2987 /// The `UnswitchCB` callback provided will be run after unswitching is
2988 /// complete, with the first parameter set to `true` if the provided loop
2989 /// remains a loop, and a list of new sibling loops created.
2990 ///
2991 /// If `SE` is non-null, we will update that analysis based on the unswitching
2992 /// done.
2993 static bool
2995  AAResults &AA, TargetTransformInfo &TTI, bool Trivial,
2996  bool NonTrivial,
2997  function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2998  ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
2999  function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
3000  assert(L.isRecursivelyLCSSAForm(DT, LI) &&
3001  "Loops must be in LCSSA form before unswitching.");
3002 
3003  // Must be in loop simplified form: we need a preheader and dedicated exits.
3004  if (!L.isLoopSimplifyForm())
3005  return false;
3006 
3007  // Try trivial unswitch first before loop over other basic blocks in the loop.
3008  if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
3009  // If we unswitched successfully we will want to clean up the loop before
3010  // processing it further so just mark it as unswitched and return.
3011  UnswitchCB(/*CurrentLoopValid*/ true, false, {});
3012  return true;
3013  }
3014 
3015  // Check whether we should continue with non-trivial conditions.
3016  // EnableNonTrivialUnswitch: Global variable that forces non-trivial
3017  // unswitching for testing and debugging.
3018  // NonTrivial: Parameter that enables non-trivial unswitching for this
3019  // invocation of the transform. But this should be allowed only
3020  // for targets without branch divergence.
3021  //
3022  // FIXME: If divergence analysis becomes available to a loop
3023  // transform, we should allow unswitching for non-trivial uniform
3024  // branches even on targets that have divergence.
3025  // https://bugs.llvm.org/show_bug.cgi?id=48819
3026  bool ContinueWithNonTrivial =
3027  EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence());
3028  if (!ContinueWithNonTrivial)
3029  return false;
3030 
3031  // Skip non-trivial unswitching for optsize functions.
3032  if (L.getHeader()->getParent()->hasOptSize())
3033  return false;
3034 
3035  // Skip non-trivial unswitching for loops that cannot be cloned.
3036  if (!L.isSafeToClone())
3037  return false;
3038 
3039  // For non-trivial unswitching, because it often creates new loops, we rely on
3040  // the pass manager to iterate on the loops rather than trying to immediately
3041  // reach a fixed point. There is no substantial advantage to iterating
3042  // internally, and if any of the new loops are simplified enough to contain
3043  // trivial unswitching we want to prefer those.
3044 
3045  // Try to unswitch the best invariant condition. We prefer this full unswitch to
3046  // a partial unswitch when possible below the threshold.
3047  if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU,
3048  DestroyLoopCB))
3049  return true;
3050 
3051  // No other opportunities to unswitch.
3052  return false;
3053 }
3054 
3057  LPMUpdater &U) {
3058  Function &F = *L.getHeader()->getParent();
3059  (void)F;
3060 
3061  LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
3062  << "\n");
3063 
3064  // Save the current loop name in a variable so that we can report it even
3065  // after it has been deleted.
3066  std::string LoopName = std::string(L.getName());
3067 
3068  auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
3069  bool PartiallyInvariant,
3070  ArrayRef<Loop *> NewLoops) {
3071  // If we did a non-trivial unswitch, we have added new (cloned) loops.
3072  if (!NewLoops.empty())
3073  U.addSiblingLoops(NewLoops);
3074 
3075  // If the current loop remains valid, we should revisit it to catch any
3076  // other unswitch opportunities. Otherwise, we need to mark it as deleted.
3077  if (CurrentLoopValid) {
3078  if (PartiallyInvariant) {
3079  // Mark the new loop as partially unswitched, to avoid unswitching on
3080  // the same condition again.
3081  auto &Context = L.getHeader()->getContext();
3082  MDNode *DisableUnswitchMD = MDNode::get(
3083  Context,
3084  MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
3086  Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
3087  {DisableUnswitchMD});
3088  L.setLoopID(NewLoopID);
3089  } else
3090  U.revisitCurrentLoop();
3091  } else
3092  U.markLoopAsDeleted(L, LoopName);
3093  };
3094 
3095  auto DestroyLoopCB = [&U](Loop &L, StringRef Name) {
3096  U.markLoopAsDeleted(L, Name);
3097  };
3098 
3100  if (AR.MSSA) {
3101  MSSAU = MemorySSAUpdater(AR.MSSA);
3102  if (VerifyMemorySSA)
3103  AR.MSSA->verifyMemorySSA();
3104  }
3105  if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
3106  UnswitchCB, &AR.SE,
3107  MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
3108  DestroyLoopCB))
3109  return PreservedAnalyses::all();
3110 
3111  if (AR.MSSA && VerifyMemorySSA)
3112  AR.MSSA->verifyMemorySSA();
3113 
3114  // Historically this pass has had issues with the dominator tree so verify it
3115  // in asserts builds.
3117 
3118  auto PA = getLoopPassPreservedAnalyses();
3119  if (AR.MSSA)
3120  PA.preserve<MemorySSAAnalysis>();
3121  return PA;
3122 }
3123 
3125  raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
3127  OS, MapClassName2PassName);
3128 
3129  OS << "<";
3130  OS << (NonTrivial ? "" : "no-") << "nontrivial;";
3131  OS << (Trivial ? "" : "no-") << "trivial";
3132  OS << ">";
3133 }
3134 
3135 namespace {
3136 
3137 class SimpleLoopUnswitchLegacyPass : public LoopPass {
3138  bool NonTrivial;
3139 
3140 public:
3141  static char ID; // Pass ID, replacement for typeid
3142 
3143  explicit SimpleLoopUnswitchLegacyPass(bool NonTrivial = false)
3144  : LoopPass(ID), NonTrivial(NonTrivial) {
3147  }
3148 
3149  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
3150 
3151  void getAnalysisUsage(AnalysisUsage &AU) const override {
3157  }
3158 };
3159 
3160 } // end anonymous namespace
3161 
3162 bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
3163  if (skipLoop(L))
3164  return false;
3165 
3166  Function &F = *L->getHeader()->getParent();
3167 
3168  LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L
3169  << "\n");
3170 
3171  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3172  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
3173  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
3174  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
3175  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
3176  MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
3177  MemorySSAUpdater MSSAU(MSSA);
3178 
3179  auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
3180  auto *SE = SEWP ? &SEWP->getSE() : nullptr;
3181 
3182  auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid, bool PartiallyInvariant,
3183  ArrayRef<Loop *> NewLoops) {
3184  // If we did a non-trivial unswitch, we have added new (cloned) loops.
3185  for (auto *NewL : NewLoops)
3186  LPM.addLoop(*NewL);
3187 
3188  // If the current loop remains valid, re-add it to the queue. This is
3189  // a little wasteful as we'll finish processing the current loop as well,
3190  // but it is the best we can do in the old PM.
3191  if (CurrentLoopValid) {
3192  // If the current loop has been unswitched using a partially invariant
3193  // condition, we should not re-add the current loop to avoid unswitching
3194  // on the same condition again.
3195  if (!PartiallyInvariant)
3196  LPM.addLoop(*L);
3197  } else
3198  LPM.markLoopAsDeleted(*L);
3199  };
3200 
3201  auto DestroyLoopCB = [&LPM](Loop &L, StringRef /* Name */) {
3202  LPM.markLoopAsDeleted(L);
3203  };
3204 
3205  if (VerifyMemorySSA)
3206  MSSA->verifyMemorySSA();
3207 
3208  bool Changed = unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial,
3209  UnswitchCB, SE, &MSSAU, DestroyLoopCB);
3210 
3211  if (VerifyMemorySSA)
3212  MSSA->verifyMemorySSA();
3213 
3214  // Historically this pass has had issues with the dominator tree so verify it
3215  // in asserts builds.
3217 
3218  return Changed;
3219 }
3220 
3222 INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
3223  "Simple unswitch loops", false, false)
3230 INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
3232 
3234  return new SimpleLoopUnswitchLegacyPass(NonTrivial);
3235 }
i
i
Definition: README.txt:29
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::formLCSSA
bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition: LCSSA.cpp:336
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:511
cloneLoopNest
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
Definition: SimpleLoopUnswitch.cpp:1252
AssumptionCache.h
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:298
llvm::createSimpleLoopUnswitchLegacyPass
Pass * createSimpleLoopUnswitchLegacyPass(bool NonTrivial=false)
Create the legacy pass object for the simple loop unswitcher.
Definition: SimpleLoopUnswitch.cpp:3233
llvm::predecessors
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:127
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:211
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:64
UnswitchNumInitialUnscaledCandidates
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::MemorySSA::verifyMemorySSA
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1899
llvm::LoopStandardAnalysisResults::AC
AssumptionCache & AC
Definition: LoopAnalysisManager.h:54
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:266
llvm::LoopBase::getExitBlocks
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
Definition: LoopInfoImpl.h:62
llvm::AssumptionCache::registerAssumption
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
Definition: AssumptionCache.cpp:217
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:378
ValueMapper.h
llvm::SwitchInstProfUpdateWrapper
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
Definition: Instructions.h:3550
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
llvm::PassInfoMixin< SimpleLoopUnswitchPass >
llvm::SwitchInstProfUpdateWrapper::getSuccessorWeight
CaseWeightOpt getSuccessorWeight(unsigned idx)
Definition: Instructions.cpp:4283
MemorySSAUpdater.h
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
llvm::cfg::UpdateKind::Insert
@ Insert
unswitchTrivialBranch
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
Definition: SimpleLoopUnswitch.cpp:427
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::LoopInfoBase::verify
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:689
Statistic.h
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:194
llvm::SwitchInst::Create
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3394
recomputeLoopBlockSet
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
Definition: SimpleLoopUnswitch.cpp:1682
ErrorHandling.h
llvm::PatternMatch::m_CombineOr
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:210
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Intrinsic::getName
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:879
llvm::IRBuilder<>
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1728
llvm::SmallDenseMap
Definition: DenseMap.h:880
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:461
llvm::LoopInfoBase::changeLoopFor
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
Definition: LoopInfo.h:1005
Local.h
llvm::IRBuilderBase::CreateOr
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1388
llvm::MemorySSAUpdater::removeBlocks
void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.
Definition: MemorySSAUpdater.cpp:1377
unswitchNontrivialInvariants
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, SmallVectorImpl< BasicBlock * > &ExitBlocks, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, function_ref< void(bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
Definition: SimpleLoopUnswitch.cpp:2022
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:149
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:214
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::begin
iterator begin()
Definition: DenseMap.h:74
CalculateUnswitchCostMultiplier
static int CalculateUnswitchCostMultiplier(Instruction &TI, Loop &L, LoopInfo &LI, DominatorTree &DT, ArrayRef< std::pair< Instruction *, TinyPtrVector< Value * >>> UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
Definition: SimpleLoopUnswitch.cpp:2603
ScalarEvolution.h
unswitchBestCondition
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, function_ref< void(bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
Definition: SimpleLoopUnswitch.cpp:2675
DenseMap.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:329
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1268
areLoopExitPHIsLoopInvariant
static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB, BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
Definition: SimpleLoopUnswitch.cpp:179
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
llvm::DominatorTreeBase::Insert
static constexpr UpdateKind Insert
Definition: GenericDomTree.h:242
llvm::MemorySSAUpdater::moveAllAfterSpliceBlocks
void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
Definition: MemorySSAUpdater.cpp:1252
llvm::DominatorTreeBase::getNode
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition: GenericDomTree.h:351
turnGuardIntoBranch
static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...
Definition: SimpleLoopUnswitch.cpp:2527
unswitch
simple loop unswitch
Definition: SimpleLoopUnswitch.cpp:3230
llvm::Optional< uint32_t >
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::SmallPtrSet< Instruction *, 8 >
llvm::SwitchInstProfUpdateWrapper::removeCase
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Definition: Instructions.cpp:4241
STLExtras.h
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::SwitchInst::cases
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
Definition: Instructions.h:3451
llvm::SwitchInst::getDefaultDest
BasicBlock * getDefaultDest() const
Definition: Instructions.h:3412
llvm::successors
succ_range successors(Instruction *I)
Definition: CFG.h:262
llvm::SimpleLoopUnswitchPass::printPipeline
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition: SimpleLoopUnswitch.cpp:3124
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:55
llvm::Sched::Fast
@ Fast
Definition: TargetLowering.h:105
Sequence.h
llvm::Constant::isOneValue
bool isOneValue() const
Returns true if the value is one.
Definition: Constants.cpp:108
llvm::Module::getFunction
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:178
llvm::zip_first
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&... args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
Definition: STLExtras.h:741
llvm::count_if
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1637
computeDomSubtreeCost
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
Definition: SimpleLoopUnswitch.cpp:2477
Use.h
MustExecute.h
llvm::LoopBase::getNumBlocks
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:185
llvm::MemorySSAUpdater::applyUpdates
void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
Definition: MemorySSAUpdater.cpp:810
llvm::MemorySSAUpdater::createMemoryAccessInBB
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point)
Create a MemoryAccess in MemorySSA at a specified point in a block, with a specified clobbering defin...
Definition: MemorySSAUpdater.cpp:1434
llvm::DominatorTreeBase::applyUpdates
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
Definition: GenericDomTree.h:544
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::BranchInst::swapSuccessors
void swapSuccessors()
Swap the successors of this branch instruction.
Definition: Instructions.cpp:1295
llvm::LPPassManager::addLoop
void addLoop(Loop &L)
Definition: LoopPass.cpp:79
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1203
buildPartialInvariantUnswitchConditionalBranch
static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU)
Copy a set of loop invariant values, and conditionally branch on them.
Definition: SimpleLoopUnswitch.cpp:212
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BranchInst::setSuccessor
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Definition: Instructions.h:3158
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1336
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::Optional::getPointer
constexpr const T * getPointer() const
Definition: Optional.h:280
LoopAnalysisManager.h
llvm::makePostTransformationMetadata
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
Definition: LoopInfo.cpp:1124
llvm::LoopBlocksRPO
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:288
llvm::isGuard
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
Definition: GuardUtils.cpp:18
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:115
DropNonTrivialImplicitNullChecks
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
Instruction.h
CommandLine.h
CodeMetrics.h
replaceLoopInvariantUses
static void replaceLoopInvariantUses(Loop &L, Value *Invariant, Constant &Replacement)
Definition: SimpleLoopUnswitch.cpp:162
llvm::SwitchInst::CaseHandleImpl::getCaseSuccessor
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
Definition: Instructions.h:3277
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1547
llvm::RemapInstruction
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:253
hoistLoopToNewParent
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
Definition: SimpleLoopUnswitch.cpp:328
llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:981
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition: PatternMatch.h:1472
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::AAResults
Definition: AliasAnalysis.h:456
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:648
llvm::DominatorTreeBase::insertEdge
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
Definition: GenericDomTree.h:584
Twine.h
InstrTypes.h
llvm::LoopBase::addChildLoop
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition: LoopInfo.h:395
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
llvm::TargetTransformInfo::hasBranchDivergence
bool hasBranchDivergence() const
Return true if branch divergence exists.
Definition: TargetTransformInfo.cpp:232
llvm::SwitchInst::CaseHandleImpl::getSuccessorIndex
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
Definition: Instructions.h:3288
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::TinyPtrVector::push_back
void push_back(EltTy NewVal)
Definition: TinyPtrVector.h:244
rewritePHINodesForExitAndUnswitchedBlocks
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
Definition: SimpleLoopUnswitch.cpp:283
llvm::LoopInfoBase::addTopLevelLoop
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
Definition: LoopInfo.h:1024
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::LoopInfoBase::destroy
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
Definition: LoopInfo.h:1071
llvm::Value::uses
iterator_range< use_iterator > uses()
Definition: Value.h:377
UnswitchThreshold
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
false
Definition: StackSlotColoring.cpp:142
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:596
llvm::MemorySSAUpdater::moveToPlace
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
Definition: MemorySSAUpdater.cpp:1205
llvm::Instruction
Definition: Instruction.h:45
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
unswitchLoop
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, function_ref< void(bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
Unswitch control flow predicated on loop invariant conditions.
Definition: SimpleLoopUnswitch.cpp:2994
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:354
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:364
llvm::SwitchInstProfUpdateWrapper::setSuccessorWeight
void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
Definition: Instructions.cpp:4289
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1771
LoopUtils.h
llvm::AArch64CC::PL
@ PL
Definition: AArch64BaseInfo.h:260
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
llvm::LPPassManager
Definition: LoopPass.h:75
SmallPtrSet.h
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
PatternMatch.h
visitDomSubTree
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
Definition: SimpleLoopUnswitch.cpp:1999
llvm::ValueMap::count
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: ValueMap.h:152
llvm::Loop::getName
StringRef getName() const
Definition: LoopInfo.h:866
llvm::RF_NoModuleLevelChanges
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:72
LoopIterator.h
deleteDeadClonedBlocks
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy >> VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
Definition: SimpleLoopUnswitch.cpp:1561
llvm::IRBuilderBase::CreateAnd
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1362
llvm::Loop::LoopBounds::Direction
Direction
An enum for the direction of the loop.
Definition: LoopInfo.h:701
llvm::MemorySSAUpdater::removeDuplicatePhiEdgesBetween
void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
Definition: MemorySSAUpdater.cpp:539
llvm::PatternMatch::m_One
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:513
llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:282
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
llvm::LoopInfoBase::AllocateLoop
LoopT * AllocateLoop(ArgsTy &&... Args)
Definition: LoopInfo.h:931
collectHomogenousInstGraphLoopInvariants
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root, LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
Definition: SimpleLoopUnswitch.cpp:120
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3141
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:168
llvm::MemorySSAUpdater::applyInsertUpdates
void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
Definition: MemorySSAUpdater.cpp:861
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:140
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:478
llvm::CloneBasicBlock
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
Definition: CloneFunction.cpp:43
buildPartialUnswitchConditionalBranch
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc)
Copy a set of loop invariant values ToDuplicate and insert them at the end of BB and conditionally br...
Definition: SimpleLoopUnswitch.cpp:198
BasicBlock.h
llvm::cl::opt< bool >
llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:535
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
llvm::MemorySSAUpdater::removeEdge
void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
Definition: MemorySSAUpdater.cpp:532
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
uint64_t
llvm::SwitchInst::setDefaultDest
void setDefaultDest(BasicBlock *DefaultCase)
Definition: Instructions.h:3416
llvm::LoopPass
Definition: LoopPass.h:27
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2387
llvm::PatternMatch::m_LogicalOr
LogicalOp_match< LHS, RHS, Instruction::Or > m_LogicalOr(const LHS &L, const RHS &R)
Matches L || R either in the form of L | R or L ? true : R.
Definition: PatternMatch.h:2510
llvm::MemorySSAUpdater
Definition: MemorySSAUpdater.h:56
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:572
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:249
llvm::MemorySSAUpdater::getMemorySSA
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
Definition: MemorySSAUpdater.h:244
llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1567
llvm::LoopBase::getUniqueExitBlocks
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:122
llvm::MemorySSAUpdater::updateForClonedLoop
void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
Definition: MemorySSAUpdater.cpp:677
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3116
llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:705
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:967
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::MemorySSA::getMemoryAccess
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:722
Cloning.h
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:572
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::MemoryDef
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition: MemorySSA.h:377
SimpleLoopUnswitch.h
UnswitchGuards
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
llvm::MDString::get
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:473
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::ValueToValueMapTy
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
Definition: MemorySSAUpdater.h:52
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", "Simple unswitch loops", false, false) INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass
llvm::LoopBase::getLoopDepth
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition: LoopInfo.h:96
llvm::IVConditionInfo::KnownValue
Constant * KnownValue
Constant to indicate for which value the condition is invariant.
Definition: LoopUtils.h:493
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::DominatorTree::isReachableFromEntry
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:328
llvm::LoopBase::getBlocksVector
std::vector< BlockT * > & getBlocksVector()
Return a direct, mutable handle to the blocks vector so that we can mutate it efficiently with techni...
Definition: LoopInfo.h:192
llvm::IVConditionInfo::InstToDuplicate
SmallVector< Instruction * > InstToDuplicate
Instructions that need to be duplicated and checked for the unswitching condition.
Definition: LoopUtils.h:490
llvm::LoopInfoBase::begin
iterator begin() const
Definition: LoopInfo.h:942
llvm::ScalarEvolution::forgetTopmostLoop
void forgetTopmostLoop(const Loop *L)
Definition: ScalarEvolution.cpp:7509
llvm::MDNode
Metadata node.
Definition: Metadata.h:901
llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:931
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
getTopMostExitingLoop
static Loop * getTopMostExitingLoop(BasicBlock *ExitBB, LoopInfo &LI)
Definition: SimpleLoopUnswitch.cpp:399
CFG.h
llvm::Instruction::clone
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
Definition: Instruction.cpp:850
llvm::LoopInfoBase::getLoopDepth
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
Definition: LoopInfo.h:974
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:94
llvm::LoopBase::removeChildLoop
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Definition: LoopInfo.h:404
rebuildLoopAfterUnswitch
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops)
Rebuild a loop after unswitching removes some subset of blocks and edges.
Definition: SimpleLoopUnswitch.cpp:1793
llvm::LoopBase::getSubLoopsVector
std::vector< LoopT * > & getSubLoopsVector()
Definition: LoopInfo.h:147
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:200
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1554
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:179
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:215
LoopPass.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::MemorySSA::getBlockDefs
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition: MemorySSA.h:770
llvm::initializeSimpleLoopUnswitchLegacyPassPass
void initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry &)
llvm::SwitchInst::CaseHandleImpl::getCaseValue
ConstantIntT * getCaseValue() const
Resolves case value for current case.
Definition: Instructions.h:3270
llvm::LoopBase::isOutermost
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
Definition: LoopInfo.h:168
llvm::LoopBase::reserveBlocks
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
Definition: LoopInfo.h:436
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:219
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:520
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:978
loops
simple loop Simple unswitch loops
Definition: SimpleLoopUnswitch.cpp:3231
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:56
unswitchTrivialSwitch
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
Definition: SimpleLoopUnswitch.cpp:655
llvm::pred_empty
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:119
llvm::findOptionMDForLoop
MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
Definition: LoopInfo.cpp:1045
llvm::IRBuilderBase::CreateCondBr
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:995
llvm::DomTreeNodeBase< BasicBlock >
llvm::ValueMap< const Value *, WeakTrackingVH >
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
llvm::CallBase::setArgOperand
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1343
llvm::LoopInfoBase::removeLoop
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
Definition: LoopInfo.h:994
llvm::ConstantInt::getFalse
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:855
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:92
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:671
llvm::MemorySSAUpdater::updateExitBlocksForClonedLoop
void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
Definition: MemorySSAUpdater.cpp:788
llvm::BasicBlock::front
const Instruction & front() const
Definition: BasicBlock.h:308
llvm::RF_IgnoreMissingLocals
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:90
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
llvm::SimpleLoopUnswitchPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: SimpleLoopUnswitch.cpp:3055
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
Constant.h
llvm::SplitEdge
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
Definition: BasicBlockUtils.cpp:493
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::formDedicatedExitBlocks
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:62
llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1682
deleteDeadBlocksFromLoop
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
Definition: SimpleLoopUnswitch.cpp:1591
llvm::ScalarEvolution::forgetLoop
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
Definition: ScalarEvolution.cpp:7452
llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:848
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
llvm::Loop::setLoopID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:527
llvm::SwitchInstProfUpdateWrapper::addCase
void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
Definition: Instructions.cpp:4255
MSSAThreshold
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
llvm::LPPassManager::markLoopAsDeleted
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:113
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:59
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:83
llvm::ICFLoopSafetyInfo
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
Definition: MustExecute.h:132
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2667
GenericDomTree.h
buildClonedLoops
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
Definition: SimpleLoopUnswitch.cpp:1311
llvm::LoopBase::getBlocksSet
SmallPtrSetImpl< const BlockT * > & getBlocksSet()
Return a direct, mutable handle to the blocks set so that we can mutate it efficiently.
Definition: LoopInfo.h:198
llvm::Loop::getLoopID
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:503
llvm::MemorySSA::BeforeTerminator
@ BeforeTerminator
Definition: MemorySSA.h:793
Casting.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1488
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::count
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:215
llvm::IVConditionInfo
Struct to hold information about a partially invariant condition.
Definition: LoopUtils.h:487
llvm::Loop::isRecursivelyLCSSAForm
bool isRecursivelyLCSSAForm(const DominatorTree &DT, const LoopInfo &LI) const
Return true if this Loop and all inner subloops are in LCSSA form.
Definition: LoopInfo.cpp:468
llvm::hasPartialIVCondition
Optional< IVConditionInfo > hasPartialIVCondition(Loop &L, unsigned MSSAThreshold, MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
Definition: LoopUtils.cpp:1593
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:57
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:53
llvm::SwitchInstProfUpdateWrapper::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
Definition: Instructions.cpp:4274
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
GuardUtils.h
llvm::makeArrayRef
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:476
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:363
llvm::TinyPtrVector::empty
bool empty() const
Definition: TinyPtrVector.h:163
llvm::LoopInfoBase::end
iterator end() const
Definition: LoopInfo.h:943
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
MemorySSA.h
Instructions.h
llvm::BasicBlock::moveBefore
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.cpp:133
SmallVector.h
llvm::LoopBase::addBasicBlockToLoop
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:242
Dominators.h
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1338
llvm::SmallPtrSetImplBase::empty
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
N
#define N
rewritePHINodesForUnswitchedExitBlock
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
Definition: SimpleLoopUnswitch.cpp:261
llvm::SwitchInst::CaseHandle
Definition: Instructions.h:3304
llvm::BasicBlock::size
size_t size() const
Definition: BasicBlock.h:306
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::Loop::isSafeToClone
bool isSafeToClone() const
Return true if the loop body is safe to clone in practice.
Definition: LoopInfo.cpp:485
InstructionSimplify.h
llvm::ICFLoopSafetyInfo::computeLoopSafetyInfo
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:81
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition: GenericDomTree.h:802
llvm::LoopStandardAnalysisResults::MSSA
MemorySSA * MSSA
Definition: LoopAnalysisManager.h:61
llvm::PHINode
Definition: Instructions.h:2625
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::BasicBlock::removePredecessor
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:321
llvm::SmallVectorImpl< DominatorTree::UpdateType >
llvm::Function::hasMinSize
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:668
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::LoopBase::isLoopExiting
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Definition: LoopInfo.h:225
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::ICFLoopSafetyInfo::isGuaranteedToExecute
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
Definition: MustExecute.cpp:270
llvm::ValueMap::lookup
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: ValueMap.h:165
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::VerifyMemorySSA
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:91
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3204
llvm::SplitBlockAndInsertIfThen
Instruction * SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, bool Unreachable, MDNode *BranchWeights, DominatorTree *DT, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
Definition: BasicBlockUtils.cpp:1418
llvm::cl::desc
Definition: CommandLine.h:414
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3060
raw_ostream.h
llvm::LoopBlocksRPO::perform
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:180
llvm::TinyPtrVector
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:30
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:624
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:814
Value.h
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::DominatorTreeBase::deleteEdge
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
Definition: GenericDomTree.h:602
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3139
llvm::PatternMatch::m_LogicalAnd
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
Definition: PatternMatch.h:2499
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3153
SetVector.h
llvm::DominatorTreeBase::Delete
static constexpr UpdateKind Delete
Definition: GenericDomTree.h:243
llvm::Instruction::moveBefore
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:97
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
unswitchAllTrivialConditions
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
Definition: SimpleLoopUnswitch.cpp:950
UnswitchSiblingsToplevelDiv
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
EnableNonTrivialUnswitch
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
EnableUnswitchCostMultiplier
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
buildClonedLoopBlocks
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Build the cloned blocks for an unswitched copy of the given loop.
Definition: SimpleLoopUnswitch.cpp:1068