LLVM 19.0.0git
SimpleLoopUnswitch.cpp
Go to the documentation of this file.
1///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/DenseMap.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/ADT/Sequence.h"
13#include "llvm/ADT/SetVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/Twine.h"
20#include "llvm/Analysis/CFG.h"
34#include "llvm/IR/BasicBlock.h"
35#include "llvm/IR/Constant.h"
36#include "llvm/IR/Constants.h"
37#include "llvm/IR/Dominators.h"
38#include "llvm/IR/Function.h"
39#include "llvm/IR/IRBuilder.h"
40#include "llvm/IR/InstrTypes.h"
41#include "llvm/IR/Instruction.h"
46#include "llvm/IR/Use.h"
47#include "llvm/IR/Value.h"
50#include "llvm/Support/Debug.h"
61#include <algorithm>
62#include <cassert>
63#include <iterator>
64#include <numeric>
65#include <optional>
66#include <utility>
67
68#define DEBUG_TYPE "simple-loop-unswitch"
69
70using namespace llvm;
71using namespace llvm::PatternMatch;
72
// Pass statistics and command-line options.
//
// NOTE(review): this listing has lost lines here. The embedded numbering skips
// 78, 84, 93, 97, 100, 104, 108, 113, 118, 123 and 129; each gap is followed
// by what reads as the argument list of a STATISTIC(...) or cl::opt<...>
// definition whose opening declarator line is absent. Restore those lines from
// the original source before compiling; the variable names cannot be recovered
// from this listing alone.
73STATISTIC(NumBranches, "Number of branches unswitched");
74STATISTIC(NumSwitches, "Number of switches unswitched");
75STATISTIC(NumSelects, "Number of selects turned into branches for unswitching");
76STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
77STATISTIC(NumTrivial, "Number of unswitches that are trivial");
// NOTE(review): opening line (embedded number 78) missing.
79 NumCostMultiplierSkipped,
80 "Number of unswitch candidates that had their cost multiplier skipped");
81STATISTIC(NumInvariantConditionsInjected,
82 "Number of invariant conditions injected and unswitched");
83
// NOTE(review): declarator line (embedded number 84) missing.
85 "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
86 cl::desc("Forcibly enables non-trivial loop unswitching rather than "
87 "following the configuration passed into the pass."));
88
// Integer cost threshold option; initial value 50.
89static cl::opt<int>
90 UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
91 cl::desc("The cost threshold for unswitching a loop."));
92
// NOTE(review): declarator line (embedded number 93) missing.
94 "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
95 cl::desc("Enable unswitch cost multiplier that prohibits exponential "
96 "explosion in nontrivial unswitch."));
// NOTE(review): declarator line (embedded number 97) missing.
98 "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
99 cl::desc("Toplevel siblings divisor for cost multiplier."));
// NOTE(review): declarator line (embedded number 100) missing.
101 "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
102 cl::desc("Number of unswitch candidates that are ignored when calculating "
103 "cost multiplier."));
// NOTE(review): declarator line (embedded number 104) missing.
105 "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
106 cl::desc("If enabled, simple loop unswitching will also consider "
107 "llvm.experimental.guard intrinsics as unswitch candidates."));
// NOTE(review): declarator line (embedded number 108) missing.
109 "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
110 cl::init(false), cl::Hidden,
111 cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
112 "null checks to save time analyzing if we can keep it."));
// NOTE(review): the line carrying the option's type (embedded number 113) is
// missing; only the variable name MSSAThreshold survives. Initial value 100.
114 MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
115 cl::desc("Max number of memory uses to explore during "
116 "partial unswitching analysis"),
117 cl::init(100), cl::Hidden);
// NOTE(review): declarator line (embedded number 118) missing.
119 "freeze-loop-unswitch-cond", cl::init(true), cl::Hidden,
120 cl::desc("If enabled, the freeze instruction will be added to condition "
121 "of loop unswitch to prevent miscompilation."));
122
// NOTE(review): declarator line (embedded number 123) missing.
124 "simple-loop-unswitch-inject-invariant-conditions", cl::Hidden,
125 cl::desc("Whether we should inject new invariants and unswitch them to "
126 "eliminate some existing (non-invariant) conditions."),
127 cl::init(true));
128
// NOTE(review): declarator line (embedded number 129) missing.
130 "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
131 cl::Hidden, cl::desc("Only try to inject loop invariant conditions and "
132 "unswitch on them to eliminate branches that are "
133 "not-taken 1/<this option> times or less."),
134 cl::init(16));
135
137namespace {
138struct CompareDesc {
139 BranchInst *Term;
140 Value *Invariant;
141 BasicBlock *InLoopSucc;
142
143 CompareDesc(BranchInst *Term, Value *Invariant, BasicBlock *InLoopSucc)
144 : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
145};
146
147struct InjectedInvariant {
149 Value *LHS;
150 Value *RHS;
151 BasicBlock *InLoopSucc;
152
153 InjectedInvariant(ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
154 BasicBlock *InLoopSucc)
155 : Pred(Pred), LHS(LHS), RHS(RHS), InLoopSucc(InLoopSucc) {}
156};
157
158struct NonTrivialUnswitchCandidate {
159 Instruction *TI = nullptr;
160 TinyPtrVector<Value *> Invariants;
161 std::optional<InstructionCost> Cost;
162 std::optional<InjectedInvariant> PendingInjection;
163 NonTrivialUnswitchCandidate(
164 Instruction *TI, ArrayRef<Value *> Invariants,
165 std::optional<InstructionCost> Cost = std::nullopt,
166 std::optional<InjectedInvariant> PendingInjection = std::nullopt)
167 : TI(TI), Invariants(Invariants), Cost(Cost),
168 PendingInjection(PendingInjection) {};
169
170 bool hasPendingInjection() const { return PendingInjection.has_value(); }
171};
172} // end anonymous namespace.
173
174// Helper to skip (select x, true, false), which matches both a logical AND and
175// OR and can confuse code that tries to determine if \p Cond is either a
176// logical AND or OR but not both.
178 Value *CondNext;
179 while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
180 Cond = CondNext;
181 return Cond;
182}
183
184/// Collect all of the loop invariant input values transitively used by the
185/// homogeneous instruction graph from a given root.
186///
187/// This essentially walks from a root recursively through loop variant operands
188/// which have perform the same logical operation (AND or OR) and finds all
189/// inputs which are loop invariant. For some operations these can be
190/// re-associated and unswitched out of the loop entirely.
193 const LoopInfo &LI) {
194 assert(!L.isLoopInvariant(&Root) &&
195 "Only need to walk the graph if root itself is not invariant.");
196 TinyPtrVector<Value *> Invariants;
197
198 bool IsRootAnd = match(&Root, m_LogicalAnd());
199 bool IsRootOr = match(&Root, m_LogicalOr());
200
201 // Build a worklist and recurse through operators collecting invariants.
204 Worklist.push_back(&Root);
205 Visited.insert(&Root);
206 do {
207 Instruction &I = *Worklist.pop_back_val();
208 for (Value *OpV : I.operand_values()) {
209 // Skip constants as unswitching isn't interesting for them.
210 if (isa<Constant>(OpV))
211 continue;
212
213 // Add it to our result if loop invariant.
214 if (L.isLoopInvariant(OpV)) {
215 Invariants.push_back(OpV);
216 continue;
217 }
218
219 // If not an instruction with the same opcode, nothing we can do.
220 Instruction *OpI = dyn_cast<Instruction>(skipTrivialSelect(OpV));
221
222 if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
223 (IsRootOr && match(OpI, m_LogicalOr())))) {
224 // Visit this operand.
225 if (Visited.insert(OpI).second)
226 Worklist.push_back(OpI);
227 }
228 }
229 } while (!Worklist.empty());
230
231 return Invariants;
232}
233
234static void replaceLoopInvariantUses(const Loop &L, Value *Invariant,
235 Constant &Replacement) {
236 assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");
237
238 // Replace uses of LIC in the loop with the given constant.
239 // We use make_early_inc_range as set invalidates the iterator.
240 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
241 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
242
243 // Replace this use within the loop body.
244 if (UserI && L.contains(UserI))
245 U.set(&Replacement);
246 }
247}
248
249/// Check that all the LCSSA PHI nodes in the loop exit block have trivial
250/// incoming values along this edge.
252 const BasicBlock &ExitingBB,
253 const BasicBlock &ExitBB) {
254 for (const Instruction &I : ExitBB) {
255 auto *PN = dyn_cast<PHINode>(&I);
256 if (!PN)
257 // No more PHIs to check.
258 return true;
259
260 // If the incoming value for this edge isn't loop invariant the unswitch
261 // won't be trivial.
262 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
263 return false;
264 }
265 llvm_unreachable("Basic blocks should never be empty!");
266}
267
268/// Combine the values in \p Invariants (freezing them first when requested)
269/// into one condition appended to \p BB, and conditionally branch on it. We only
270/// branch on a single value.
///
/// Each invariant is wrapped in a freeze when \p InsertFreeze is set and
/// isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT) does not hold for it. The
/// (possibly frozen) values are merged with `or` when \p Direction is true and
/// with `and` otherwise. With \p Direction true the branch's true edge goes to
/// \p UnswitchedSucc and its false edge to \p NormalSucc; with \p Direction
/// false the two targets are swapped.
///
/// NOTE(review): the line carrying the return type and function name (embedded
/// number 271) is missing from this listing; restore it before compiling.
272 BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
273 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
274 const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) {
275 IRBuilder<> IRB(&BB);
276
277 SmallVector<Value *> FrozenInvariants;
278 for (Value *Inv : Invariants) {
     // Only freeze values that are not already known to be free of undef/poison.
279 if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT))
280 Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr");
281 FrozenInvariants.push_back(Inv);
282 }
283
284 Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
285 : IRB.CreateAnd(FrozenInvariants);
286 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
287 Direction ? &NormalSucc : &UnswitchedSucc);
288}
289
290/// Copy a set of loop invariant values, and conditionally branch on them.
///
/// Every element of \p ToDuplicate must be an Instruction (cast<> is used). The
/// elements are cloned in reverse order onto the end of \p BB, each clone is
/// remapped through VMap so it refers to earlier clones, and the clone of
/// ToDuplicate[0] becomes the branch condition. With \p Direction true the
/// true edge goes to \p UnswitchedSucc and the false edge to \p NormalSucc;
/// otherwise the targets are swapped. When \p MSSAU is non-null, a clone whose
/// original has a MemoryUse gets a memory access created for it as well.
///
/// NOTE(review): several lines are missing from this listing (embedded numbers
/// 291, 295, 301 and 323): the return type and function name, the declaration
/// of VMap, the trailing argument(s) of the RemapInstruction call and the
/// trailing argument(s) of the createMemoryAccessInBB call. Restore them
/// before compiling.
292 BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
293 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
294 MemorySSAUpdater *MSSAU) {
296 for (auto *Val : reverse(ToDuplicate)) {
297 Instruction *Inst = cast<Instruction>(Val);
298 Instruction *NewInst = Inst->clone();
299 NewInst->insertInto(&BB, BB.end());
300 RemapInstruction(NewInst, VMap,
302 VMap[Val] = NewInst;
303
304 if (!MSSAU)
305 continue;
306
307 MemorySSA *MSSA = MSSAU->getMemorySSA();
308 if (auto *MemUse =
309 dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) {
310 auto *DefiningAccess = MemUse->getDefiningAccess();
311 // Get the first defining access before the loop.
312 while (L.contains(DefiningAccess->getBlock())) {
313 // If the defining access is a MemoryPhi, get the incoming
314 // value for the pre-header as defining access.
315 if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
316 DefiningAccess =
317 MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
318 else
319 DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
320 }
321 MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
322 NewInst->getParent(),
324 }
325 }
326
327 IRBuilder<> IRB(&BB);
     // The clone of the first element is the value that gets branched on.
328 Value *Cond = VMap[ToDuplicate[0]];
329 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
330 Direction ? &NormalSucc : &UnswitchedSucc);
331}
332
333/// Rewrite the PHI nodes in an unswitched loop exit basic block.
334///
335/// Requires that the loop exit and unswitched basic block are the same, and
336/// that the exiting block was a unique predecessor of that block. Rewrites the
337/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
338/// PHI nodes from the old preheader that now contains the unswitched
339/// terminator.
341 BasicBlock &OldExitingBB,
342 BasicBlock &OldPH) {
343 for (PHINode &PN : UnswitchedBB.phis()) {
344 // When the loop exit is directly unswitched we just need to update the
345 // incoming basic block. We loop to handle weird cases with repeated
346 // incoming blocks, but expect to typically only have one operand here.
347 for (auto i : seq<int>(0, PN.getNumOperands())) {
348 assert(PN.getIncomingBlock(i) == &OldExitingBB &&
349 "Found incoming block different from unique predecessor!");
350 PN.setIncomingBlock(i, &OldPH);
351 }
352 }
353}
354
355/// Rewrite the PHI nodes in the loop exit basic block and the split off
356/// unswitched block.
357///
358/// Because the exit block remains an exit from the loop, this rewrites the
359/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
360/// nodes into the unswitched basic block to select between the value in the
361/// old preheader and the loop exit.
363 BasicBlock &UnswitchedBB,
364 BasicBlock &OldExitingBB,
365 BasicBlock &OldPH,
366 bool FullUnswitch) {
367 assert(&ExitBB != &UnswitchedBB &&
368 "Must have different loop exit and unswitched blocks!");
369 BasicBlock::iterator InsertPt = UnswitchedBB.begin();
370 for (PHINode &PN : ExitBB.phis()) {
371 auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
372 PN.getName() + ".split");
373 NewPN->insertBefore(InsertPt);
374
375 // Walk backwards over the old PHI node's inputs to minimize the cost of
376 // removing each one. We have to do this weird loop manually so that we
377 // create the same number of new incoming edges in the new PHI as we expect
378 // each case-based edge to be included in the unswitched switch in some
379 // cases.
380 // FIXME: This is really, really gross. It would be much cleaner if LLVM
381 // allowed us to create a single entry for a predecessor block without
382 // having separate entries for each "edge" even though these edges are
383 // required to produce identical results.
384 for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
385 if (PN.getIncomingBlock(i) != &OldExitingBB)
386 continue;
387
388 Value *Incoming = PN.getIncomingValue(i);
389 if (FullUnswitch)
390 // No more edge from the old exiting block to the exit block.
391 PN.removeIncomingValue(i);
392
393 NewPN->addIncoming(Incoming, &OldPH);
394 }
395
396 // Now replace the old PHI with the new one and wire the old one in as an
397 // input to the new one.
398 PN.replaceAllUsesWith(NewPN);
399 NewPN->addIncoming(&PN, &ExitBB);
400 }
401}
402
403/// Hoist the current loop up to the innermost loop containing a remaining exit.
404///
405/// Because we've removed an exit from the loop, we may have changed the set of
406/// loops reachable and need to move the current loop up the loop nest or even
407/// to an entirely separate nest.
408static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
409 DominatorTree &DT, LoopInfo &LI,
410 MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
411 // If the loop is already at the top level, we can't hoist it anywhere.
412 Loop *OldParentL = L.getParentLoop();
413 if (!OldParentL)
414 return;
415
417 L.getExitBlocks(Exits);
418 Loop *NewParentL = nullptr;
419 for (auto *ExitBB : Exits)
420 if (Loop *ExitL = LI.getLoopFor(ExitBB))
421 if (!NewParentL || NewParentL->contains(ExitL))
422 NewParentL = ExitL;
423
424 if (NewParentL == OldParentL)
425 return;
426
427 // The new parent loop (if different) should always contain the old one.
428 if (NewParentL)
429 assert(NewParentL->contains(OldParentL) &&
430 "Can only hoist this loop up the nest!");
431
432 // The preheader will need to move with the body of this loop. However,
433 // because it isn't in this loop we also need to update the primary loop map.
434 assert(OldParentL == LI.getLoopFor(&Preheader) &&
435 "Parent loop of this loop should contain this loop's preheader!");
436 LI.changeLoopFor(&Preheader, NewParentL);
437
438 // Remove this loop from its old parent.
439 OldParentL->removeChildLoop(&L);
440
441 // Add the loop either to the new parent or as a top-level loop.
442 if (NewParentL)
443 NewParentL->addChildLoop(&L);
444 else
445 LI.addTopLevelLoop(&L);
446
447 // Remove this loops blocks from the old parent and every other loop up the
448 // nest until reaching the new parent. Also update all of these
449 // no-longer-containing loops to reflect the nesting change.
450 for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
451 OldContainingL = OldContainingL->getParentLoop()) {
452 llvm::erase_if(OldContainingL->getBlocksVector(),
453 [&](const BasicBlock *BB) {
454 return BB == &Preheader || L.contains(BB);
455 });
456
457 OldContainingL->getBlocksSet().erase(&Preheader);
458 for (BasicBlock *BB : L.blocks())
459 OldContainingL->getBlocksSet().erase(BB);
460
461 // Because we just hoisted a loop out of this one, we have essentially
462 // created new exit paths from it. That means we need to form LCSSA PHI
463 // nodes for values used in the no-longer-nested loop.
464 formLCSSA(*OldContainingL, DT, &LI, SE);
465
466 // We shouldn't need to form dedicated exits because the exit introduced
467 // here is the (just split by unswitching) preheader. However, after trivial
468 // unswitching it is possible to get new non-dedicated exits out of parent
469 // loop so let's conservatively form dedicated exit blocks and figure out
470 // if we can optimize later.
471 formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
472 /*PreserveLCSSA*/ true);
473 }
474}
475
476// Return the top-most loop containing ExitBB and having ExitBB as exiting block
477// or the loop containing ExitBB, if there is no parent loop containing ExitBB
478// as exiting block.
480 const LoopInfo &LI) {
481 Loop *TopMost = LI.getLoopFor(ExitBB);
482 Loop *Current = TopMost;
483 while (Current) {
484 if (Current->isLoopExiting(ExitBB))
485 TopMost = Current;
486 Current = Current->getParentLoop();
487 }
488 return TopMost;
489}
490
491/// Unswitch a trivial branch if the condition is loop invariant.
492///
493/// This routine should only be called when loop code leading to the branch has
494/// been validated as trivial (no side effects). This routine checks if the
495/// condition is invariant and one of the successors is a loop exit. This
496/// allows us to unswitch without duplicating the loop, making it trivial.
497///
498/// If this routine fails to unswitch the branch it returns false.
499///
500/// If the branch can be unswitched, this routine splits the preheader and
501/// hoists the branch above that split. Preserves loop simplified form
502/// (splitting the exit block as necessary). It simplifies the branch within
503/// the loop to an unconditional branch but doesn't remove it entirely. Further
504/// cleanup can be done with some simplifycfg like pass.
505///
506/// If `SE` is not null, it will be updated based on the potential loop SCEVs
507/// invalidated by this.
///
/// On success the NumTrivial and NumBranches statistics are incremented.
///
/// NOTE(review): nine lines are missing from this listing (embedded numbers
/// 508, 521, 588, 642, 646, 649, 660, 691 and 692). Each gap is flagged below;
/// restore the lines from the original source before compiling.
// NOTE(review): first signature line (508) missing. It presumably declares the
// return type, the function name and the parameters L, BI and DT used below.
509 LoopInfo &LI, ScalarEvolution *SE,
510 MemorySSAUpdater *MSSAU) {
511 assert(BI.isConditional() && "Can only unswitch a conditional branch!");
512 LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
513
514 // The loop invariant values that we want to unswitch.
515 TinyPtrVector<Value *> Invariants;
516
517 // When true, we're fully unswitching the branch rather than just unswitching
518 // some input conditions to the branch.
519 bool FullUnswitch = false;
520
     // NOTE(review): line 521 missing. `Cond` is used from here on but its
     // declaration is not visible; presumably it is derived from BI's condition.
522 if (L.isLoopInvariant(Cond)) {
523 Invariants.push_back(Cond);
524 FullUnswitch = true;
525 } else {
526 if (auto *CondInst = dyn_cast<Instruction>(Cond))
527 Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
528 if (Invariants.empty()) {
529 LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n");
530 return false;
531 }
532 }
533
534 // Check that one of the branch's successors exits, and which one.
     // ExitDirection is true when successor 0 leaves the loop and false when
     // successor 1 does; LoopExitSuccIdx holds the matching successor index.
535 bool ExitDirection = true;
536 int LoopExitSuccIdx = 0;
537 auto *LoopExitBB = BI.getSuccessor(0);
538 if (L.contains(LoopExitBB)) {
539 ExitDirection = false;
540 LoopExitSuccIdx = 1;
541 LoopExitBB = BI.getSuccessor(1);
542 if (L.contains(LoopExitBB)) {
543 LLVM_DEBUG(dbgs() << " Branch doesn't exit the loop!\n");
544 return false;
545 }
546 }
547 auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
548 auto *ParentBB = BI.getParent();
549 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {
550 LLVM_DEBUG(dbgs() << " Loop exit PHI's aren't loop-invariant!\n");
551 return false;
552 }
553
554 // When unswitching only part of the branch's condition, we need the exit
555 // block to be reached directly from the partially unswitched input. This can
556 // be done when the exit block is along the true edge and the branch condition
557 // is a graph of `or` operations, or the exit block is along the false edge
558 // and the condition is a graph of `and` operations.
559 if (!FullUnswitch) {
560 if (ExitDirection ? !match(Cond, m_LogicalOr())
561 : !match(Cond, m_LogicalAnd())) {
562 LLVM_DEBUG(dbgs() << " Branch condition is in improper form for "
563 "non-full unswitch!\n");
564 return false;
565 }
566 }
567
568 LLVM_DEBUG({
569 dbgs() << " unswitching trivial invariant conditions for: " << BI
570 << "\n";
571 for (Value *Invariant : Invariants) {
572 dbgs() << " " << *Invariant << " == true";
573 if (Invariant != Invariants.back())
574 dbgs() << " ||";
575 dbgs() << "\n";
576 }
577 });
578
     // All bail-out checks are above this point; from here on the IR and the
     // analyses are modified.
579 // If we have scalar evolutions, we need to invalidate them including this
580 // loop, the loop containing the exit block and the topmost parent loop
581 // exiting via LoopExitBB.
582 if (SE) {
583 if (const Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
584 SE->forgetLoop(ExitL);
585 else
586 // Forget the entire nest as this exits the entire nest.
587 SE->forgetTopmostLoop(&L);
     // NOTE(review): line 588 missing; a further statement inside this `if (SE)`
     // block appears to have been dropped.
589 }
590
591 if (MSSAU && VerifyMemorySSA)
592 MSSAU->getMemorySSA()->verifyMemorySSA();
593
594 // Split the preheader, so that we know that there is a safe place to insert
595 // the conditional branch. We will change the preheader to have a conditional
596 // branch on LoopCond.
597 BasicBlock *OldPH = L.getLoopPreheader();
598 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
599
600 // Now that we have a place to insert the conditional branch, create a place
601 // to branch to: this is the exit block out of the loop that we are
602 // unswitching. We need to split this if there are other loop predecessors.
603 // Because the loop is in simplified form, *any* other predecessor is enough.
604 BasicBlock *UnswitchedBB;
605 if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
606 assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
607 "A branch's parent isn't a predecessor!");
608 UnswitchedBB = LoopExitBB;
609 } else {
610 UnswitchedBB =
611 SplitBlock(LoopExitBB, LoopExitBB->begin(), &DT, &LI, MSSAU, "", false);
612 }
613
614 if (MSSAU && VerifyMemorySSA)
615 MSSAU->getMemorySSA()->verifyMemorySSA();
616
617 // Actually move the invariant uses into the unswitched position. If possible,
618 // we do this by moving the instructions, but when doing partial unswitching
619 // we do it by building a new merge of the values in the unswitched position.
620 OldPH->getTerminator()->eraseFromParent();
621 if (FullUnswitch) {
622 // If fully unswitching, we can use the existing branch instruction.
623 // Splice it into the old PH to gate reaching the new preheader and re-point
624 // its successors.
625 BI.moveBefore(*OldPH, OldPH->end());
626 BI.setCondition(Cond);
627 if (MSSAU) {
628 // Temporarily clone the terminator, to make MSSA update cheaper by
629 // separating "insert edge" updates from "remove edge" ones.
630 BI.clone()->insertInto(ParentBB, ParentBB->end());
631 } else {
632 // Create a new unconditional branch that will continue the loop as a new
633 // terminator.
634 BranchInst::Create(ContinueBB, ParentBB);
635 }
636 BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
637 BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
638 } else {
639 // Only unswitching a subset of inputs to the condition, so we will need to
640 // build a new branch that merges the invariant inputs.
641 if (ExitDirection)
     // NOTE(review): line 642 missing; it held the start of an assertion whose
     // message follows.
643 "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
644 "condition!");
645 else
     // NOTE(review): line 646 missing; it held the start of an assertion whose
     // message follows.
647 "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
648 " condition!");
     // NOTE(review): line 649 missing; it named the helper called with the
     // arguments below.
650 *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
651 FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT);
652 }
653
654 // Update the dominator tree with the added edge.
655 DT.insertEdge(OldPH, UnswitchedBB);
656
657 // After the dominator tree was updated with the added edge, update MemorySSA
658 // if available.
659 if (MSSAU) {
     // NOTE(review): line 660 missing; `Updates` is used below but its
     // declaration is not visible.
661 Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
662 MSSAU->applyInsertUpdates(Updates, DT);
663 }
664
665 // Finish updating dominator tree and memory ssa for full unswitch.
666 if (FullUnswitch) {
667 if (MSSAU) {
668 // Remove the cloned branch instruction.
669 ParentBB->getTerminator()->eraseFromParent();
670 // Create unconditional branch now.
671 BranchInst::Create(ContinueBB, ParentBB);
672 MSSAU->removeEdge(ParentBB, LoopExitBB);
673 }
674 DT.deleteEdge(ParentBB, LoopExitBB);
675 }
676
677 if (MSSAU && VerifyMemorySSA)
678 MSSAU->getMemorySSA()->verifyMemorySSA();
679
680 // Rewrite the relevant PHI nodes.
681 if (UnswitchedBB == LoopExitBB)
682 rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
683 else
684 rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
685 *ParentBB, *OldPH, FullUnswitch);
686
687 // The constant we can replace all of our invariants with inside the loop
688 // body. If any of the invariants have a value other than this the loop won't
689 // be entered.
690 ConstantInt *Replacement = ExitDirection
     // NOTE(review): lines 691-692 missing; they held the two arms of this
     // conditional expression and the terminating semicolon.
693
694 // Since this is an i1 condition we can also trivially replace uses of it
695 // within the loop with a constant.
696 for (Value *Invariant : Invariants)
697 replaceLoopInvariantUses(L, Invariant, *Replacement);
698
699 // If this was full unswitching, we may have changed the nesting relationship
700 // for this loop so hoist it to its correct parent if needed.
701 if (FullUnswitch)
702 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
703
704 if (MSSAU && VerifyMemorySSA)
705 MSSAU->getMemorySSA()->verifyMemorySSA();
706
707 LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
708 ++NumTrivial;
709 ++NumBranches;
710 return true;
711}
712
713/// Unswitch a trivial switch if the condition is loop invariant.
714///
715/// This routine should only be called when loop code leading to the switch has
716/// been validated as trivial (no side effects). This routine checks if the
717/// condition is invariant and that at least one of the successors is a loop
718/// exit. This allows us to unswitch without duplicating the loop, making it
719/// trivial.
720///
721/// If this routine fails to unswitch the switch it returns false.
722///
723/// If the switch can be unswitched, this routine splits the preheader and
724/// copies the switch above that split. If the default case is one of the
725/// exiting cases, it copies the non-exiting cases and points them at the new
726/// preheader. If the default case is not exiting, it copies the exiting cases
727/// and points the default at the preheader. It preserves loop simplified form
728/// (splitting the exit blocks as necessary). It simplifies the switch within
729/// the loop by removing now-dead cases. If the default case is one of those
730/// unswitched, it replaces its destination with a new basic block containing
731/// only unreachable. Such basic blocks, while technically loop exits, are not
732/// considered for unswitching so this is a stable transform and the same
733/// switch will not be revisited. If after unswitching there is only a single
734/// in-loop successor, the switch is further simplified to an unconditional
735/// branch. Still more cleanup can be done with some simplifycfg like pass.
736///
737/// If `SE` is not null, it will be updated based on the potential loop SCEVs
738/// invalidated by this.
740 LoopInfo &LI, ScalarEvolution *SE,
741 MemorySSAUpdater *MSSAU) {
742 LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
743 Value *LoopCond = SI.getCondition();
744
745 // If this isn't switching on an invariant condition, we can't unswitch it.
746 if (!L.isLoopInvariant(LoopCond))
747 return false;
748
749 auto *ParentBB = SI.getParent();
750
751 // The same check must be used both for the default and the exit cases. We
752 // should never leave edges from the switch instruction to a basic block that
753 // we are unswitching, hence the condition used to determine the default case
754 // needs to also be used to populate ExitCaseIndices, which is then used to
755 // remove cases from the switch.
756 auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
757 // BBToCheck is not an exit block if it is inside loop L.
758 if (L.contains(&BBToCheck))
759 return false;
760 // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
761 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
762 return false;
763 // We do not unswitch a block that only has an unreachable statement, as
764 // it's possible this is a previously unswitched block. Only unswitch if
765 // either the terminator is not unreachable, or, if it is, it's not the only
766 // instruction in the block.
767 auto *TI = BBToCheck.getTerminator();
768 bool isUnreachable = isa<UnreachableInst>(TI);
769 return !isUnreachable ||
770 (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI));
771 };
772
773 SmallVector<int, 4> ExitCaseIndices;
774 for (auto Case : SI.cases())
775 if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
776 ExitCaseIndices.push_back(Case.getCaseIndex());
777 BasicBlock *DefaultExitBB = nullptr;
780 if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
781 DefaultExitBB = SI.getDefaultDest();
782 } else if (ExitCaseIndices.empty())
783 return false;
784
785 LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
786
787 if (MSSAU && VerifyMemorySSA)
788 MSSAU->getMemorySSA()->verifyMemorySSA();
789
790 // We may need to invalidate SCEVs for the outermost loop reached by any of
791 // the exits.
792 Loop *OuterL = &L;
793
794 if (DefaultExitBB) {
795 // Check the loop containing this exit.
796 Loop *ExitL = getTopMostExitingLoop(DefaultExitBB, LI);
797 if (!ExitL || ExitL->contains(OuterL))
798 OuterL = ExitL;
799 }
800 for (unsigned Index : ExitCaseIndices) {
801 auto CaseI = SI.case_begin() + Index;
802 // Compute the outer loop from this exit.
803 Loop *ExitL = getTopMostExitingLoop(CaseI->getCaseSuccessor(), LI);
804 if (!ExitL || ExitL->contains(OuterL))
805 OuterL = ExitL;
806 }
807
808 if (SE) {
809 if (OuterL)
810 SE->forgetLoop(OuterL);
811 else
812 SE->forgetTopmostLoop(&L);
813 }
814
815 if (DefaultExitBB) {
816 // Clear out the default destination temporarily to allow accurate
817 // predecessor lists to be examined below.
818 SI.setDefaultDest(nullptr);
819 }
820
821 // Store the exit cases into a separate data structure and remove them from
822 // the switch.
823 SmallVector<std::tuple<ConstantInt *, BasicBlock *,
825 4> ExitCases;
826 ExitCases.reserve(ExitCaseIndices.size());
828 // We walk the case indices backwards so that we remove the last case first
829 // and don't disrupt the earlier indices.
830 for (unsigned Index : reverse(ExitCaseIndices)) {
831 auto CaseI = SI.case_begin() + Index;
832 // Save the value of this case.
833 auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
834 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
835 // Delete the unswitched cases.
836 SIW.removeCase(CaseI);
837 }
838
839 // Check if after this all of the remaining cases point at the same
840 // successor.
841 BasicBlock *CommonSuccBB = nullptr;
842 if (SI.getNumCases() > 0 &&
843 all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
844 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
845 }))
846 CommonSuccBB = SI.case_begin()->getCaseSuccessor();
847 if (!DefaultExitBB) {
848 // If we're not unswitching the default, we need it to match any cases to
849 // have a common successor or if we have no cases it is the common
850 // successor.
851 if (SI.getNumCases() == 0)
852 CommonSuccBB = SI.getDefaultDest();
853 else if (SI.getDefaultDest() != CommonSuccBB)
854 CommonSuccBB = nullptr;
855 }
856
857 // Split the preheader, so that we know that there is a safe place to insert
858 // the switch.
859 BasicBlock *OldPH = L.getLoopPreheader();
860 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
861 OldPH->getTerminator()->eraseFromParent();
862
863 // Now add the unswitched switch.
864 auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
865 SwitchInstProfUpdateWrapper NewSIW(*NewSI);
866
867 // Rewrite the IR for the unswitched basic blocks. This requires two steps.
868 // First, we split any exit blocks with remaining in-loop predecessors. Then
869 // we update the PHIs in one of two ways depending on if there was a split.
870 // We walk in reverse so that we split in the same order as the cases
871 // appeared. This is purely for convenience of reading the resulting IR, but
872 // it doesn't cost anything really.
873 SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
875 // Handle the default exit if necessary.
876 // FIXME: It'd be great if we could merge this with the loop below but LLVM's
877 // ranges aren't quite powerful enough yet.
878 if (DefaultExitBB) {
879 if (pred_empty(DefaultExitBB)) {
880 UnswitchedExitBBs.insert(DefaultExitBB);
881 rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
882 } else {
883 auto *SplitBB =
884 SplitBlock(DefaultExitBB, DefaultExitBB->begin(), &DT, &LI, MSSAU);
885 rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
886 *ParentBB, *OldPH,
887 /*FullUnswitch*/ true);
888 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
889 }
890 }
891 // Note that we must use a reference in the for loop so that we update the
892 // container.
893 for (auto &ExitCase : reverse(ExitCases)) {
894 // Grab a reference to the exit block in the pair so that we can update it.
895 BasicBlock *ExitBB = std::get<1>(ExitCase);
896
897 // If this case is the last edge into the exit block, we can simply reuse it
898 // as it will no longer be a loop exit. No mapping necessary.
899 if (pred_empty(ExitBB)) {
900 // Only rewrite once.
901 if (UnswitchedExitBBs.insert(ExitBB).second)
902 rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
903 continue;
904 }
905
906 // Otherwise we need to split the exit block so that we retain an exit
907 // block from the loop and a target for the unswitched condition.
908 BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
909 if (!SplitExitBB) {
910 // If this is the first time we see this, do the split and remember it.
911 SplitExitBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
912 rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
913 *ParentBB, *OldPH,
914 /*FullUnswitch*/ true);
915 }
916 // Update the case pair to point to the split block.
917 std::get<1>(ExitCase) = SplitExitBB;
918 }
919
920 // Now add the unswitched cases. We do this in reverse order as we built them
921 // in reverse order.
922 for (auto &ExitCase : reverse(ExitCases)) {
923 ConstantInt *CaseVal = std::get<0>(ExitCase);
924 BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
925
926 NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
927 }
928
929 // If the default was unswitched, re-point it and add explicit cases for
930 // entering the loop.
931 if (DefaultExitBB) {
932 NewSIW->setDefaultDest(DefaultExitBB);
933 NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
934
935 // We removed all the exit cases, so we just copy the cases to the
936 // unswitched switch.
937 for (const auto &Case : SI.cases())
938 NewSIW.addCase(Case.getCaseValue(), NewPH,
940 } else if (DefaultCaseWeight) {
941 // We have to set branch weight of the default case.
942 uint64_t SW = *DefaultCaseWeight;
943 for (const auto &Case : SI.cases()) {
944 auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
945 assert(W &&
946 "case weight must be defined as default case weight is defined");
947 SW += *W;
948 }
949 NewSIW.setSuccessorWeight(0, SW);
950 }
951
952 // If we ended up with a common successor for every path through the switch
953 // after unswitching, rewrite it to an unconditional branch to make it easy
954 // to recognize. Otherwise we potentially have to recognize the default case
955 // pointing at unreachable and other complexity.
956 if (CommonSuccBB) {
957 BasicBlock *BB = SI.getParent();
958 // We may have had multiple edges to this common successor block, so remove
959 // them as predecessors. We skip the first one, either the default or the
960 // actual first case.
961 bool SkippedFirst = DefaultExitBB == nullptr;
962 for (auto Case : SI.cases()) {
963 assert(Case.getCaseSuccessor() == CommonSuccBB &&
964 "Non-common successor!");
965 (void)Case;
966 if (!SkippedFirst) {
967 SkippedFirst = true;
968 continue;
969 }
970 CommonSuccBB->removePredecessor(BB,
971 /*KeepOneInputPHIs*/ true);
972 }
973 // Now nuke the switch and replace it with a direct branch.
974 SIW.eraseFromParent();
975 BranchInst::Create(CommonSuccBB, BB);
976 } else if (DefaultExitBB) {
977 assert(SI.getNumCases() > 0 &&
978 "If we had no cases we'd have a common successor!");
979 // Move the last case to the default successor. This is valid as if the
980 // default got unswitched it cannot be reached. This has the advantage of
981 // being simple and keeping the number of edges from this switch to
982 // successors the same, and avoiding any PHI update complexity.
983 auto LastCaseI = std::prev(SI.case_end());
984
985 SI.setDefaultDest(LastCaseI->getCaseSuccessor());
987 0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
988 SIW.removeCase(LastCaseI);
989 }
990
991 // Walk the unswitched exit blocks and the unswitched split blocks and update
992 // the dominator tree based on the CFG edits. While we are walking unordered
993 // containers here, the API for applyUpdates takes an unordered list of
994 // updates and requires them to not contain duplicates.
996 for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
997 DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
998 DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
999 }
1000 for (auto SplitUnswitchedPair : SplitExitBBMap) {
1001 DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
1002 DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
1003 }
1004
1005 if (MSSAU) {
1006 MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
1007 if (VerifyMemorySSA)
1008 MSSAU->getMemorySSA()->verifyMemorySSA();
1009 } else {
1010 DT.applyUpdates(DTUpdates);
1011 }
1012
1013 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1014
1015 // We may have changed the nesting relationship for this loop so hoist it to
1016 // its correct parent if needed.
1017 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
1018
1019 if (MSSAU && VerifyMemorySSA)
1020 MSSAU->getMemorySSA()->verifyMemorySSA();
1021
1022 ++NumTrivial;
1023 ++NumSwitches;
1024 LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
1025 return true;
1026}
1027
1028/// This routine scans the loop to find a branch or switch which occurs before
1029/// any side effects occur. These can potentially be unswitched without
1030/// duplicating the loop. If a branch or switch is successfully unswitched the
1031/// scanning continues to see if subsequent branches or switches have become
1032/// trivial. Once all trivial candidates have been unswitched, this routine
1033/// returns.
1034///
1035/// The return value indicates whether anything was unswitched (and therefore
1036/// changed).
1037///
1038/// If `SE` is not null, it will be updated based on the potential loop SCEVs
1039/// invalidated by this.
1041 LoopInfo &LI, ScalarEvolution *SE,
1042 MemorySSAUpdater *MSSAU) {
1043 bool Changed = false;
1044
1045 // If loop header has only one reachable successor we should keep looking for
1046 // trivial condition candidates in the successor as well. An alternative is
1047 // to constant fold conditions and merge successors into loop header (then we
1048 // only need to check header's terminator). The reason for not doing this in
1049 // LoopUnswitch pass is that it could potentially break LoopPassManager's
1050 // invariants. Folding dead branches could either eliminate the current loop
1051 // or make other loops unreachable. LCSSA form might also not be preserved
1052 // after deleting branches. The following code keeps traversing loop header's
1053 // successors until it finds the trivial condition candidate (condition that
1054 // is not a constant). Since unswitching generates branches with constant
1055 // conditions, this scenario could be very common in practice.
1056 BasicBlock *CurrentBB = L.getHeader();
1058 Visited.insert(CurrentBB);
1059 do {
1060 // Check if there are any side-effecting instructions (e.g. stores, calls,
1061 // volatile loads) in the part of the loop that the code *would* execute
1062 // without unswitching.
1063 if (MSSAU) // Possible early exit with MSSA
1064 if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
1065 if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
1066 return Changed;
1067 if (llvm::any_of(*CurrentBB,
1068 [](Instruction &I) { return I.mayHaveSideEffects(); }))
1069 return Changed;
1070
1071 Instruction *CurrentTerm = CurrentBB->getTerminator();
1072
1073 if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1074 // Don't bother trying to unswitch past a switch with a constant
1075 // condition. This should be removed prior to running this pass by
1076 // simplifycfg.
1077 if (isa<Constant>(SI->getCondition()))
1078 return Changed;
1079
1080 if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
1081 // Couldn't unswitch this one so we're done.
1082 return Changed;
1083
1084 // Mark that we managed to unswitch something.
1085 Changed = true;
1086
1087 // If unswitching turned the terminator into an unconditional branch then
1088 // we can continue. The unswitching logic specifically works to fold any
1089 // cases it can into an unconditional branch to make it easier to
1090 // recognize here.
1091 auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
1092 if (!BI || BI->isConditional())
1093 return Changed;
1094
1095 CurrentBB = BI->getSuccessor(0);
1096 continue;
1097 }
1098
1099 auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1100 if (!BI)
1101 // We do not understand other terminator instructions.
1102 return Changed;
1103
1104 // Don't bother trying to unswitch past an unconditional branch or a branch
1105 // with a constant value. These should be removed by simplifycfg prior to
1106 // running this pass.
1107 if (!BI->isConditional() ||
1108 isa<Constant>(skipTrivialSelect(BI->getCondition())))
1109 return Changed;
1110
1111 // Found a trivial condition candidate: non-foldable conditional branch. If
1112 // we fail to unswitch this, we can't do anything else that is trivial.
1113 if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
1114 return Changed;
1115
1116 // Mark that we managed to unswitch something.
1117 Changed = true;
1118
1119 // If we only unswitched some of the conditions feeding the branch, we won't
1120 // have collapsed it to a single successor.
1121 BI = cast<BranchInst>(CurrentBB->getTerminator());
1122 if (BI->isConditional())
1123 return Changed;
1124
1125 // Follow the newly unconditional branch into its successor.
1126 CurrentBB = BI->getSuccessor(0);
1127
1128 // When continuing, if we exit the loop or reach a previous visited block,
1129 // then we can not reach any trivial condition candidates (unfoldable
1130 // branch instructions or switch instructions) and no unswitch can happen.
1131 } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
1132
1133 return Changed;
1134}
1135
1136/// Build the cloned blocks for an unswitched copy of the given loop.
1137///
1138/// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
1139/// after the split block (`SplitBB`) that will be used to select between the
1140/// cloned and original loop.
1141///
1142/// This routine handles cloning all of the necessary loop blocks and exit
1143/// blocks including rewriting their instructions and the relevant PHI nodes.
1144/// Any loop blocks or exit blocks which are dominated by a different successor
1145/// than the one for this clone of the loop blocks can be trivially skipped. We
1146/// use the `DominatingSucc` map to determine whether a block satisfies that
1147/// property with a simple map lookup.
1148///
1149/// It also correctly creates the unconditional branch in the cloned
1150/// unswitched parent block to only point at the unswitched successor.
1151///
1152/// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1153/// block splitting is correctly reflected in `LoopInfo`, essentially all of
1154/// the cloned blocks (and their loops) are left without full `LoopInfo`
1155/// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1156/// blocks to them but doesn't create the cloned `DominatorTree` structure and
1157/// instead the caller must recompute an accurate DT. It *does* correctly
1158/// update the `AssumptionCache` provided in `AC`.
1160 Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
1161 ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
1162 BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
1164 ValueToValueMapTy &VMap,
1166 DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU,
1167 ScalarEvolution *SE) {
1169 NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
1170
1171 // We will need to clone a bunch of blocks, wrap up the clone operation in
1172 // a helper.
1173 auto CloneBlock = [&](BasicBlock *OldBB) {
1174 // Clone the basic block and insert it before the new preheader.
1175 BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
1176 NewBB->moveBefore(LoopPH);
1177
1178 // Record this block and the mapping.
1179 NewBlocks.push_back(NewBB);
1180 VMap[OldBB] = NewBB;
1181
1182 return NewBB;
1183 };
1184
1185 // We skip cloning blocks when they have a dominating succ that is not the
1186 // succ we are cloning for.
1187 auto SkipBlock = [&](BasicBlock *BB) {
1188 auto It = DominatingSucc.find(BB);
1189 return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
1190 };
1191
1192 // First, clone the preheader.
1193 auto *ClonedPH = CloneBlock(LoopPH);
1194
1195 // Then clone all the loop blocks, skipping the ones that aren't necessary.
1196 for (auto *LoopBB : L.blocks())
1197 if (!SkipBlock(LoopBB))
1198 CloneBlock(LoopBB);
1199
1200 // Split all the loop exit edges so that when we clone the exit blocks, if
1201 // any of the exit blocks are *also* a preheader for some other loop, we
1202 // don't create multiple predecessors entering the loop header.
1203 for (auto *ExitBB : ExitBlocks) {
1204 if (SkipBlock(ExitBB))
1205 continue;
1206
1207 // When we are going to clone an exit, we don't need to clone all the
1208 // instructions in the exit block and we want to ensure we have an easy
1209 // place to merge the CFG, so split the exit first. This is always safe to
1210 // do because there cannot be any non-loop predecessors of a loop exit in
1211 // loop simplified form.
1212 auto *MergeBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
1213
1214 // Rearrange the names to make it easier to write test cases by having the
1215 // exit block carry the suffix rather than the merge block carrying the
1216 // suffix.
1217 MergeBB->takeName(ExitBB);
1218 ExitBB->setName(Twine(MergeBB->getName()) + ".split");
1219
1220 // Now clone the original exit block.
1221 auto *ClonedExitBB = CloneBlock(ExitBB);
1222 assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1223 "Exit block should have been split to have one successor!");
1224 assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1225 "Cloned exit block has the wrong successor!");
1226
1227 // Remap any cloned instructions and create a merge phi node for them.
1228 for (auto ZippedInsts : llvm::zip_first(
1229 llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
1230 llvm::make_range(ClonedExitBB->begin(),
1231 std::prev(ClonedExitBB->end())))) {
1232 Instruction &I = std::get<0>(ZippedInsts);
1233 Instruction &ClonedI = std::get<1>(ZippedInsts);
1234
1235 // The only instructions in the exit block should be PHI nodes and
1236 // potentially a landing pad.
1237 assert(
1238 (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&
1239 "Bad instruction in exit block!");
1240 // We should have a value map between the instruction and its clone.
1241 assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
1242
1243 // Forget SCEVs based on exit phis in case SCEV looked through the phi.
1244 if (SE && isa<PHINode>(I))
1245 SE->forgetValue(&I);
1246
1247 auto *MergePN =
1248 PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi");
1249 MergePN->insertBefore(MergeBB->getFirstInsertionPt());
1250 I.replaceAllUsesWith(MergePN);
1251 MergePN->addIncoming(&I, ExitBB);
1252 MergePN->addIncoming(&ClonedI, ClonedExitBB);
1253 }
1254 }
1255
1256 // Rewrite the instructions in the cloned blocks to refer to the instructions
1257 // in the cloned blocks. We have to do this as a second pass so that we have
1258 // everything available. Also, we have inserted new instructions which may
1259 // include assume intrinsics, so we update the assumption cache while
1260 // processing this.
1261 Module *M = ClonedPH->getParent()->getParent();
1262 for (auto *ClonedBB : NewBlocks)
1263 for (Instruction &I : *ClonedBB) {
1264 RemapDbgRecordRange(M, I.getDbgRecordRange(), VMap,
1266 RemapInstruction(&I, VMap,
1268 if (auto *II = dyn_cast<AssumeInst>(&I))
1270 }
1271
1272 // Update any PHI nodes in the cloned successors of the skipped blocks to not
1273 // have spurious incoming values.
1274 for (auto *LoopBB : L.blocks())
1275 if (SkipBlock(LoopBB))
1276 for (auto *SuccBB : successors(LoopBB))
1277 if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
1278 for (PHINode &PN : ClonedSuccBB->phis())
1279 PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
1280
1281 // Remove the cloned parent as a predecessor of any successor we ended up
1282 // cloning other than the unswitched one.
1283 auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
1284 for (auto *SuccBB : successors(ParentBB)) {
1285 if (SuccBB == UnswitchedSuccBB)
1286 continue;
1287
1288 auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
1289 if (!ClonedSuccBB)
1290 continue;
1291
1292 ClonedSuccBB->removePredecessor(ClonedParentBB,
1293 /*KeepOneInputPHIs*/ true);
1294 }
1295
1296 // Replace the cloned branch with an unconditional branch to the cloned
1297 // unswitched successor.
1298 auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
1299 Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1300 // Trivial Simplification. If Terminator is a conditional branch and
1301 // condition becomes dead - erase it.
1302 Value *ClonedConditionToErase = nullptr;
1303 if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1304 ClonedConditionToErase = BI->getCondition();
1305 else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1306 ClonedConditionToErase = SI->getCondition();
1307
1308 ClonedTerminator->eraseFromParent();
1309 BranchInst::Create(ClonedSuccBB, ClonedParentBB);
1310
1311 if (ClonedConditionToErase)
1312 RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
1313 MSSAU);
1314
1315 // If there are duplicate entries in the PHI nodes because of multiple edges
1316 // to the unswitched successor, we need to nuke all but one as we replaced it
1317 // with a direct branch.
1318 for (PHINode &PN : ClonedSuccBB->phis()) {
1319 bool Found = false;
1320 // Loop over the incoming operands backwards so we can easily delete as we
1321 // go without invalidating the index.
1322 for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
1323 if (PN.getIncomingBlock(i) != ClonedParentBB)
1324 continue;
1325 if (!Found) {
1326 Found = true;
1327 continue;
1328 }
1329 PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
1330 }
1331 }
1332
1333 // Record the domtree updates for the new blocks.
1335 for (auto *ClonedBB : NewBlocks) {
1336 for (auto *SuccBB : successors(ClonedBB))
1337 if (SuccSet.insert(SuccBB).second)
1338 DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1339 SuccSet.clear();
1340 }
1341
1342 return ClonedPH;
1343}
1344
1345/// Recursively clone the specified loop and all of its children.
1346///
1347/// The target parent loop for the clone should be provided, or can be null if
1348/// the clone is a top-level loop. While cloning, all the blocks are mapped
1349/// with the provided value map. The entire original loop must be present in
1350/// the value map. The cloned loop is returned.
1351static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
1352 const ValueToValueMapTy &VMap, LoopInfo &LI) {
1353 auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
1354 assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
1355 ClonedL.reserveBlocks(OrigL.getNumBlocks());
1356 for (auto *BB : OrigL.blocks()) {
1357 auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
1358 ClonedL.addBlockEntry(ClonedBB);
1359 if (LI.getLoopFor(BB) == &OrigL)
1360 LI.changeLoopFor(ClonedBB, &ClonedL);
1361 }
1362 };
1363
1364 // We specially handle the first loop because it may get cloned into
1365 // a different parent and because we most commonly are cloning leaf loops.
1366 Loop *ClonedRootL = LI.AllocateLoop();
1367 if (RootParentL)
1368 RootParentL->addChildLoop(ClonedRootL);
1369 else
1370 LI.addTopLevelLoop(ClonedRootL);
1371 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1372
1373 if (OrigRootL.isInnermost())
1374 return ClonedRootL;
1375
1376 // If we have a nest, we can quickly clone the entire loop nest using an
1377 // iterative approach because it is a tree. We keep the cloned parent in the
1378 // data structure to avoid repeatedly querying through a map to find it.
1379 SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
1380 // Build up the loops to clone in reverse order as we'll clone them from the
1381 // back.
1382 for (Loop *ChildL : llvm::reverse(OrigRootL))
1383 LoopsToClone.push_back({ClonedRootL, ChildL});
1384 do {
1385 Loop *ClonedParentL, *L;
1386 std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
1387 Loop *ClonedL = LI.AllocateLoop();
1388 ClonedParentL->addChildLoop(ClonedL);
1389 AddClonedBlocksToLoop(*L, *ClonedL);
1390 for (Loop *ChildL : llvm::reverse(*L))
1391 LoopsToClone.push_back({ClonedL, ChildL});
1392 } while (!LoopsToClone.empty());
1393
1394 return ClonedRootL;
1395}
1396
1397/// Build the cloned loops of an original loop from unswitching.
1398///
1399/// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1400/// operation. We need to re-verify that there even is a loop (as the backedge
1401/// may not have been cloned), and even if there are remaining backedges the
1402/// backedge set may be different. However, we know that each child loop is
1403/// undisturbed, we only need to find where to place each child loop within
1404/// either any parent loop or within a cloned version of the original loop.
1405///
1406/// Because child loops may end up cloned outside of any cloned version of the
1407/// original loop, multiple cloned sibling loops may be created. All of them
1408/// are returned so that the newly introduced loop nest roots can be
1409/// identified.
1410static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1411 const ValueToValueMapTy &VMap, LoopInfo &LI,
1412 SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1413 Loop *ClonedL = nullptr;
1414
1415 auto *OrigPH = OrigL.getLoopPreheader();
1416 auto *OrigHeader = OrigL.getHeader();
1417
1418 auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1419 auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1420
1421 // We need to know the loops of the cloned exit blocks to even compute the
1422 // accurate parent loop. If we only clone exits to some parent of the
1423 // original parent, we want to clone into that outer loop. We also keep track
1424 // of the loops that our cloned exit blocks participate in.
1425 Loop *ParentL = nullptr;
1426 SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1428 ClonedExitsInLoops.reserve(ExitBlocks.size());
1429 for (auto *ExitBB : ExitBlocks)
1430 if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1431 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1432 ExitLoopMap[ClonedExitBB] = ExitL;
1433 ClonedExitsInLoops.push_back(ClonedExitBB);
1434 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1435 ParentL = ExitL;
1436 }
1437 assert((!ParentL || ParentL == OrigL.getParentLoop() ||
1438 ParentL->contains(OrigL.getParentLoop())) &&
1439 "The computed parent loop should always contain (or be) the parent of "
1440 "the original loop.");
1441
1442 // We build the set of blocks dominated by the cloned header from the set of
1443 // cloned blocks out of the original loop. While not all of these will
1444 // necessarily be in the cloned loop, it is enough to establish that they
1445 // aren't in unreachable cycles, etc.
1446 SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1447 for (auto *BB : OrigL.blocks())
1448 if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1449 ClonedLoopBlocks.insert(ClonedBB);
1450
1451 // Rebuild the set of blocks that will end up in the cloned loop. We may have
1452 // skipped cloning some region of this loop which can in turn skip some of
1453 // the backedges so we have to rebuild the blocks in the loop based on the
1454 // backedges that remain after cloning.
1456 SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1457 for (auto *Pred : predecessors(ClonedHeader)) {
1458 // The only possible non-loop header predecessor is the preheader because
1459 // we know we cloned the loop in simplified form.
1460 if (Pred == ClonedPH)
1461 continue;
1462
1463 // Because the loop was in simplified form, the only non-loop predecessor
1464 // should be the preheader.
1465 assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
1466 "header other than the preheader "
1467 "that is not part of the loop!");
1468
1469 // Insert this block into the loop set and on the first visit (and if it
1470 // isn't the header we're currently walking) put it into the worklist to
1471 // recurse through.
1472 if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1473 Worklist.push_back(Pred);
1474 }
1475
1476 // If we had any backedges then there *is* a cloned loop. Put the header into
1477 // the loop set and then walk the worklist backwards to find all the blocks
1478 // that remain within the loop after cloning.
1479 if (!BlocksInClonedLoop.empty()) {
1480 BlocksInClonedLoop.insert(ClonedHeader);
1481
1482 while (!Worklist.empty()) {
1483 BasicBlock *BB = Worklist.pop_back_val();
1484 assert(BlocksInClonedLoop.count(BB) &&
1485 "Didn't put block into the loop set!");
1486
1487 // Insert any predecessors that are in the possible set into the cloned
1488 // set, and if the insert is successful, add them to the worklist. Note
1489 // that we filter on the blocks that are definitely reachable via the
1490 // backedge to the loop header so we may prune out dead code within the
1491 // cloned loop.
1492 for (auto *Pred : predecessors(BB))
1493 if (ClonedLoopBlocks.count(Pred) &&
1494 BlocksInClonedLoop.insert(Pred).second)
1495 Worklist.push_back(Pred);
1496 }
1497
1498 ClonedL = LI.AllocateLoop();
1499 if (ParentL) {
1500 ParentL->addBasicBlockToLoop(ClonedPH, LI);
1501 ParentL->addChildLoop(ClonedL);
1502 } else {
1503 LI.addTopLevelLoop(ClonedL);
1504 }
1505 NonChildClonedLoops.push_back(ClonedL);
1506
1507 ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1508 // We don't want to just add the cloned loop blocks based on how we
1509 // discovered them. The original order of blocks was carefully built in
1510 // a way that doesn't rely on predecessor ordering. Rather than re-invent
1511 // that logic, we just re-walk the original blocks (and those of the child
1512 // loops) and filter them as we add them into the cloned loop.
1513 for (auto *BB : OrigL.blocks()) {
1514 auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1515 if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1516 continue;
1517
1518 // Directly add the blocks that are only in this loop.
1519 if (LI.getLoopFor(BB) == &OrigL) {
1520 ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1521 continue;
1522 }
1523
1524 // We want to manually add it to this loop and parents.
1525 // Registering it with LoopInfo will happen when we clone the top
1526 // loop for this block.
1527 for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1528 PL->addBlockEntry(ClonedBB);
1529 }
1530
1531 // Now add each child loop whose header remains within the cloned loop. All
1532 // of the blocks within the loop must satisfy the same constraints as the
1533 // header so once we pass the header checks we can just clone the entire
1534 // child loop nest.
1535 for (Loop *ChildL : OrigL) {
1536 auto *ClonedChildHeader =
1537 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1538 if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1539 continue;
1540
1541#ifndef NDEBUG
1542 // We should never have a cloned child loop header but fail to have
1543 // all of the blocks for that child loop.
1544 for (auto *ChildLoopBB : ChildL->blocks())
1545 assert(BlocksInClonedLoop.count(
1546 cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
1547 "Child cloned loop has a header within the cloned outer "
1548 "loop but not all of its blocks!");
1549#endif
1550
1551 cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1552 }
1553 }
1554
1555 // Now that we've handled all the components of the original loop that were
1556 // cloned into a new loop, we still need to handle anything from the original
1557 // loop that wasn't in a cloned loop.
1558
1559 // Figure out what blocks are left to place within any loop nest containing
1560 // the unswitched loop. If we never formed a loop, the cloned PH is one of
1561 // them.
1562 SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1563 if (BlocksInClonedLoop.empty())
1564 UnloopedBlockSet.insert(ClonedPH);
1565 for (auto *ClonedBB : ClonedLoopBlocks)
1566 if (!BlocksInClonedLoop.count(ClonedBB))
1567 UnloopedBlockSet.insert(ClonedBB);
1568
1569 // Copy the cloned exits and sort them in ascending loop depth, we'll work
1570 // backwards across these to process them inside out. The order shouldn't
1571 // matter as we're just trying to build up the map from inside-out; we use
1572 // the map in a more stably ordered way below.
1573 auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1574 llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1575 return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1576 ExitLoopMap.lookup(RHS)->getLoopDepth();
1577 });
1578
1579 // Populate the existing ExitLoopMap with everything reachable from each
1580 // exit, starting from the inner most exit.
1581 while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1582 assert(Worklist.empty() && "Didn't clear worklist!");
1583
1584 BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1585 Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1586
1587 // Walk the CFG back until we hit the cloned PH adding everything reachable
1588 // and in the unlooped set to this exit block's loop.
1589 Worklist.push_back(ExitBB);
1590 do {
1591 BasicBlock *BB = Worklist.pop_back_val();
1592 // We can stop recursing at the cloned preheader (if we get there).
1593 if (BB == ClonedPH)
1594 continue;
1595
1596 for (BasicBlock *PredBB : predecessors(BB)) {
1597 // If this pred has already been moved to our set or is part of some
1598 // (inner) loop, no update needed.
1599 if (!UnloopedBlockSet.erase(PredBB)) {
1600 assert(
1601 (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
1602 "Predecessor not mapped to a loop!");
1603 continue;
1604 }
1605
1606 // We just insert into the loop set here. We'll add these blocks to the
1607 // exit loop after we build up the set in an order that doesn't rely on
1608 // predecessor order (which in turn relies on use list order).
1609 bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1610 (void)Inserted;
1611 assert(Inserted && "Should only visit an unlooped block once!");
1612
1613 // And recurse through to its predecessors.
1614 Worklist.push_back(PredBB);
1615 }
1616 } while (!Worklist.empty());
1617 }
1618
1619 // Now that the ExitLoopMap gives us a mapping for all the non-looping cloned
1620 // blocks to their outer loops, walk the cloned blocks and the cloned exits
1621 // in their original order adding them to the correct loop.
1622
1623 // We need a stable insertion order. We use the order of the original loop
1624 // order and map into the correct parent loop.
1625 for (auto *BB : llvm::concat<BasicBlock *const>(
1626 ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1627 if (Loop *OuterL = ExitLoopMap.lookup(BB))
1628 OuterL->addBasicBlockToLoop(BB, LI);
1629
1630#ifndef NDEBUG
1631 for (auto &BBAndL : ExitLoopMap) {
1632 auto *BB = BBAndL.first;
1633 auto *OuterL = BBAndL.second;
1634 assert(LI.getLoopFor(BB) == OuterL &&
1635 "Failed to put all blocks into outer loops!");
1636 }
1637#endif
1638
1639 // Now that all the blocks are placed into the correct containing loop in the
1640 // absence of child loops, find all the potentially cloned child loops and
1641 // clone them into whatever outer loop we placed their header into.
1642 for (Loop *ChildL : OrigL) {
1643 auto *ClonedChildHeader =
1644 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1645 if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1646 continue;
1647
1648#ifndef NDEBUG
1649 for (auto *ChildLoopBB : ChildL->blocks())
1650 assert(VMap.count(ChildLoopBB) &&
1651 "Cloned a child loop header but not all of that loops blocks!");
1652#endif
1653
1654 NonChildClonedLoops.push_back(cloneLoopNest(
1655 *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1656 }
1657}
1658
1659static void
1661 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1662 DominatorTree &DT, MemorySSAUpdater *MSSAU) {
1663 // Find all the dead clones, and remove them from their successors.
1665 for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1666 for (const auto &VMap : VMaps)
1667 if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1668 if (!DT.isReachableFromEntry(ClonedBB)) {
1669 for (BasicBlock *SuccBB : successors(ClonedBB))
1670 SuccBB->removePredecessor(ClonedBB);
1671 DeadBlocks.push_back(ClonedBB);
1672 }
1673
1674 // Remove all MemorySSA in the dead blocks
1675 if (MSSAU) {
1676 SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1677 DeadBlocks.end());
1678 MSSAU->removeBlocks(DeadBlockSet);
1679 }
1680
1681 // Drop any remaining references to break cycles.
1682 for (BasicBlock *BB : DeadBlocks)
1683 BB->dropAllReferences();
1684 // Erase them from the IR.
1685 for (BasicBlock *BB : DeadBlocks)
1686 BB->eraseFromParent();
1687}
1688
1691 DominatorTree &DT, LoopInfo &LI,
1692 MemorySSAUpdater *MSSAU,
1693 ScalarEvolution *SE,
1694 LPMUpdater &LoopUpdater) {
1695 // Find all the dead blocks tied to this loop, and remove them from their
1696 // successors.
1698
1699 // Start with loop/exit blocks and get a transitive closure of reachable dead
1700 // blocks.
1701 SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1702 ExitBlocks.end());
1703 DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1704 while (!DeathCandidates.empty()) {
1705 auto *BB = DeathCandidates.pop_back_val();
1706 if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1707 for (BasicBlock *SuccBB : successors(BB)) {
1708 SuccBB->removePredecessor(BB);
1709 DeathCandidates.push_back(SuccBB);
1710 }
1711 DeadBlockSet.insert(BB);
1712 }
1713 }
1714
1715 // Remove all MemorySSA in the dead blocks
1716 if (MSSAU)
1717 MSSAU->removeBlocks(DeadBlockSet);
1718
1719 // Filter out the dead blocks from the exit blocks list so that it can be
1720 // used in the caller.
1721 llvm::erase_if(ExitBlocks,
1722 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1723
1724 // Walk from this loop up through its parents removing all of the dead blocks.
1725 for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1726 for (auto *BB : DeadBlockSet)
1727 ParentL->getBlocksSet().erase(BB);
1728 llvm::erase_if(ParentL->getBlocksVector(),
1729 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1730 }
1731
1732 // Now delete the dead child loops. This raw delete will clear them
1733 // recursively.
1734 llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1735 if (!DeadBlockSet.count(ChildL->getHeader()))
1736 return false;
1737
1738 assert(llvm::all_of(ChildL->blocks(),
1739 [&](BasicBlock *ChildBB) {
1740 return DeadBlockSet.count(ChildBB);
1741 }) &&
1742 "If the child loop header is dead all blocks in the child loop must "
1743 "be dead as well!");
1744 LoopUpdater.markLoopAsDeleted(*ChildL, ChildL->getName());
1745 if (SE)
1747 LI.destroy(ChildL);
1748 return true;
1749 });
1750
1751 // Remove the loop mappings for the dead blocks and drop all the references
1752 // from these blocks to others to handle cyclic references as we start
1753 // deleting the blocks themselves.
1754 for (auto *BB : DeadBlockSet) {
1755 // Check that the dominator tree has already been updated.
1756 assert(!DT.getNode(BB) && "Should already have cleared domtree!");
1757 LI.changeLoopFor(BB, nullptr);
1758 // Drop all uses of the instructions to make sure we won't have dangling
1759 // uses in other blocks.
1760 for (auto &I : *BB)
1761 if (!I.use_empty())
1762 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
1763 BB->dropAllReferences();
1764 }
1765
1766 // Actually delete the blocks now that they've been fully unhooked from the
1767 // IR.
1768 for (auto *BB : DeadBlockSet)
1769 BB->eraseFromParent();
1770}
1771
1772/// Recompute the set of blocks in a loop after unswitching.
1773///
1774/// This walks from the original headers predecessors to rebuild the loop. We
1775/// take advantage of the fact that new blocks can't have been added, and so we
1776/// filter by the original loop's blocks. This also handles potentially
1777/// unreachable code that we don't want to explore but might be found examining
1778/// the predecessors of the header.
1779///
1780/// If the original loop is no longer a loop, this will return an empty set. If
1781/// it remains a loop, all the blocks within it will be added to the set
1782/// (including those blocks in inner loops).
1784 LoopInfo &LI) {
1786
1787 auto *PH = L.getLoopPreheader();
1788 auto *Header = L.getHeader();
1789
1790 // A worklist to use while walking backwards from the header.
1792
1793 // First walk the predecessors of the header to find the backedges. This will
1794 // form the basis of our walk.
1795 for (auto *Pred : predecessors(Header)) {
1796 // Skip the preheader.
1797 if (Pred == PH)
1798 continue;
1799
1800 // Because the loop was in simplified form, the only non-loop predecessor
1801 // is the preheader.
1802 assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1803 "than the preheader that is not part of the "
1804 "loop!");
1805
1806 // Insert this block into the loop set and on the first visit and, if it
1807 // isn't the header we're currently walking, put it into the worklist to
1808 // recurse through.
1809 if (LoopBlockSet.insert(Pred).second && Pred != Header)
1810 Worklist.push_back(Pred);
1811 }
1812
1813 // If no backedges were found, we're done.
1814 if (LoopBlockSet.empty())
1815 return LoopBlockSet;
1816
1817 // We found backedges, recurse through them to identify the loop blocks.
1818 while (!Worklist.empty()) {
1819 BasicBlock *BB = Worklist.pop_back_val();
1820 assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1821
1822 // No need to walk past the header.
1823 if (BB == Header)
1824 continue;
1825
1826 // Because we know the inner loop structure remains valid we can use the
1827 // loop structure to jump immediately across the entire nested loop.
1828 // Further, because it is in loop simplified form, we can directly jump
1829 // to its preheader afterward.
1830 if (Loop *InnerL = LI.getLoopFor(BB))
1831 if (InnerL != &L) {
1832 assert(L.contains(InnerL) &&
1833 "Should not reach a loop *outside* this loop!");
1834 // The preheader is the only possible predecessor of the loop so
1835 // insert it into the set and check whether it was already handled.
1836 auto *InnerPH = InnerL->getLoopPreheader();
1837 assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1838 "but not contain the inner loop "
1839 "preheader!");
1840 if (!LoopBlockSet.insert(InnerPH).second)
1841 // The only way to reach the preheader is through the loop body
1842 // itself so if it has been visited the loop is already handled.
1843 continue;
1844
1845 // Insert all of the blocks (other than those already present) into
1846 // the loop set. We expect at least the block that led us to find the
1847 // inner loop to be in the block set, but we may also have other loop
1848 // blocks if they were already enqueued as predecessors of some other
1849 // outer loop block.
1850 for (auto *InnerBB : InnerL->blocks()) {
1851 if (InnerBB == BB) {
1852 assert(LoopBlockSet.count(InnerBB) &&
1853 "Block should already be in the set!");
1854 continue;
1855 }
1856
1857 LoopBlockSet.insert(InnerBB);
1858 }
1859
1860 // Add the preheader to the worklist so we will continue past the
1861 // loop body.
1862 Worklist.push_back(InnerPH);
1863 continue;
1864 }
1865
1866 // Insert any predecessors that were in the original loop into the new
1867 // set, and if the insert is successful, add them to the worklist.
1868 for (auto *Pred : predecessors(BB))
1869 if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1870 Worklist.push_back(Pred);
1871 }
1872
1873 assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
1874
1875 // We've found all the blocks participating in the loop, return our completed
1876 // set.
1877 return LoopBlockSet;
1878}
1879
1880/// Rebuild a loop after unswitching removes some subset of blocks and edges.
1881///
1882/// The removal may have removed some child loops entirely but cannot have
1883/// disturbed any remaining child loops. However, they may need to be hoisted
1884/// to the parent loop (or to be top-level loops). The original loop may be
1885/// completely removed.
1886///
1887/// The sibling loops resulting from this update are returned. If the original
1888/// loop remains a valid loop, it will be the first entry in this list with all
1889/// of the newly sibling loops following it.
1890///
1891/// Returns true if the loop remains a loop after unswitching, and false if it
1892/// is no longer a loop after unswitching (and should not continue to be
1893/// referenced).
1895 LoopInfo &LI,
1896 SmallVectorImpl<Loop *> &HoistedLoops,
1897 ScalarEvolution *SE) {
1898 auto *PH = L.getLoopPreheader();
1899
1900 // Compute the actual parent loop from the exit blocks. Because we may have
1901 // pruned some exits the loop may be different from the original parent.
1902 Loop *ParentL = nullptr;
1903 SmallVector<Loop *, 4> ExitLoops;
1904 SmallVector<BasicBlock *, 4> ExitsInLoops;
1905 ExitsInLoops.reserve(ExitBlocks.size());
1906 for (auto *ExitBB : ExitBlocks)
1907 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1908 ExitLoops.push_back(ExitL);
1909 ExitsInLoops.push_back(ExitBB);
1910 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1911 ParentL = ExitL;
1912 }
1913
1914 // Recompute the blocks participating in this loop. This may be empty if it
1915 // is no longer a loop.
1916 auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1917
1918 // If we still have a loop, we need to re-set the loop's parent as the exit
1919 // block set changing may have moved it within the loop nest. Note that this
1920 // can only happen when this loop has a parent as it can only hoist the loop
1921 // *up* the nest.
1922 if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1923 // Remove this loop's (original) blocks from all of the intervening loops.
1924 for (Loop *IL = L.getParentLoop(); IL != ParentL;
1925 IL = IL->getParentLoop()) {
1926 IL->getBlocksSet().erase(PH);
1927 for (auto *BB : L.blocks())
1928 IL->getBlocksSet().erase(BB);
1929 llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1930 return BB == PH || L.contains(BB);
1931 });
1932 }
1933
1934 LI.changeLoopFor(PH, ParentL);
1935 L.getParentLoop()->removeChildLoop(&L);
1936 if (ParentL)
1937 ParentL->addChildLoop(&L);
1938 else
1939 LI.addTopLevelLoop(&L);
1940 }
1941
1942 // Now we update all the blocks which are no longer within the loop.
1943 auto &Blocks = L.getBlocksVector();
1944 auto BlocksSplitI =
1945 LoopBlockSet.empty()
1946 ? Blocks.begin()
1947 : std::stable_partition(
1948 Blocks.begin(), Blocks.end(),
1949 [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
1950
1951 // Before we erase the list of unlooped blocks, build a set of them.
1952 SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
1953 if (LoopBlockSet.empty())
1954 UnloopedBlocks.insert(PH);
1955
1956 // Now erase these blocks from the loop.
1957 for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
1958 L.getBlocksSet().erase(BB);
1959 Blocks.erase(BlocksSplitI, Blocks.end());
1960
1961 // Sort the exits in ascending loop depth, we'll work backwards across these
1962 // to process them inside out.
1963 llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1964 return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
1965 });
1966
1967 // We'll build up a set for each exit loop.
1968 SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
1969 Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
1970
1971 auto RemoveUnloopedBlocksFromLoop =
1972 [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
1973 for (auto *BB : UnloopedBlocks)
1974 L.getBlocksSet().erase(BB);
1975 llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
1976 return UnloopedBlocks.count(BB);
1977 });
1978 };
1979
1981 while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
1982 assert(Worklist.empty() && "Didn't clear worklist!");
1983 assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
1984
1985 // Grab the next exit block, in decreasing loop depth order.
1986 BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
1987 Loop &ExitL = *LI.getLoopFor(ExitBB);
1988 assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
1989
1990 // Erase all of the unlooped blocks from the loops between the previous
1991 // exit loop and this exit loop. This works because the ExitInLoops list is
1992 // sorted in increasing order of loop depth and thus we visit loops in
1993 // decreasing order of loop depth.
1994 for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
1995 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1996
1997 // Walk the CFG back until we hit the cloned PH adding everything reachable
1998 // and in the unlooped set to this exit block's loop.
1999 Worklist.push_back(ExitBB);
2000 do {
2001 BasicBlock *BB = Worklist.pop_back_val();
2002 // We can stop recursing at the cloned preheader (if we get there).
2003 if (BB == PH)
2004 continue;
2005
2006 for (BasicBlock *PredBB : predecessors(BB)) {
2007 // If this pred has already been moved to our set or is part of some
2008 // (inner) loop, no update needed.
2009 if (!UnloopedBlocks.erase(PredBB)) {
2010 assert((NewExitLoopBlocks.count(PredBB) ||
2011 ExitL.contains(LI.getLoopFor(PredBB))) &&
2012 "Predecessor not in a nested loop (or already visited)!");
2013 continue;
2014 }
2015
2016 // We just insert into the loop set here. We'll add these blocks to the
2017 // exit loop after we build up the set in a deterministic order rather
2018 // than the predecessor-influenced visit order.
2019 bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
2020 (void)Inserted;
2021 assert(Inserted && "Should only visit an unlooped block once!");
2022
2023 // And recurse through to its predecessors.
2024 Worklist.push_back(PredBB);
2025 }
2026 } while (!Worklist.empty());
2027
2028 // If blocks in this exit loop were directly part of the original loop (as
2029 // opposed to a child loop) update the map to point to this exit loop. This
2030 // just updates a map and so the fact that the order is unstable is fine.
2031 for (auto *BB : NewExitLoopBlocks)
2032 if (Loop *BBL = LI.getLoopFor(BB))
2033 if (BBL == &L || !L.contains(BBL))
2034 LI.changeLoopFor(BB, &ExitL);
2035
2036 // We will remove the remaining unlooped blocks from this loop in the next
2037 // iteration or below.
2038 NewExitLoopBlocks.clear();
2039 }
2040
2041 // Any remaining unlooped blocks are no longer part of any loop unless they
2042 // are part of some child loop.
2043 for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
2044 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2045 for (auto *BB : UnloopedBlocks)
2046 if (Loop *BBL = LI.getLoopFor(BB))
2047 if (BBL == &L || !L.contains(BBL))
2048 LI.changeLoopFor(BB, nullptr);
2049
2050 // Sink all the child loops whose headers are no longer in the loop set to
2051 // the parent (or to be top level loops). We reach into the loop and directly
2052 // update its subloop vector to make this batch update efficient.
2053 auto &SubLoops = L.getSubLoopsVector();
2054 auto SubLoopsSplitI =
2055 LoopBlockSet.empty()
2056 ? SubLoops.begin()
2057 : std::stable_partition(
2058 SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
2059 return LoopBlockSet.count(SubL->getHeader());
2060 });
2061 for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
2062 HoistedLoops.push_back(HoistedL);
2063 HoistedL->setParentLoop(nullptr);
2064
2065 // To compute the new parent of this hoisted loop we look at where we
2066 // placed the preheader above. We can't lookup the header itself because we
2067 // retained the mapping from the header to the hoisted loop. But the
2068 // preheader and header should have the exact same new parent computed
2069 // based on the set of exit blocks from the original loop as the preheader
2070 // is a predecessor of the header and so reached in the reverse walk. And
2071 // because the loops were all in simplified form the preheader of the
2072 // hoisted loop can't be part of some *other* loop.
2073 if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
2074 NewParentL->addChildLoop(HoistedL);
2075 else
2076 LI.addTopLevelLoop(HoistedL);
2077 }
2078 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
2079
2080 // Actually delete the loop if nothing remained within it.
2081 if (Blocks.empty()) {
2082 assert(SubLoops.empty() &&
2083 "Failed to remove all subloops from the original loop!");
2084 if (Loop *ParentL = L.getParentLoop())
2085 ParentL->removeChildLoop(llvm::find(*ParentL, &L));
2086 else
2087 LI.removeLoop(llvm::find(LI, &L));
2088 // markLoopAsDeleted for L should be triggered by the caller (it is
2089 // typically done within postUnswitch).
2090 if (SE)
2092 LI.destroy(&L);
2093 return false;
2094 }
2095
2096 return true;
2097}
2098
2099/// Helper to visit a dominator subtree, invoking a callable on each node.
2100///
2101/// Returning false at any point will stop walking past that node of the tree.
2102template <typename CallableT>
2103void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
2105 DomWorklist.push_back(DT[BB]);
2106#ifndef NDEBUG
2108 Visited.insert(DT[BB]);
2109#endif
2110 do {
2111 DomTreeNode *N = DomWorklist.pop_back_val();
2112
2113 // Visit this node.
2114 if (!Callable(N->getBlock()))
2115 continue;
2116
2117 // Accumulate the child nodes.
2118 for (DomTreeNode *ChildN : *N) {
2119 assert(Visited.insert(ChildN).second &&
2120 "Cannot visit a node twice when walking a tree!");
2121 DomWorklist.push_back(ChildN);
2122 }
2123 } while (!DomWorklist.empty());
2124}
2125
2127 bool CurrentLoopValid, bool PartiallyInvariant,
2128 bool InjectedCondition, ArrayRef<Loop *> NewLoops) {
2129 // If we did a non-trivial unswitch, we have added new (cloned) loops.
2130 if (!NewLoops.empty())
2131 U.addSiblingLoops(NewLoops);
2132
2133 // If the current loop remains valid, we should revisit it to catch any
2134 // other unswitch opportunities. Otherwise, we need to mark it as deleted.
2135 if (CurrentLoopValid) {
2136 if (PartiallyInvariant) {
2137 // Mark the new loop as partially unswitched, to avoid unswitching on
2138 // the same condition again.
2139 auto &Context = L.getHeader()->getContext();
2140 MDNode *DisableUnswitchMD = MDNode::get(
2141 Context,
2142 MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
2144 Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
2145 {DisableUnswitchMD});
2146 L.setLoopID(NewLoopID);
2147 } else if (InjectedCondition) {
2148 // Do the same for injection of invariant conditions.
2149 auto &Context = L.getHeader()->getContext();
2150 MDNode *DisableUnswitchMD = MDNode::get(
2151 Context,
2152 MDString::get(Context, "llvm.loop.unswitch.injection.disable"));
2154 Context, L.getLoopID(), {"llvm.loop.unswitch.injection"},
2155 {DisableUnswitchMD});
2156 L.setLoopID(NewLoopID);
2157 } else
2158 U.revisitCurrentLoop();
2159 } else
2160 U.markLoopAsDeleted(L, LoopName);
2161}
2162
2164 Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
2165 IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI,
2167 LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition) {
2168 auto *ParentBB = TI.getParent();
2169 BranchInst *BI = dyn_cast<BranchInst>(&TI);
2170 SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2171
2172 // Save the current loop name in a variable so that we can report it even
2173 // after it has been deleted.
2174 std::string LoopName(L.getName());
2175
2176 // We can only unswitch switches, conditional branches with an invariant
2177 // condition, or combining invariant conditions with an instruction or
2178 // partially invariant instructions.
2179 assert((SI || (BI && BI->isConditional())) &&
2180 "Can only unswitch switches and conditional branch!");
2181 bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
2182 bool FullUnswitch =
2183 SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] &&
2184 !PartiallyInvariant);
2185 if (FullUnswitch)
2186 assert(Invariants.size() == 1 &&
2187 "Cannot have other invariants with full unswitching!");
2188 else
2189 assert(isa<Instruction>(skipTrivialSelect(BI->getCondition())) &&
2190 "Partial unswitching requires an instruction as the condition!");
2191
2192 if (MSSAU && VerifyMemorySSA)
2193 MSSAU->getMemorySSA()->verifyMemorySSA();
2194
2195 // Constant and BBs tracking the cloned and continuing successor. When we are
2196 // unswitching the entire condition, this can just be trivially chosen to
2197 // unswitch towards `true`. However, when we are unswitching a set of
2198 // invariants combined with `and` or `or` or partially invariant instructions,
2199 // the combining operation determines the best direction to unswitch: we want
2200 // to unswitch the direction that will collapse the branch.
2201 bool Direction = true;
2202 int ClonedSucc = 0;
2203 if (!FullUnswitch) {
2205 (void)Cond;
2207 PartiallyInvariant) &&
2208 "Only `or`, `and`, an `select`, partially invariant instructions "
2209 "can combine invariants being unswitched.");
2210 if (!match(Cond, m_LogicalOr())) {
2211 if (match(Cond, m_LogicalAnd()) ||
2212 (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
2213 Direction = false;
2214 ClonedSucc = 1;
2215 }
2216 }
2217 }
2218
2219 BasicBlock *RetainedSuccBB =
2220 BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2221 SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2222 if (BI)
2223 UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2224 else
2225 for (auto Case : SI->cases())
2226 if (Case.getCaseSuccessor() != RetainedSuccBB)
2227 UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2228
2229 assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
2230 "Should not unswitch the same successor we are retaining!");
2231
2232 // The branch should be in this exact loop. Any inner loop's invariant branch
2233 // should be handled by unswitching that inner loop. The caller of this
2234 // routine should filter out any candidates that remain (but were skipped for
2235 // whatever reason).
2236 assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
2237
2238 // Compute the parent loop now before we start hacking on things.
2239 Loop *ParentL = L.getParentLoop();
2240 // Get blocks in RPO order for MSSA update, before changing the CFG.
2241 LoopBlocksRPO LBRPO(&L);
2242 if (MSSAU)
2243 LBRPO.perform(&LI);
2244
2245 // Compute the outer-most loop containing one of our exit blocks. This is the
2246 // furthest up our loopnest which can be mutated, which we will use below to
2247 // update things.
2248 Loop *OuterExitL = &L;
2250 L.getUniqueExitBlocks(ExitBlocks);
2251 for (auto *ExitBB : ExitBlocks) {
2252 // ExitBB can be an exit block for several levels in the loop nest. Make
2253 // sure we find the top most.
2254 Loop *NewOuterExitL = getTopMostExitingLoop(ExitBB, LI);
2255 if (!NewOuterExitL) {
2256 // We exited the entire nest with this block, so we're done.
2257 OuterExitL = nullptr;
2258 break;
2259 }
2260 if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2261 OuterExitL = NewOuterExitL;
2262 }
2263
2264 // At this point, we're definitely going to unswitch something so invalidate
2265 // any cached information in ScalarEvolution for the outer most loop
2266 // containing an exit block and all nested loops.
2267 if (SE) {
2268 if (OuterExitL)
2269 SE->forgetLoop(OuterExitL);
2270 else
2271 SE->forgetTopmostLoop(&L);
2273 }
2274
2275 // If the edge from this terminator to a successor dominates that successor,
2276 // store a map from each block in its dominator subtree to it. This lets us
2277 // tell when cloning for a particular successor if a block is dominated by
2278 // some *other* successor with a single data structure. We use this to
2279 // significantly reduce cloning.
2281 for (auto *SuccBB : llvm::concat<BasicBlock *const>(ArrayRef(RetainedSuccBB),
2282 UnswitchedSuccBBs))
2283 if (SuccBB->getUniquePredecessor() ||
2284 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2285 return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2286 }))
2287 visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2288 DominatingSucc[BB] = SuccBB;
2289 return true;
2290 });
2291
2292 // Split the preheader, so that we know that there is a safe place to insert
2293 // the conditional branch. We will change the preheader to have a conditional
2294 // branch on LoopCond. The original preheader will become the split point
2295 // between the unswitched versions, and we will have a new preheader for the
2296 // original loop.
2297 BasicBlock *SplitBB = L.getLoopPreheader();
2298 BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2299
2300 // Keep track of the dominator tree updates needed.
2302
2303 // Clone the loop for each unswitched successor.
2305 VMaps.reserve(UnswitchedSuccBBs.size());
2307 for (auto *SuccBB : UnswitchedSuccBBs) {
2308 VMaps.emplace_back(new ValueToValueMapTy());
2309 ClonedPHs[SuccBB] = buildClonedLoopBlocks(
2310 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2311 DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU, SE);
2312 }
2313
2314 // Drop metadata if we may break its semantics by moving this instr into the
2315 // split block.
2316 if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2318 // Do not spend time trying to understand if we can keep it, just drop it
2319 // to save compile time.
2320 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2321 else {
2322 // It is only legal to preserve make.implicit metadata if we are
2323 // guaranteed not to reach an implicit null check after following this branch.
2324 ICFLoopSafetyInfo SafetyInfo;
2325 SafetyInfo.computeLoopSafetyInfo(&L);
2326 if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2327 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2328 }
2329 }
2330
2331 // The stitching of the branched code back together depends on whether we're
2332 // doing full unswitching or not with the exception that we always want to
2333 // nuke the initial terminator placed in the split block.
2334 SplitBB->getTerminator()->eraseFromParent();
2335 if (FullUnswitch) {
2336 // Splice the terminator from the original loop and rewrite its
2337 // successors.
2338 TI.moveBefore(*SplitBB, SplitBB->end());
2339
2340 // Keep a clone of the terminator for MSSA updates.
2341 Instruction *NewTI = TI.clone();
2342 NewTI->insertInto(ParentBB, ParentBB->end());
2343
2344 // First wire up the moved terminator to the preheaders.
2345 if (BI) {
2346 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2347 BI->setSuccessor(ClonedSucc, ClonedPH);
2348 BI->setSuccessor(1 - ClonedSucc, LoopPH);
2350 if (InsertFreeze)
2351 Cond = new FreezeInst(Cond, Cond->getName() + ".fr", BI->getIterator());
2352 BI->setCondition(Cond);
2353 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2354 } else {
2355 assert(SI && "Must either be a branch or switch!");
2356
2357 // Walk the cases and directly update their successors.
2358 assert(SI->getDefaultDest() == RetainedSuccBB &&
2359 "Not retaining default successor!");
2360 SI->setDefaultDest(LoopPH);
2361 for (const auto &Case : SI->cases())
2362 if (Case.getCaseSuccessor() == RetainedSuccBB)
2363 Case.setSuccessor(LoopPH);
2364 else
2365 Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2366
2367 if (InsertFreeze)
2368 SI->setCondition(new FreezeInst(SI->getCondition(),
2369 SI->getCondition()->getName() + ".fr",
2370 SI->getIterator()));
2371
2372 // We need to use the set to populate domtree updates as even when there
2373 // are multiple cases pointing at the same successor we only want to
2374 // remove and insert one edge in the domtree.
2375 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2376 DTUpdates.push_back(
2377 {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2378 }
2379
2380 if (MSSAU) {
2381 DT.applyUpdates(DTUpdates);
2382 DTUpdates.clear();
2383
2384 // Remove all but one edge to the retained block and all unswitched
2385 // blocks. This is to avoid having duplicate entries in the cloned Phis,
2386 // when we know we only keep a single edge for each case.
2387 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2388 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2389 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2390
2391 for (auto &VMap : VMaps)
2392 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2393 /*IgnoreIncomingWithNoClones=*/true);
2394 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2395
2396 // Remove all edges to unswitched blocks.
2397 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2398 MSSAU->removeEdge(ParentBB, SuccBB);
2399 }
2400
2401 // Now unhook the successor relationship as we'll be replacing
2402 // the terminator with a direct branch. This is much simpler for branches
2403 // than switches so we handle those first.
2404 if (BI) {
2405 // Remove the parent as a predecessor of the unswitched successor.
2406 assert(UnswitchedSuccBBs.size() == 1 &&
2407 "Only one possible unswitched block for a branch!");
2408 BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2409 UnswitchedSuccBB->removePredecessor(ParentBB,
2410 /*KeepOneInputPHIs*/ true);
2411 DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2412 } else {
2413 // Note that we actually want to remove the parent block as a predecessor
2414 // of *every* case successor. The case successor is either unswitched,
2415 // completely eliminating an edge from the parent to that successor, or it
2416 // is a duplicate edge to the retained successor as the retained successor
2417 // is always the default successor and as we'll replace this with a direct
2418 // branch we no longer need the duplicate entries in the PHI nodes.
2419 SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2420 assert(NewSI->getDefaultDest() == RetainedSuccBB &&
2421 "Not retaining default successor!");
2422 for (const auto &Case : NewSI->cases())
2423 Case.getCaseSuccessor()->removePredecessor(
2424 ParentBB,
2425 /*KeepOneInputPHIs*/ true);
2426
2427 // We need to use the set to populate domtree updates as even when there
2428 // are multiple cases pointing at the same successor we only want to
2429 // remove and insert one edge in the domtree.
2430 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2431 DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2432 }
2433
2434 // After MSSAU update, remove the cloned terminator instruction NewTI.
2435 ParentBB->getTerminator()->eraseFromParent();
2436
2437 // Create a new unconditional branch to the continuing block (as opposed to
2438 // the one cloned).
2439 BranchInst::Create(RetainedSuccBB, ParentBB);
2440 } else {
2441 assert(BI && "Only branches have partial unswitching.");
2442 assert(UnswitchedSuccBBs.size() == 1 &&
2443 "Only one possible unswitched block for a branch!");
2444 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2445 // When doing a partial unswitch, we have to do a bit more work to build up
2446 // the branch in the split block.
2447 if (PartiallyInvariant)
2449 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
2450 else {
2452 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
2453 FreezeLoopUnswitchCond, BI, &AC, DT);
2454 }
2455 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2456
2457 if (MSSAU) {
2458 DT.applyUpdates(DTUpdates);
2459 DTUpdates.clear();
2460
2461 // Perform MSSA cloning updates.
2462 for (auto &VMap : VMaps)
2463 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2464 /*IgnoreIncomingWithNoClones=*/true);
2465 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2466 }
2467 }
2468
2469 // Apply the updates accumulated above to get an up-to-date dominator tree.
2470 DT.applyUpdates(DTUpdates);
2471
2472 // Now that we have an accurate dominator tree, first delete the dead cloned
2473 // blocks so that we can accurately build any cloned loops. It is important to
2474 // not delete the blocks from the original loop yet because we still want to
2475 // reference the original loop to understand the cloned loop's structure.
2476 deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2477
2478 // Build the cloned loop structure itself. This may be substantially
2479 // different from the original structure due to the simplified CFG. This also
2480 // handles inserting all the cloned blocks into the correct loops.
2481 SmallVector<Loop *, 4> NonChildClonedLoops;
2482 for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2483 buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2484
2485 // Now that our cloned loops have been built, we can update the original loop.
2486 // First we delete the dead blocks from it and then we rebuild the loop
2487 // structure taking these deletions into account.
2488 deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, SE, LoopUpdater);
2489
2490 if (MSSAU && VerifyMemorySSA)
2491 MSSAU->getMemorySSA()->verifyMemorySSA();
2492
2493 SmallVector<Loop *, 4> HoistedLoops;
2494 bool IsStillLoop =
2495 rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops, SE);
2496
2497 if (MSSAU && VerifyMemorySSA)
2498 MSSAU->getMemorySSA()->verifyMemorySSA();
2499
2500 // This transformation has a high risk of corrupting the dominator tree, and
2501 // the below steps to rebuild loop structures will result in hard to debug
2502 // errors in that case so verify that the dominator tree is sane first.
2503 // FIXME: Remove this when the bugs stop showing up and rely on existing
2504 // verification steps.
2505 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
2506
2507 if (BI && !PartiallyInvariant) {
2508 // If we unswitched a branch which collapses the condition to a known
2509 // constant we want to replace all the uses of the invariants within both
2510 // the original and cloned blocks. We do this here so that we can use the
2511 // now updated dominator tree to identify which side the users are on.
2512 assert(UnswitchedSuccBBs.size() == 1 &&
2513 "Only one possible unswitched block for a branch!");
2514 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2515
2516 // When considering multiple partially-unswitched invariants
2517 // we can't just go replace them with constants in both branches.
2518 //
2519 // For 'AND' we infer that true branch ("continue") means true
2520 // for each invariant operand.
2521 // For 'OR' we can infer that false branch ("continue") means false
2522 // for each invariant operand.
2523 // So it happens that for multiple-partial case we don't replace
2524 // in the unswitched branch.
2525 bool ReplaceUnswitched =
2526 FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
2527
2528 ConstantInt *UnswitchedReplacement =
2531 ConstantInt *ContinueReplacement =
2534 for (Value *Invariant : Invariants) {
2535 assert(!isa<Constant>(Invariant) &&
2536 "Should not be replacing constant values!");
2537 // Use make_early_inc_range here as set invalidates the iterator.
2538 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
2539 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2540 if (!UserI)
2541 continue;
2542
2543 // Replace it with the 'continue' side if in the main loop body, and the
2544 // unswitched if in the cloned blocks.
2545 if (DT.dominates(LoopPH, UserI->getParent()))
2546 U.set(ContinueReplacement);
2547 else if (ReplaceUnswitched &&
2548 DT.dominates(ClonedPH, UserI->getParent()))
2549 U.set(UnswitchedReplacement);
2550 }
2551 }
2552 }
2553
2554 // We can change which blocks are exit blocks of all the cloned sibling
2555 // loops, the current loop, and any parent loops which shared exit blocks
2556 // with the current loop. As a consequence, we need to re-form LCSSA for
2557 // them. But we shouldn't need to re-form LCSSA for any child loops.
2558 // FIXME: This could be made more efficient by tracking which exit blocks are
2559 // new, and focusing on them, but that isn't likely to be necessary.
2560 //
2561 // In order to reasonably rebuild LCSSA we need to walk inside-out across the
2562 // loop nest and update every loop that could have had its exits changed. We
2563 // also need to cover any intervening loops. We add all of these loops to
2564 // a list and sort them by loop depth to achieve this without updating
2565 // unnecessary loops.
2566 auto UpdateLoop = [&](Loop &UpdateL) {
2567#ifndef NDEBUG
2568 UpdateL.verifyLoop();
2569 for (Loop *ChildL : UpdateL) {
2570 ChildL->verifyLoop();
2571 assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2572 "Perturbed a child loop's LCSSA form!");
2573 }
2574#endif
2575 // First build LCSSA for this loop so that we can preserve it when
2576 // forming dedicated exits. We don't want to perturb some other loop's
2577 // LCSSA while doing that CFG edit.
2578 formLCSSA(UpdateL, DT, &LI, SE);
2579
2580 // For loops reached by this loop's original exit blocks we may have
2581 // introduced new, non-dedicated exits. At least try to re-form dedicated
2582 // exits for these loops. This may fail if they couldn't have dedicated
2583 // exits to start with.
2584 formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
2585 };
2586
2587 // For non-child cloned loops and hoisted loops, we just need to update LCSSA
2588 // and we can do it in any order as they don't nest relative to each other.
2589 //
2590 // Also check if any of the loops we have updated have become top-level loops
2591 // as that will necessitate widening the outer loop scope.
2592 for (Loop *UpdatedL :
2593 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2594 UpdateLoop(*UpdatedL);
2595 if (UpdatedL->isOutermost())
2596 OuterExitL = nullptr;
2597 }
2598 if (IsStillLoop) {
2599 UpdateLoop(L);
2600 if (L.isOutermost())
2601 OuterExitL = nullptr;
2602 }
2603
2604 // If the original loop had exit blocks, walk up through the outer most loop
2605 // of those exit blocks to update LCSSA and form updated dedicated exits.
2606 if (OuterExitL != &L)
2607 for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2608 OuterL = OuterL->getParentLoop())
2609 UpdateLoop(*OuterL);
2610
2611#ifndef NDEBUG
2612 // Verify the entire loop structure to catch any incorrect updates before we
2613 // progress in the pass pipeline.
2614 LI.verify(DT);
2615#endif
2616
2617 // Now that we've unswitched something, make callbacks to report the changes.
2618 // For that we need to merge together the updated loops and the cloned loops
2619 // and check whether the original loop survived.
2620 SmallVector<Loop *, 4> SibLoops;
2621 for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2622 if (UpdatedL->getParentLoop() == ParentL)
2623 SibLoops.push_back(UpdatedL);
2624 postUnswitch(L, LoopUpdater, LoopName, IsStillLoop, PartiallyInvariant,
2625 InjectedCondition, SibLoops);
2626
2627 if (MSSAU && VerifyMemorySSA)
2628 MSSAU->getMemorySSA()->verifyMemorySSA();
2629
2630 if (BI)
2631 ++NumBranches;
2632 else
2633 ++NumSwitches;
2634}
2635
2636/// Recursively compute the cost of a dominator subtree based on the per-block
2637/// cost map provided.
2638///
2639/// The recursive computation is memoized into the provided DT-indexed cost map
2640/// to allow querying it for most nodes in the domtree without it becoming
2641/// quadratic.
///
/// \p N is the root of the dominator subtree being costed, \p BBCostMap holds
/// the per-block costs, and \p DTCostMap is the memoization cache keyed by
/// dominator tree node. Returns 0 when \p N's block has no entry in
/// \p BBCostMap; otherwise returns that block's own cost plus the recursively
/// computed cost of every child of \p N.
// NOTE(review): this listing jumps from 2641 to 2643 and from 2643 to 2646, so
// the line naming the function and the lines declaring BBCostMap / DTCostMap
// are not shown here -- confirm them against the upstream file.
2643 DomTreeNode &N,
2646 // Don't accumulate cost (or recurse through) blocks not in our block cost
2647 // map and thus not part of the duplication cost being considered.
2648 auto BBCostIt = BBCostMap.find(N.getBlock());
2649 if (BBCostIt == BBCostMap.end())
2650 return 0;
2651
2652 // Lookup this node to see if we already computed its cost.
2653 auto DTCostIt = DTCostMap.find(&N);
2654 if (DTCostIt != DTCostMap.end())
2655 return DTCostIt->second;
2656
2657 // If not, we have to compute it. We can't use insert above and update
2658 // because computing the cost may insert more things into the map.
// The fold is seeded with this block's own cost and adds each child's subtree
// cost; the recursive calls are what may populate DTCostMap along the way.
2659 InstructionCost Cost = std::accumulate(
2660 N.begin(), N.end(), BBCostIt->second,
2661 [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
2662 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2663 });
// Cache the result. The (void) cast keeps Inserted "used" when the assert
// below is compiled out.
2664 bool Inserted = DTCostMap.insert({&N, Cost}).second;
2665 (void)Inserted;
2666 assert(Inserted && "Should not insert a node while visiting children!");
2667 return Cost;
2668}
2669
2670/// Turns a select instruction into implicit control flow branch,
2671/// making the following replacement:
2672///
2673/// head:
2674/// --code before select--
2675/// select %cond, %trueval, %falseval
2676/// --code after select--
2677///
2678/// into
2679///
2680/// head:
2681/// --code before select--
2682/// br i1 %cond, label %then, label %tail
2683///
2684/// then:
2685/// br %tail
2686///
2687/// tail:
2688/// phi [ %trueval, %then ], [ %falseval, %head]
2689/// --code after select--
2690///
2691/// It also makes all relevant DT and LI updates, so that all structures are in
2692/// valid state after this transform.
///
/// Returns the conditional branch that now terminates the original (head)
/// block. \p AC is not referenced anywhere in the visible body.
// NOTE(review): this listing jumps from 2692 to 2694, so the line carrying the
// function name, return type and the leading parameters (SI and DT are used
// below) is not shown here -- confirm it against the upstream file.
2694 LoopInfo &LI, MemorySSAUpdater *MSSAU,
2695 AssumptionCache *AC) {
2696 LLVM_DEBUG(dbgs() << "Turning " << *SI << " into a branch.\n");
2697 BasicBlock *HeadBB = SI->getParent();
2698
// Split at the select, branching on its condition and forwarding the select's
// !prof metadata. DT and LI are handed to the helper so it can update them.
2699 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2700 SplitBlockAndInsertIfThen(SI->getCondition(), SI, false,
2701 SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
// After the split, HeadBB ends in the new conditional branch: successor 0 is
// the inserted "then" block and successor 1 is the tail holding the select.
2702 auto *CondBr = cast<BranchInst>(HeadBB->getTerminator());
2703 BasicBlock *ThenBB = CondBr->getSuccessor(0),
2704 *TailBB = CondBr->getSuccessor(1);
2705 if (MSSAU)
2706 MSSAU->moveAllAfterSpliceBlocks(HeadBB, TailBB, SI);
2707
// Replace the select with a PHI placed at the select's position: the true
// value arrives via ThenBB, the false value directly from HeadBB.
2708 PHINode *Phi =
2709 PHINode::Create(SI->getType(), 2, "unswitched.select", SI->getIterator());
2710 Phi->addIncoming(SI->getTrueValue(), ThenBB);
2711 Phi->addIncoming(SI->getFalseValue(), HeadBB);
2712 SI->replaceAllUsesWith(Phi);
2713 SI->eraseFromParent();
2714
2715 if (MSSAU && VerifyMemorySSA)
2716 MSSAU->getMemorySSA()->verifyMemorySSA();
2717
2718 ++NumSelects;
2719 return CondBr;
2720}
2721
2722/// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
2723/// making the following replacement:
2724///
2725/// --code before guard--
2726/// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2727/// --code after guard--
2728///
2729/// into
2730///
2731/// --code before guard--
2732/// br i1 %cond, label %guarded, label %deopt
2733///
2734/// guarded:
2735/// --code after guard--
2736///
2737/// deopt:
2738/// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2739/// unreachable
2740///
2741/// It also makes all relevant DT and LI updates, so that all structures are in
2742/// valid state after this transform.
///
/// Returns the conditional branch that now terminates the block which
/// originally contained the guard.
// NOTE(review): this listing skips lines 2743, 2746, 2755 and 2771. The missing
// lines include the one naming the function (GI is used below but declared
// there), the start of the call whose result initializes DeoptBlockTerm, and
// one statement after the guard is moved -- confirm all of them against the
// upstream file before editing this function.
2744 DominatorTree &DT, LoopInfo &LI,
2745 MemorySSAUpdater *MSSAU) {
2747 LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
2748 BasicBlock *CheckBB = GI->getParent();
2749
2750 if (MSSAU && VerifyMemorySSA)
2751 MSSAU->getMemorySSA()->verifyMemorySSA();
2752
2753 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2754 Instruction *DeoptBlockTerm =
2756 GI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2757 BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2758 // SplitBlockAndInsertIfThen inserts control flow that branches to
2759 // DeoptBlockTerm if the condition is true. We want the opposite.
2760 CheckBI->swapSuccessors();
2761
// After the swap, successor 0 is the continuation ("guarded") and successor 1
// is the block that will hold the guard call ("deopt").
2762 BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2763 GuardedBlock->setName("guarded");
2764 CheckBI->getSuccessor(1)->setName("deopt");
2765 BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2766
2767 if (MSSAU)
2768 MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2769
// Sink the guard call itself into the deopt block, ahead of its terminator.
2770 GI->moveBefore(DeoptBlockTerm);
2772
// Mirror the move in MemorySSA: the guard's memory access is cast to a
// MemoryDef and relocated into DeoptBlock before the terminator.
2773 if (MSSAU) {
2774 MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
2775 MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2776 if (VerifyMemorySSA)
2777 MSSAU->getMemorySSA()->verifyMemorySSA();
2778 }
2779
2780 if (VerifyLoopInfo)
2781 LI.verify(DT);
2782 ++NumGuards;
2783 return CheckBI;
2784}
2785
2786/// Cost multiplier is a way to limit potentially exponential behavior
2787/// of loop-unswitch. Cost is multiplied in proportion of 2^number of unswitch
2788/// candidates available. Also accounting for the number of "sibling" loops with
2789/// the idea to account for previous unswitches that already happened on this
2790/// cluster of loops. There was an attempt to keep this formula simple,
2791/// just enough to limit the worst case behavior. Even if it is not that simple
2792/// now it is still not an attempt to provide a detailed heuristic size
2793/// prediction.
2794///
2795/// TODO: Make a proper accounting of "explosion" effect for all kinds of
2796/// unswitch candidates, making adequate predictions instead of wild guesses.
2797/// That requires knowing not just the number of "remaining" candidates but
2798/// also costs of unswitching for each of these candidates.
///
/// Returns 1 when the early-out below applies; otherwise returns
/// SiblingsMultiplier * 2^ClonesPower, saturated at UnswitchThreshold.
// NOTE(review): this listing jumps from 2798 to 2800, so the line carrying the
// function name and return type is not shown here -- confirm it against the
// upstream file.
2800 const Instruction &TI, const Loop &L, const LoopInfo &LI,
2801 const DominatorTree &DT,
2802 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates) {
2803
2804 // Guards and other exiting conditions do not contribute to exponential
2805 // explosion as soon as they dominate the latch (otherwise there might be
2806 // another path to the latch remaining that does not allow to eliminate the
2807 // loop copy on unswitch).
2808 const BasicBlock *Latch = L.getLoopLatch();
2809 const BasicBlock *CondBlock = TI.getParent();
2810 if (DT.dominates(CondBlock, Latch) &&
2811 (isGuard(&TI) ||
2812 (TI.isTerminator() &&
2813 llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
2814 return L.contains(SuccBB);
2815 }) <= 1))) {
2816 NumCostMultiplierSkipped++;
2817 return 1;
2818 }
2819
// Siblings are the parent's sub-loops, or the top-level loops in LI when L has
// no parent loop.
2820 auto *ParentL = L.getParentLoop();
2821 int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2822 : std::distance(LI.begin(), LI.end()));
2823 // Count amount of clones that all the candidates might cause during
2824 // unswitching. Branch/guard/select counts as 1, switch counts as log2 of its
2825 // cases.
2826 int UnswitchedClones = 0;
2827 for (const auto &Candidate : UnswitchCandidates) {
2828 const Instruction *CI = Candidate.TI;
2829 const BasicBlock *CondBlock = CI->getParent();
// When the candidate's block dominates the latch, successors outside the loop
// are left out of the count below (and a guard contributes nothing).
2830 bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2831 if (isa<SelectInst>(CI)) {
2832 UnswitchedClones++;
2833 continue;
2834 }
2835 if (isGuard(CI)) {
2836 if (!SkipExitingSuccessors)
2837 UnswitchedClones++;
2838 continue;
2839 }
2840 int NonExitingSuccessors =
2841 llvm::count_if(successors(CondBlock),
2842 [SkipExitingSuccessors, &L](const BasicBlock *SuccBB) {
2843 return !SkipExitingSuccessors || L.contains(SuccBB);
2844 });
2845 UnswitchedClones += Log2_32(NonExitingSuccessors);
2846 }
2847
2848 // Ignore up to the "unscaled candidates" number of unswitch candidates
2849 // when calculating the power-of-two scaling of the cost. The main idea
2850 // with this control is to allow a small number of unswitches to happen
2851 // and rely more on siblings multiplier (see below) when the number
2852 // of candidates is small.
2853 unsigned ClonesPower =
2854 std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2855
2856 // Allowing top-level loops to spread a bit more than nested ones.
2857 int SiblingsMultiplier =
2858 std::max((ParentL ? SiblingsCount
2859 : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2860 1);
2861 // Compute the cost multiplier in a way that won't overflow by saturating
2862 // at an upper bound.
2863 int CostMultiplier;
2864 if (ClonesPower > Log2_32(UnswitchThreshold) ||
2865 SiblingsMultiplier > UnswitchThreshold)
2866 CostMultiplier = UnswitchThreshold;
2867 else
2868 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2869 (int)UnswitchThreshold);
2870
// NOTE(review): the debug output below evaluates (1 << ClonesPower) even when
// the saturating branch above was taken, i.e. without the bound on ClonesPower
// that guards the multiplication -- confirm the shift cannot exceed the width
// of int in debug builds.
2871 LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier
2872 << " (siblings " << SiblingsMultiplier << " * clones "
2873 << (1 << ClonesPower) << ")"
2874 << " for unswitch candidate: " << TI << "\n");
2875 return CostMultiplier;
2876}
2877
2880 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch,
2881 const Loop &L, const LoopInfo &LI, AAResults &AA,
2882 const MemorySSAUpdater *MSSAU) {
// Scans the blocks that belong directly to L and appends to UnswitchCandidates
// (which must be empty on entry) every select, guard, switch and conditional
// branch whose condition yields loop invariants, plus at most one partially
// invariant candidate on the header terminator. When that last candidate is
// found, PartialIVInfo and PartialIVCondBranch are filled in. Returns true if
// any candidate was collected.
// NOTE(review): this listing skips lines 2878-2879 (the start of the signature,
// including the function name and the UnswitchCandidates parameter) as well as
// 2886, 2893 and 2895 inside the lambda below -- confirm them against the
// upstream file.
2883 assert(UnswitchCandidates.empty() && "Should be!");
2884
2885 auto AddUnswitchCandidatesForInst = [&](Instruction *I, Value *Cond) {
2887 if (isa<Constant>(Cond))
2888 return;
// A fully invariant condition is a candidate on its own.
2889 if (L.isLoopInvariant(Cond)) {
2890 UnswitchCandidates.push_back({I, {Cond}});
2891 return;
2892 }
// NOTE(review): the guard opening this scope and the callee that produces
// Invariants are on lines missing from this listing.
2894 TinyPtrVector<Value *> Invariants =
2896 L, *static_cast<Instruction *>(Cond), LI);
2897 if (!Invariants.empty())
2898 UnswitchCandidates.push_back({I, std::move(Invariants)});
2899 }
2900 };
2901
2902 // Whether or not we should also collect guards in the loop.
// Guards are only looked for when the option is on and the module has a
// declaration of the guard intrinsic with at least one use.
2903 bool CollectGuards = false;
2904 if (UnswitchGuards) {
2905 auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
2906 Intrinsic::getName(Intrinsic::experimental_guard));
2907 if (GuardDecl && !GuardDecl->use_empty())
2908 CollectGuards = true;
2909 }
2910
2911 for (auto *BB : L.blocks()) {
// Skip blocks whose innermost loop is not L (i.e. blocks of nested loops).
2912 if (LI.getLoopFor(BB) != &L)
2913 continue;
2914
2915 for (auto &I : *BB) {
2916 if (auto *SI = dyn_cast<SelectInst>(&I)) {
2917 auto *Cond = SI->getCondition();
2918 // Do not unswitch vector selects and logical and/or selects
2919 if (Cond->getType()->isIntegerTy(1) && !SI->getType()->isIntegerTy(1))
2920 AddUnswitchCandidatesForInst(SI, Cond);
2921 } else if (CollectGuards && isGuard(&I)) {
2922 auto *Cond =
2923 skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
2924 // TODO: Support AND, OR conditions and partial unswitching.
2925 if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2926 UnswitchCandidates.push_back({&I, {Cond}});
2927 }
2928 }
2929
2930 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2931 // We can only consider fully loop-invariant switch conditions as we need
2932 // to completely eliminate the switch after unswitching.
2933 if (!isa<Constant>(SI->getCondition()) &&
2934 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
2935 UnswitchCandidates.push_back({SI, {SI->getCondition()}});
2936 continue;
2937 }
2938
// Only conditional branches with two distinct successors are of interest.
2939 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2940 if (!BI || !BI->isConditional() ||
2941 BI->getSuccessor(0) == BI->getSuccessor(1))
2942 continue;
2943
2944 AddUnswitchCandidatesForInst(BI, BI->getCondition());
2945 }
2946
// Partial-invariance analysis needs MemorySSA, can be disabled via loop
// metadata, and is skipped when the header terminator is already a candidate.
2947 if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
2948 !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
2949 return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
2950 })) {
2951 MemorySSA *MSSA = MSSAU->getMemorySSA();
2952 if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
2953 LLVM_DEBUG(
2954 dbgs() << "simple-loop-unswitch: Found partially invariant condition "
2955 << *Info->InstToDuplicate[0] << "\n");
2956 PartialIVInfo = *Info;
2957 PartialIVCondBranch = L.getHeader()->getTerminator();
2958 TinyPtrVector<Value *> ValsToDuplicate;
2959 llvm::append_range(ValsToDuplicate, Info->InstToDuplicate);
2960 UnswitchCandidates.push_back(
2961 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
2962 }
2963 }
2964 return !UnswitchCandidates.empty();
2965}
2966
2967/// Tries to canonicalize condition described by:
2968///
2969/// br (LHS pred RHS), label IfTrue, label IfFalse
2970///
2971/// into its equivalent where `Pred` is something that we support for injected
2972/// invariants (so far it is limited to ult), LHS in canonicalized form is
2973/// non-invariant and RHS is an invariant.
///
/// All of \p Pred, \p LHS, \p RHS, \p IfTrue and \p IfFalse are updated in
/// place. Nothing is reported back about whether the canonical form was
/// reached; callers have to re-check the result themselves.
// NOTE(review): this listing jumps from 2973 to 2975, so the line carrying the
// function name and return type is not shown here; line 2993 (the value
// argument of the constant built at the end) is missing too -- confirm both
// against the upstream file.
2975 ICmpInst::Predicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue,
2976 BasicBlock *&IfFalse, const Loop &L) {
// Make IfTrue the in-loop successor by inverting the predicate if needed.
2977 if (!L.contains(IfTrue)) {
2978 Pred = ICmpInst::getInversePredicate(Pred);
2979 std::swap(IfTrue, IfFalse);
2980 }
2981
2982 // Move loop-invariant argument to RHS position.
2983 if (L.isLoopInvariant(LHS)) {
2984 Pred = ICmpInst::getSwappedPredicate(Pred);
2985 std::swap(LHS, RHS);
2986 }
2987
2988 if (Pred == ICmpInst::ICMP_SGE && match(RHS, m_Zero())) {
2989 // Turn "x >=s 0" into "x <u UMIN_INT"
// ("x >=s 0" holds exactly when the sign bit of x is clear, i.e. when x is
// unsigned-less-than 2^(bitwidth-1); "UMIN_INT" presumably denotes that
// value -- the expression producing it is on the missing line.)
2990 Pred = ICmpInst::ICMP_ULT;
2991 RHS = ConstantInt::get(
2992 RHS->getContext(),
2994 }
2995}
2996
2997/// Returns true, if predicate described by ( \p Pred, \p LHS, \p RHS )
2998/// succeeding into blocks ( \p IfTrue, \p IfFalse) can be optimized by
2999/// injecting a loop-invariant condition.
///
/// Concretely, all of the following must hold: \p LHS is loop-variant and
/// \p RHS is loop-invariant, \p Pred is ICMP_ULT, \p IfTrue is inside \p L
/// while \p IfFalse is outside of it, and \p IfTrue is not the loop header.
// NOTE(review): this listing jumps from 2999 to 3001, so the line carrying the
// function name and return type is not shown here -- confirm it against the
// upstream file.
3001 const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS,
3002 const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L) {
3003 if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
3004 return false;
3005 // TODO: Support other predicates.
3006 if (Pred != ICmpInst::ICMP_ULT)
3007 return false;
3008 // TODO: Support non-loop-exiting branches?
3009 if (!L.contains(IfTrue) || L.contains(IfFalse))
3010 return false;
3011 // FIXME: For some reason this causes problems with MSSA updates, need to
3012 // investigate why. So far, just don't unswitch latch.
// (A branch whose in-loop successor is the header is a back edge source.)
3013 if (L.getHeader() == IfTrue)
3014 return false;
3015 return true;
3016}
3017
3018/// Returns true, if metadata on \p BI allows us to optimize branching into \p
3019/// TakenSucc via injection of invariant conditions. The branch should be
3020/// likely enough to go to \p TakenSucc; the information about this comes from
3021/// the branch weight metadata.
///
/// Returns false when no branch weights can be extracted from \p BI, when the
/// weights are degenerate, or when the probability of reaching \p TakenSucc
/// is below (T - 1) / T.
// NOTE(review): this listing skips line 3022 (function name, return type and
// the BI parameter) and line 3027 (the definition of the threshold T used
// below) -- confirm both against the upstream file.
3023 const BasicBlock *TakenSucc) {
3024 SmallVector<uint32_t> Weights;
3025 if (!extractBranchWeights(*BI, Weights))
3026 return false;
3028 BranchProbability LikelyTaken(T - 1, T);
3029
3030 assert(Weights.size() == 2 && "Unexpected profile data!");
// Pick the weight of the edge leading to TakenSucc.
3031 size_t Idx = BI->getSuccessor(0) == TakenSucc ? 0 : 1;
3032 auto Num = Weights[Idx];
3033 auto Denom = Weights[0] + Weights[1];
3034 // Degenerate or overflowed metadata.
// (If the 32-bit sum wrapped around, it is smaller than either weight, so the
// Num > Denom test catches it.)
3035 if (Denom == 0 || Num > Denom)
3036 return false;
3037 BranchProbability ActualTaken(Num, Denom);
3038 if (LikelyTaken > ActualTaken)
3039 return false;
3040 return true;
3041}
3042
3043/// Materialize pending invariant condition of the given candidate into IR. The
3044/// injected loop-invariant condition implies the original loop-variant branch
3045/// condition, so the materialization turns
3046///
3047/// loop_block:
3048/// ...
3049/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3050///
3051/// into
3052///
3053/// preheader:
3054/// %invariant_cond = LHS pred RHS
3055/// ...
3056/// loop_block:
3057/// br i1 %invariant_cond, label InLoopSucc, label OriginalCheck
3058/// OriginalCheck:
3059/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3060/// ...
///
/// Returns a new candidate built from the injected branch and the injected
/// condition, carrying over the cost of \p Candidate. \p AC is not referenced
/// anywhere in the visible body.
// NOTE(review): this listing skips line 3088 (the right-hand side of the bit
// width comparison) and line 3120 (the declaration that opens the DTUpdates
// initializer list) -- confirm both against the upstream file.
3061static NonTrivialUnswitchCandidate
3062injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
3063 DominatorTree &DT, LoopInfo &LI,
3064 AssumptionCache &AC, MemorySSAUpdater *MSSAU) {
3065 assert(Candidate.hasPendingInjection() && "Nothing to inject!");
3066 BasicBlock *Preheader = L.getLoopPreheader();
3067 assert(Preheader && "Loop is not in simplified form?");
3068 assert(LI.getLoopFor(Candidate.TI->getParent()) == &L &&
3069 "Unswitching branch of inner loop!");
3070
3071 auto Pred = Candidate.PendingInjection->Pred;
3072 auto *LHS = Candidate.PendingInjection->LHS;
3073 auto *RHS = Candidate.PendingInjection->RHS;
3074 auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;
3075 auto *TI = cast<BranchInst>(Candidate.TI);
3076 auto *BB = Candidate.TI->getParent();
// The out-of-loop successor is whichever successor of TI is not InLoopSucc.
3077 auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1)
3078 : TI->getSuccessor(0);
3079 // FIXME: Remove this once limitation on successors is lifted.
3080 assert(L.contains(InLoopSucc) && "Not supported yet!");
3081 assert(!L.contains(OutOfLoopSucc) && "Not supported yet!");
3082 auto &Ctx = BB->getContext();
3083
// Operands of different types are reconciled with a zero-extension emitted in
// the preheader; only unsigned predicates are accepted here.
3084 IRBuilder<> Builder(Preheader->getTerminator());
3085 assert(ICmpInst::isUnsigned(Pred) && "Not supported yet!");
3086 if (LHS->getType() != RHS->getType()) {
3087 if (LHS->getType()->getIntegerBitWidth() <
3089 LHS = Builder.CreateZExt(LHS, RHS->getType(), LHS->getName() + ".wide");
3090 else
3091 RHS = Builder.CreateZExt(RHS, LHS->getType(), RHS->getName() + ".wide");
3092 }
3093 // Do not use builder here: CreateICmp may simplify this into a constant and
3094 // unswitching will break. Better optimize it away later.
3095 auto *InjectedCond =
3096 ICmpInst::Create(Instruction::ICmp, Pred, LHS, RHS, "injected.cond",
3097 Preheader->getTerminator()->getIterator());
3098
// Create the block that keeps the original (variant) check; it is placed in
// the function's block list before InLoopSucc.
3099 BasicBlock *CheckBlock = BasicBlock::Create(Ctx, BB->getName() + ".check",
3100 BB->getParent(), InLoopSucc);
// BB now branches on the injected condition: straight to InLoopSucc when it
// holds, otherwise to CheckBlock.
3101 Builder.SetInsertPoint(TI);
3102 auto *InvariantBr =
3103 Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
3104
// CheckBlock receives a copy of the original branch (same condition and
// successor order), after which the original terminator is removed.
3105 Builder.SetInsertPoint(CheckBlock);
3106 Builder.CreateCondBr(TI->getCondition(), TI->getSuccessor(0),
3107 TI->getSuccessor(1));
3108 TI->eraseFromParent();
3109
3110 // Fixup phis.
// InLoopSucc gains CheckBlock as an extra predecessor, so each PHI gets the
// same value it receives from BB. OutOfLoopSucc is now reached from CheckBlock
// instead of BB, so its PHI uses of BB are redirected.
3111 for (auto &I : *InLoopSucc) {
3112 auto *PN = dyn_cast<PHINode>(&I);
3113 if (!PN)
3114 break;
3115 auto *Inc = PN->getIncomingValueForBlock(BB);
3116 PN->addIncoming(Inc, CheckBlock);
3117 }
3118 OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);
3119
3121 { DominatorTree::Insert, BB, CheckBlock },
3122 { DominatorTree::Insert, CheckBlock, InLoopSucc },
3123 { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
3124 { DominatorTree::Delete, BB, OutOfLoopSucc }
3125 };
3126
// The same edge updates are applied to the dominator tree and, if present, to
// MemorySSA; the new block is then registered as part of L.
3127 DT.applyUpdates(DTUpdates);
3128 if (MSSAU)
3129 MSSAU->applyUpdates(DTUpdates, DT);
3130 L.addBasicBlockToLoop(CheckBlock, LI);
3131
3132#ifndef NDEBUG
3133 DT.verify();
3134 LI.verify(DT);
3135 if (MSSAU && VerifyMemorySSA)
3136 MSSAU->getMemorySSA()->verifyMemorySSA();
3137#endif
3138
3139 // TODO: In fact, cost of unswitching a new invariant candidate is *slightly*
3140 // higher because we have just inserted a new block. Need to think how to
3141 // adjust the cost of injected candidates when it was first computed.
3142 LLVM_DEBUG(dbgs() << "Injected a new loop-invariant branch " << *InvariantBr
3143 << " and considering it for unswitching.");
3144 ++NumInvariantConditionsInjected;
3145 return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },
3146 Candidate.Cost);
3147}
3148
3149/// Given chain of loop branch conditions looking like:
3150/// br (Variant < Invariant1)
3151/// br (Variant < Invariant2)
3152/// br (Variant < Invariant3)
3153/// ...
3154/// collect set of invariant conditions on which we want to unswitch, which
3155/// look like:
3156/// Invariant1 <= Invariant2
3157/// Invariant2 <= Invariant3
3158/// ...
3159/// Though they might not immediately exist in the IR, we can still inject them.
///
/// For every pair of adjacent entries (Prev, Next) in \p Compares, one
/// candidate is appended to \p UnswitchCandidates. It is attached to Prev's
/// terminator and carries the pending comparison
/// "Next->Invariant <non-strict Pred> Prev->Invariant" with Prev's in-loop
/// successor. Returns false (adding nothing) when \p Compares has fewer than
/// two entries, and true otherwise. \p L and \p DT are not referenced anywhere
/// in the visible body.
// NOTE(review): this listing skips line 3160 (function name and return type),
// line 3162 (the Pred and Compares parameters used below) and line 3165 --
// confirm them against the upstream file.
3161 SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, Loop &L,
3163 const DominatorTree &DT) {
3164
3166 assert(ICmpInst::isStrictPredicate(Pred));
3167 if (Compares.size() < 2)
3168 return false;
3169 ICmpInst::Predicate NonStrictPred = ICmpInst::getNonStrictPredicate(Pred);
// Walk the list with a sliding window of two neighbouring entries.
3170 for (auto Prev = Compares.begin(), Next = Compares.begin() + 1;
3171 Next != Compares.end(); ++Prev, ++Next) {
3172 Value *LHS = Next->Invariant;
3173 Value *RHS = Prev->Invariant;
3174 BasicBlock *InLoopSucc = Prev->InLoopSucc;
3175 InjectedInvariant ToInject(NonStrictPred, LHS, RHS, InLoopSucc);
// The candidate starts with no cost (std::nullopt) and a pending injection.
3176 NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },
3177 std::nullopt, std::move(ToInject));
3178 UnswitchCandidates.push_back(std::move(Candidate));
3179 }
3180 return true;
3181}
3182
3183/// Collect unswitch candidates by invariant conditions that are not immediately
3184/// present in the loop. However, they can be injected into the code if we
3185/// decide it's profitable.
3186/// An example of such conditions is following:
3187///
3188/// for (...) {
3189/// x = load ...
3190/// if (! x <u C1) break;
3191/// if (! x <u C2) break;
3192/// <do something>
3193/// }
3194///
3195/// We can unswitch by condition "C1 <=u C2". If that is true, then "x <u C1 <=
3196/// C2" automatically implies "x <u C2", so we can get rid of one of
3197/// loop-variant checks in unswitched loop version.
///
/// Returns true if at least one candidate with a pending injection was added.
/// \p PartialIVInfo, \p PartialIVCondBranch, \p AA and \p MSSAU are not
/// referenced anywhere in the visible body.
// NOTE(review): this listing skips lines 3198-3199 (function name, return type
// and the UnswitchCandidates parameter), 3203 (the condition guarding the
// first early return), 3214 (the declaration of CandidatesULT), 3219 (the
// declaration of Pred) and 3248 (the start of the statement updating Found) --
// confirm all of them against the upstream file.
3200 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L,
3201 const DominatorTree &DT, const LoopInfo &LI, AAResults &AA,
3202 const MemorySSAUpdater *MSSAU) {
3204 return false;
3205
3206 if (!DT.isReachableFromEntry(L.getHeader()))
3207 return false;
3208 auto *Latch = L.getLoopLatch();
3209 // Need to have a single latch and a preheader.
3210 if (!Latch)
3211 return false;
3212 assert(L.getLoopPreheader() && "Must have a preheader!");
3213
3215 // Traverse the conditions that dominate latch (and therefore dominate each
3216 // other).
// The walk starts at the latch and follows immediate dominators upwards until
// it leaves the loop, so blocks nearer the latch are visited first.
3217 for (auto *DTN = DT.getNode(Latch); L.contains(DTN->getBlock());
3218 DTN = DTN->getIDom()) {
3220 Value *LHS = nullptr, *RHS = nullptr;
3221 BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
3222 auto *BB = DTN->getBlock();
3223 // Ignore inner loops.
3224 if (LI.getLoopFor(BB) != &L)
3225 continue;
// Only conditional branches on an integer icmp are considered.
3226 auto *Term = BB->getTerminator();
3227 if (!match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
3228 m_BasicBlock(IfTrue), m_BasicBlock(IfFalse))))
3229 continue;
3230 if (!LHS->getType()->isIntegerTy())
3231 continue;
3232 canonicalizeForInvariantConditionInjection(Pred, LHS, RHS, IfTrue, IfFalse,
3233 L);
3234 if (!shouldTryInjectInvariantCondition(Pred, LHS, RHS, IfTrue, IfFalse, L))
3235 continue;
3236 if (!shouldTryInjectBasingOnMetadata(cast<BranchInst>(Term), IfTrue))
3237 continue;
3238 // Strip ZEXT for unsigned predicate.
3239 // TODO: once signed predicates are supported, also strip SEXT.
// Comparisons are grouped by their variant operand with zero-extensions peeled
// off, so checks of the same value at different widths share a bucket.
3240 CompareDesc Desc(cast<BranchInst>(Term), RHS, IfTrue);
3241 while (auto *Zext = dyn_cast<ZExtInst>(LHS))
3242 LHS = Zext->getOperand(0);
3243 CandidatesULT[LHS].push_back(Desc);
3244 }
3245
3246 bool Found = false;
3247 for (auto &It : CandidatesULT)
3249 UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);
3250 return Found;
3251}
3252
// NOTE(review): line 3253, which carries this function's signature, is absent
// from the listing. The index further down this page gives it as
// `static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)`.
//
// Returns false as soon as one of the checks below fails, and true if all of
// them pass.
3254 if (!L.isSafeToClone())
3255 return false;
// Scan every instruction in the loop for token values that are used outside of
// their defining block and for convergent calls; either one rejects the loop.
3256 for (auto *BB : L.blocks())
3257 for (auto &I : *BB) {
3258 if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
3259 return false;
3260 if (auto *CB = dyn_cast<CallBase>(&I)) {
3261 assert(!CB->cannotDuplicate() && "Checked by L.isSafeToClone().");
3262 if (CB->isConvergent())
3263 return false;
3264 }
3265 }
3266
3267 // Check if there are irreducible CFG cycles in this loop. If so, we cannot
3268 // easily unswitch non-trivial edges out of the loop. Doing so might turn the
3269 // irreducible control flow into reducible control flow and introduce new
3270 // loops "out of thin air". If we ever discover important use cases for doing
3271 // this, we can add support to loop unswitch, but it is a lot of complexity
3272 // for what seems little or no real world benefit.
3273 LoopBlocksRPO RPOT(&L);
3274 RPOT.perform(&LI);
3275 if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
3276 return false;
3277
// NOTE(review): line 3278 is absent; it must declare ExitBlocks, the container
// filled by getUniqueExitBlocks() on the next line.
3279 L.getUniqueExitBlocks(ExitBlocks);
3280 // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
3281 // instruction as we don't know how to split those exit blocks.
3282 // FIXME: We should teach SplitBlock to handle this and remove this
3283 // restriction.
3284 for (auto *ExitBB : ExitBlocks) {
// Only the first non-PHI instruction of each exit block is inspected.
3285 auto *I = ExitBB->getFirstNonPHI();
3286 if (isa<CleanupPadInst>(I) || isa<CatchSwitchInst>(I)) {
3287 LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "
3288 "in exit block\n");
3289 return false;
3290 }
3291 }
3292
3293 return true;
3294}
3295
/// Returns a copy of the candidate from \p UnswitchCandidates that has the
/// lowest computed unswitching cost, with that cost stored in its Cost field.
///
/// Asserts that at least one candidate was considered. In the code visible
/// here the winning cost is not compared against UnswitchThreshold.
3296static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
3297 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates, const Loop &L,
3298 const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC,
3299 const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo) {
3300 // Unswitching these terminators will require duplicating parts of the loop,
3301 // so we need to be able to model that cost. Compute the ephemeral
3302 // values and set up a data structure to hold per-BB costs. We cache each
3303 // block's cost so that we don't recompute this when considering different
3304 // subsets of the loop for duplication during unswitching.
// NOTE(review): line 3305 is absent; it must declare EphValues, which is filled
// on the next line.
3306 CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
// NOTE(review): line 3307 is absent; presumably it declares BBCostMap, the
// per-block cost cache written in the loop below -- confirm.
3308
3309 // Compute the cost of each block, as well as the total loop cost. Also, bail
3310 // out if we see instructions which are incompatible with loop unswitching
3311 // (convergent, noduplicate, or cross-basic-block tokens).
3312 // FIXME: We might be able to safely handle some of these in non-duplicated
3313 // regions.
// NOTE(review): no bail-out is visible in the loop below; the convergent and
// token checks appear in the function that starts at listing line 3254. The
// comment above may be stale -- confirm.
// NOTE(review): lines 3314 and 3316-3317 are absent; only the hasMinSize() test
// of what looks like a cost-kind selection survives in this listing.
3315 L.getHeader()->getParent()->hasMinSize()
3318 InstructionCost LoopCost = 0;
3319 for (auto *BB : L.blocks()) {
// NOTE(review): line 3320 is absent; it must declare the per-block Cost that is
// asserted on and accumulated below.
3321 for (auto &I : *BB) {
// Ephemeral values are excluded from the cost.
3322 if (EphValues.count(&I))
3323 continue;
// NOTE(review): line 3324 is absent; presumably it adds the cost of I to Cost.
3325 }
3326 assert(Cost >= 0 && "Must not have negative costs!");
3327 LoopCost += Cost;
3328 assert(LoopCost >= 0 && "Must not have negative loop costs!");
3329 BBCostMap[BB] = Cost;
3330 }
3331 LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");
3332
3333 // Now we find the best candidate by searching for the one with the following
3334 // properties in order:
3335 //
3336 // 1) An unswitching cost below the threshold
3337 // 2) The smallest number of duplicated unswitch candidates (to avoid
3338 // creating redundant subsequent unswitching)
3339 // 3) The smallest cost after unswitching.
3340 //
3341 // We prioritize reducing fanout of unswitch candidates provided the cost
3342 // remains below the threshold because this has a multiplicative effect.
3343 //
3344 // This requires memoizing each dominator subtree to avoid redundant work.
3345 //
3346 // FIXME: Need to actually do the number of candidates part above.
// NOTE(review): line 3347 is absent; presumably it declares DTCostMap, the
// memoization map handed to computeDomSubtreeCost() below -- confirm.
3348 // Given a terminator which might be unswitched, computes the non-duplicated
3349 // cost for that terminator.
3350 auto ComputeUnswitchedCost = [&](Instruction &TI,
3351 bool FullUnswitch) -> InstructionCost {
3352 // Unswitching selects unswitches the entire loop.
3353 if (isa<SelectInst>(TI))
3354 return LoopCost;
3355
3356 BasicBlock &BB = *TI.getParent();
// NOTE(review): line 3357 is absent; it must declare Visited, the set used to
// de-duplicate successors below.
3358
// NOTE(review): line 3359 is absent; it must declare the lambda-local Cost that
// accumulates the cost of the subtrees which do not need duplicating.
3360 for (BasicBlock *SuccBB : successors(&BB)) {
3361 // Don't count successors more than once.
3362 if (!Visited.insert(SuccBB).second)
3363 continue;
3364
3365 // If this is a partial unswitch candidate, then it must be a conditional
3366 // branch with a condition of either `or`, `and`, their corresponding
3367 // select forms or partially invariant instructions. In that case, one of
3368 // the successors is necessarily duplicated, so don't even try to remove
3369 // its cost.
3370 if (!FullUnswitch) {
3371 auto &BI = cast<BranchInst>(TI);
3372 Value *Cond = skipTrivialSelect(BI.getCondition());
3373 if (match(Cond, m_LogicalAnd())) {
3374 if (SuccBB == BI.getSuccessor(1))
3375 continue;
3376 } else if (match(Cond, m_LogicalOr())) {
3377 if (SuccBB == BI.getSuccessor(0))
3378 continue;
// Neither a logical and nor a logical or: the successor to skip is chosen from
// PartialIVInfo.KnownValue (successor 0 if it is one, successor 1 otherwise).
3379 } else if ((PartialIVInfo.KnownValue->isOneValue() &&
3380 SuccBB == BI.getSuccessor(0)) ||
3381 (!PartialIVInfo.KnownValue->isOneValue() &&
3382 SuccBB == BI.getSuccessor(1)))
3383 continue;
3384 }
3385
3386 // This successor's domtree will not need to be duplicated after
3387 // unswitching if the edge to the successor dominates it (and thus the
3388 // entire tree). This essentially means there is no other path into this
3389 // subtree and so it will end up live in only one clone of the loop.
3390 if (SuccBB->getUniquePredecessor() ||
3391 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
3392 return PredBB == &BB || DT.dominates(SuccBB, PredBB);
3393 })) {
3394 Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
3395 assert(Cost <= LoopCost &&
3396 "Non-duplicated cost should never exceed total loop cost!");
3397 }
3398 }
3399
3400 // Now scale the cost by the number of unique successors minus one. We
3401 // subtract one because there is already at least one copy of the entire
3402 // loop. This is computing the new cost of unswitching a condition.
3403 // Note that guards always have 2 unique successors that are implicit and
3404 // will be materialized if we decide to unswitch it.
3405 int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
3406 assert(SuccessorsCount > 1 &&
3407 "Cannot unswitch a condition without multiple distinct successors!");
3408 return (LoopCost - Cost) * (SuccessorsCount - 1);
3409 };
3410
3411 std::optional<NonTrivialUnswitchCandidate> Best;
3412 for (auto &Candidate : UnswitchCandidates) {
3413 Instruction &TI = *Candidate.TI;
3414 ArrayRef<Value *> Invariants = Candidate.Invariants;
3415 BranchInst *BI = dyn_cast<BranchInst>(&TI);
// A candidate counts as a full unswitch when it is not a branch, when it still
// has a pending injection, or when its single invariant is the branch condition
// itself (after skipTrivialSelect).
3416 bool FullUnswitch =
3417 !BI || Candidate.hasPendingInjection() ||
3418 (Invariants.size() == 1 &&
3419 Invariants[0] == skipTrivialSelect(BI->getCondition()));
3420 InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
3421 // Calculate cost multiplier which is a tool to limit potentially
3422 // exponential behavior of loop-unswitch.
// NOTE(review): line 3423 is absent; it must hold the `if (...) {` header that
// pairs with the `} else {` on line 3433. The condition is not visible here.
3424 int CostMultiplier =
3425 CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
3426 assert(
3427 (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
3428 "cost multiplier needs to be in the range of 1..UnswitchThreshold");
3429 CandidateCost *= CostMultiplier;
3430 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3431 << " (multiplier: " << CostMultiplier << ")"
3432 << " for unswitch candidate: " << TI << "\n");
3433 } else {
3434 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3435 << " for unswitch candidate: " << TI << "\n");
3436 }
3437
// Strict comparison: on a tie the earlier candidate is kept.
3438 if (!Best || CandidateCost < Best->Cost) {
3439 Best = Candidate;
3440 Best->Cost = CandidateCost;
3441 }
3442 }
3443 assert(Best && "Must be!");
3444 return *Best;
3445}
3446
3447// Insert a freeze on an unswitched branch if all of the following are true:
3448// 1. freeze-loop-unswitch-cond option is true
3449// 2. The branch may not execute in the loop pre-transformation. If a branch may
3450// not execute and could cause UB, it would always cause UB if it is hoisted outside
3451// of the loop. Insert a freeze to prevent this case.
3452// 3. The branch condition may be poison or undef
// NOTE(review): line 3453 (the start of the signature) is absent from the
// listing. The index further down this page gives the full signature as
// `static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT,
// AssumptionCache &AC)`.
3454 AssumptionCache &AC) {
// Only branch and switch terminators are expected here.
3455 assert(isa<BranchInst>(TI) || isa<SwitchInst>(TI));
// NOTE(review): line 3456 (the condition guarding this early return) is absent;
// presumably it implements condition 1 above -- confirm.
3457 return false;
3458
// Condition 2: no freeze is needed when TI is guaranteed to execute in L.
3459 ICFLoopSafetyInfo SafetyInfo;
3460 SafetyInfo.computeLoopSafetyInfo(&L);
3461 if (SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
3462 return false;
3463
// Fetch the branch or switch condition, passed through skipTrivialSelect().
3464 Value *Cond;
3465 if (BranchInst *BI = dyn_cast<BranchInst>(&TI))
3466 Cond = skipTrivialSelect(BI->getCondition());
3467 else
3468 Cond = skipTrivialSelect(cast<SwitchInst>(&TI)->getCondition());
// NOTE(review): line 3469 is absent; it holds the start of the return
// expression whose arguments continue on the next line. Condition 3 above
// suggests a negated isGuaranteedNotToBeUndefOrPoison query evaluated at the
// preheader terminator -- confirm against the real source.
3470 Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
3471}
3472
// NOTE(review): lines 3473 and 3475 of the signature are absent from the
// listing. The index further down this page gives the full signature as
// `static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
// AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI,
// ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)`.
//
// Collects unswitch candidates for L, picks the cheapest one and unswitches it
// when its cost is below UnswitchThreshold. Returns true if the loop was
// unswitched and false otherwise.
3474 AssumptionCache &AC, AAResults &AA,
3476 MemorySSAUpdater *MSSAU,
3477 LPMUpdater &LoopUpdater) {
3478 // Collect all invariant conditions within this loop (as opposed to an inner
3479 // loop which would be handled when visiting that inner loop).
// NOTE(review): line 3480 is absent; it must declare UnswitchCandidates, the
// container filled by the two collect* calls below.
3481 IVConditionInfo PartialIVInfo;
3482 Instruction *PartialIVCondBranch = nullptr;
3483 collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
3484 PartialIVCondBranch, L, LI, AA, MSSAU);
// Injection-based candidates are skipped when the loop carries the
// "llvm.loop.unswitch.injection.disable" metadata option.
3485 if (!findOptionMDForLoop(&L, "llvm.loop.unswitch.injection.disable"))
3486 collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo,
3487 PartialIVCondBranch, L, DT, LI, AA,
3488 MSSAU);
3489 // If we didn't find any candidates, we're done.
3490 if (UnswitchCandidates.empty())
3491 return false;
3492
3493 LLVM_DEBUG(
3494 dbgs() << "Considering " << UnswitchCandidates.size()
3495 << " non-trivial loop invariant conditions for unswitching.\n");
3496
3497 NonTrivialUnswitchCandidate Best = findBestNonTrivialUnswitchCandidate(
3498 UnswitchCandidates, L, DT, LI, AC, TTI, PartialIVInfo);
3499
3500 assert(Best.TI && "Failed to find loop unswitch candidate");
3501 assert(Best.Cost && "Failed to compute cost");
3502
// The cheapest candidate must be strictly below the threshold to proceed.
3503 if (*Best.Cost >= UnswitchThreshold) {
3504 LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << *Best.Cost
3505 << "\n");
3506 return false;
3507 }
3508
// Materialize a still-pending injected condition before unswitching, and
// remember that this happened so it can be forwarded to the unswitch call.
3509 bool InjectedCondition = false;
3510 if (Best.hasPendingInjection()) {
3511 Best = injectPendingInvariantConditions(Best, L, DT, LI, AC, MSSAU);
3512 InjectedCondition = true;
3513 }
3514 assert(!Best.hasPendingInjection() &&
3515 "All injections should have been done by now!");
3516
// The instructions recorded for duplication are dropped unless the chosen
// terminator is the recorded partially-invariant branch.
3517 if (Best.TI != PartialIVCondBranch)
3518 PartialIVInfo.InstToDuplicate.clear();
3519
3520 bool InsertFreeze;
3521 if (auto *SI = dyn_cast<SelectInst>(Best.TI)) {
3522 // If the best candidate is a select, turn it into a branch. Select
3523 // instructions with a poison conditional do not propagate poison, but
3524 // branching on poison causes UB. Insert a freeze on the select
3525 // conditional to prevent UB after turning the select into a branch.
3526 InsertFreeze = !isGuaranteedNotToBeUndefOrPoison(
3527 SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT);
3528 Best.TI = turnSelectIntoBranch(SI, DT, LI, MSSAU, &AC);
3529 } else {
3530 // If the best candidate is a guard, turn it into a branch.
3531 if (isGuard(Best.TI))
3532 Best.TI =
3533 turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
3534 InsertFreeze = shouldInsertFreeze(L, *Best.TI, DT, AC);
3535 }
3536
3537 LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Cost
3538 << ") terminator: " << *Best.TI << "\n");
3539 unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT,
3540 LI, AC, SE, MSSAU, LoopUpdater, InsertFreeze,
3541 InjectedCondition);
3542 return true;
3543}
3544
3545/// Unswitch control flow predicated on loop invariant conditions.
3546///
3547/// This first hoists all branches or switches which are trivial (IE, do not
3548/// require duplicating any part of the loop) out of the loop body. It then
3549/// looks at other loop invariant control flows and tries to unswitch those as
3550/// well by cloning the loop if the result is small enough.
3551///
3552/// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
3553/// also updated based on the unswitch. The `MSSA` analysis is also updated if
3554/// valid (i.e. its use is enabled).
3555///
3556/// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
3557/// true, we will attempt to do non-trivial unswitching as well as trivial
3558/// unswitching.
3559///
3560/// The `postUnswitch` function will be run after unswitching is complete
3561/// with information on whether or not the provided loop remains a loop and
3562/// a list of new sibling loops created.
3563///
3564/// If `SE` is non-null, we will update that analysis based on the unswitching
3565/// done.
///
/// Returns true if the loop was unswitched and false otherwise.
3566static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
3567 AssumptionCache &AC, AAResults &AA,
3568 TargetTransformInfo &TTI, bool Trivial,
3569 bool NonTrivial, ScalarEvolution *SE,
// NOTE(review): line 3570 is absent from the listing. The index further down
// this page shows the two parameters it carries:
// `MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI`.
3571 BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater) {
3572 assert(L.isRecursivelyLCSSAForm(DT, LI) &&
3573 "Loops must be in LCSSA form before unswitching.");
3574
3575 // Must be in loop simplified form: we need a preheader and dedicated exits.
3576 if (!L.isLoopSimplifyForm())
3577 return false;
3578
3579 // Try trivial unswitch first before loop over other basic blocks in the loop.
3580 if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
3581 // If we unswitched successfully we will want to clean up the loop before
3582 // processing it further so just mark it as unswitched and return.
3583 postUnswitch(L, LoopUpdater, L.getName(),
3584 /*CurrentLoopValid*/ true, /*PartiallyInvariant*/ false,
3585 /*InjectedCondition*/ false, {});
3586 return true;
3587 }
3588
3589 const Function *F = L.getHeader()->getParent();
3590
3591 // Check whether we should continue with non-trivial conditions.
3592 // EnableNonTrivialUnswitch: Global variable that forces non-trivial
3593 // unswitching for testing and debugging.
3594 // NonTrivial: Parameter that enables non-trivial unswitching for this
3595 // invocation of the transform. But this should be allowed only
3596 // for targets without branch divergence.
3597 //
3598 // FIXME: If divergence analysis becomes available to a loop
3599 // transform, we should allow unswitching for non-trivial uniform
3600 // branches even on targets that have divergence.
3601 // https://bugs.llvm.org/show_bug.cgi?id=48819
3602 bool ContinueWithNonTrivial =
// NOTE(review): line 3603, the initializer of ContinueWithNonTrivial, is absent
// from the listing; the comment above describes the inputs it is expected to
// combine.
3604 if (!ContinueWithNonTrivial)
3605 return false;
3606
3607 // Skip non-trivial unswitching for optsize functions.
3608 if (F->hasOptSize())
3609 return false;
3610
3611 // Returns true if Loop L's loop nest is cold, i.e. if the headers of L,
3612 // of the loops L is nested in, and of the loops nested in L are all cold.
// Note that the lambda parameter L shadows the enclosing function's L. PSI is
// dereferenced without a null check here; the only visible call site checks PSI
// and BFI before invoking the lambda.
3613 auto IsLoopNestCold = [&](const Loop *L) {
3614 // Check L and all of its parent loops.
3615 auto *Parent = L;
3616 while (Parent) {
3617 if (!PSI->isColdBlock(Parent->getHeader(), BFI))
3618 return false;
3619 Parent = Parent->getParentLoop();
3620 }
3621 // Next check all loops nested within L.
// NOTE(review): line 3622 is absent; it must declare Worklist, the container of
// sub-loops that is seeded on the next line.
3623 Worklist.insert(Worklist.end(), L->getSubLoops().begin(),
3624 L->getSubLoops().end());
3625 while (!Worklist.empty()) {
3626 auto *CurLoop = Worklist.pop_back_val();
3627 if (!PSI->isColdBlock(CurLoop->getHeader(), BFI))
3628 return false;
3629 Worklist.insert(Worklist.end(), CurLoop->getSubLoops().begin(),
3630 CurLoop->getSubLoops().end());
3631 }
3632 return true;
3633 };
3634
3635 // Skip cold loops in cold loop nests, as unswitching them brings little
3636 // benefit but increases the code size
3637 if (PSI && PSI->hasProfileSummary() && BFI && IsLoopNestCold(&L)) {
3638 LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
3639 return false;
3640 }
3641
3642 // Perform legality checks.
// NOTE(review): line 3643 (the condition guarding this early return) is absent;
// presumably it calls isSafeForNoNTrivialUnswitching -- confirm.
3644 return false;
3645
3646 // For non-trivial unswitching, because it often creates new loops, we rely on
3647 // the pass manager to iterate on the loops rather than trying to immediately
3648 // reach a fixed point. There is no substantial advantage to iterating
3649 // internally, and if any of the new loops are simplified enough to contain
3650 // trivial unswitching we want to prefer those.
3651
3652 // Try to unswitch the best invariant condition. We prefer this full unswitch to
3653 // a partial unswitch when possible below the threshold.
3654 if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, SE, MSSAU, LoopUpdater))
3655 return true;
3656
3657 // No other opportunities to unswitch.
3658 return false;
3659}
3660
// NOTE(review): lines 3661-3662, which carry the start of this signature, are
// absent from the listing; only the trailing `LPMUpdater &U` parameter is
// visible. The body uses L, AR, Trivial and NonTrivial, so this is presumably
// the pass's run() entry point -- confirm against the real source.
3663 LPMUpdater &U) {
3664 Function &F = *L.getHeader()->getParent();
3665 (void)F;
// PSI is taken from a cached result only; it stays null when none is cached.
3666 ProfileSummaryInfo *PSI = nullptr;
3667 if (auto OuterProxy =
// NOTE(review): line 3668 is absent; it holds the start of the expression that
// the `.getCachedResult<...>(F)` call on the next line is chained onto.
3669 .getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))
3670 PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
3671 LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
3672 << "\n");
3673
// A MemorySSA updater is created only when MemorySSA is available in AR.
3674 std::optional<MemorySSAUpdater> MSSAU;
3675 if (AR.MSSA) {
3676 MSSAU = MemorySSAUpdater(AR.MSSA);
3677 if (VerifyMemorySSA)
3678 AR.MSSA->verifyMemorySSA();
3679 }
// If nothing was unswitched, all analyses are reported as preserved.
3680 if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
3681 &AR.SE, MSSAU ? &*MSSAU : nullptr, PSI, AR.BFI, U))
3682 return PreservedAnalyses::all();
3683
3684 if (AR.MSSA && VerifyMemorySSA)
3685 AR.MSSA->verifyMemorySSA();
3686
3687 // Historically this pass has had issues with the dominator tree so verify it
3688 // in asserts builds.
3689 assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
3690
// Report the standard loop-pass preserved set, plus MemorySSA when it was in
// use.
3691 auto PA = getLoopPassPreservedAnalyses();
3692 if (AR.MSSA)
3693 PA.preserve<MemorySSAAnalysis>();
3694 return PA;
3695}
3696
// NOTE(review): line 3697 (the start of this signature) and line 3699 (the
// start of the call whose arguments continue on line 3700) are absent from the
// listing. The body appends "<[no-]nontrivial;[no-]trivial>" to OS, built from
// the NonTrivial and Trivial flags.
3698 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
3700 OS, MapClassName2PassName);
3701
3702 OS << '<';
3703 OS << (NonTrivial ? "" : "no-") << "nontrivial;";
3704 OS << (Trivial ? "" : "no-") << "trivial";
3705 OS << '>';
3706}
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
This header provides classes for managing per-loop analyses.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
uint64_t IntrinsicInst * II
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
Provides some synthesis utilities to produce sequences of values.
This file implements a set that has insertion order iteration characteristics.
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater)
Unswitch control flow predicated on loop invariant conditions.
static void canonicalizeForInvariantConditionInjection(ICmpInst::Predicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue, BasicBlock *&IfFalse, const Loop &L)
Tries to canonicalize condition described by:
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
static int CalculateUnswitchCostMultiplier(const Instruction &TI, const Loop &L, const LoopInfo &LI, const DominatorTree &DT, ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU)
Copy a set of loop invariant values, and conditionally branch on them.
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root, const LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy > > VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
static BranchInst * turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, AssumptionCache *AC)
Turns a select instruction into implicit control flow branch, making the following replacement:
static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)
void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName, bool CurrentLoopValid, bool PartiallyInvariant, bool InjectedCondition, ArrayRef< Loop * > NewLoops)
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, const Instruction *I, AssumptionCache *AC, const DominatorTree &DT)
Copy a set of loop invariant values ToDuplicate and insert them at the end of BB and conditionally br...
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
static bool shouldTryInjectInvariantCondition(const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L)
Returns true, if predicate described by ( Pred, LHS, RHS ) succeeding into blocks ( IfTrue,...
static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates, const Loop &L, const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC, const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo)
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
static Value * skipTrivialSelect(Value *Cond)
static Loop * getTopMostExitingLoop(const BasicBlock *ExitBB, const LoopInfo &LI)
static bool collectUnswitchCandidatesWithInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, const DominatorTree &DT, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
Collect unswitch candidates by invariant conditions that are not immediately present in the loop.
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
static void replaceLoopInvariantUses(const Loop &L, Value *Invariant, Constant &Replacement)
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
static bool collectUnswitchCandidates(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, const Loop &L, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
static cl::opt< bool > InjectInvariantConditions("simple-loop-unswitch-inject-invariant-conditions", cl::Hidden, cl::desc("Whether we should inject new invariants and unswitch them to " "eliminate some existing (non-invariant) conditions."), cl::init(true))
static cl::opt< bool > FreezeLoopUnswitchCond("freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation."))
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, AssumptionCache &AC)
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
static bool areLoopExitPHIsLoopInvariant(const Loop &L, const BasicBlock &ExitingBB, const BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
static bool insertCandidatesWithPendingInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, Loop &L, ICmpInst::Predicate Pred, ArrayRef< CompareDesc > Compares, const DominatorTree &DT)
Given chain of loop branch conditions looking like: br (Variant < Invariant1) br (Variant < Invariant...
static NonTrivialUnswitchCandidate injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, MemorySSAUpdater *MSSAU)
Materialize pending invariant condition of the given candidate into IR.
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
static cl::opt< unsigned > InjectInvariantConditionHotnesThreshold("simple-loop-unswitch-inject-invariant-condition-hotness-threshold", cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " "unswitch on them to eliminate branches that are " "not-taken 1/<this option> times or less."), cl::init(16))
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition)
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops, ScalarEvolution *SE)
Rebuild a loop after unswitching removes some subset of blocks and edges.
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Build the cloned blocks for an unswitched copy of the given loop.
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, LPMUpdater &LoopUpdater)
bool shouldTryInjectBasingOnMetadata(const BranchInst *BI, const BasicBlock *TakenSucc)
Returns true, if metadata on BI allows us to optimize branching into TakenSucc via injection of invar...
static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
This defines the Use class.
Value * RHS
Value * LHS
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:198
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:242
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:394
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:445
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:432
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:501
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:201
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:166
size_t size() const
Definition: BasicBlock.h:453
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.h:360
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:223
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:510
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1410
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1415
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:850
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:857
This is an important base class in LLVM.
Definition: Constant.h:41
bool isOneValue() const
Returns true if the value is one.
Definition: Constants.cpp:124
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
iterator begin()
Definition: DenseMap.h:75
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This class represents a freeze function that returns random concrete value if an operand is either a ...
This implementation of LoopSafetyInfo uses ImplicitControlFlowTracking to give precise answers on "may...
Definition: MustExecute.h:132
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:79
bool isRelational() const
Return true if the predicate is relational (not EQ or NE).
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2533
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1118
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2019
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1473
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1495
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:178
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2664
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:87
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:375
bool isTerminator() const
Definition: Instruction.h:271
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1635
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
void perform(const LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:180
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
iterator end() const
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
iterator begin() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
StringRef getName() const
Definition: LoopInfo.h:388
Metadata node.
Definition: Metadata.h:1067
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1541
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:600
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition: MemorySSA.h:373
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:928
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point)
Create a MemoryAccess in MemorySSA at a specified point in a block.
void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAccesses in a set of BasicBlocks about to be deleted.
void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:701
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1905
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:719
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition: MemorySSA.h:767
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:677
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1814
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isColdBlock(const BBType *BB, BFIT *BFI) const
Returns true if BasicBlock BB is considered cold.
The main scalar evolution driver.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may affect Scalar...
void forgetTopmostLoop(const Loop *L)
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may affect its v...
void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:103
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:356
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
Instruction::InstListType::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove the corresponding branch weight.
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
ConstantIntT * getCaseValue() const
Resolves case value for current case.
Multiway switch.
BasicBlock * getDefaultDest() const
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, InsertPosition InsertBefore=nullptr)
void setDefaultDest(BasicBlock *DefaultCase)
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool hasBranchDivergence(const Function *F=nullptr) const
Return true if branch divergence exists.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:29
void push_back(EltTy NewVal)
bool empty() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
unsigned getIntegerBitWidth() const
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: ValueMap.h:164
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: ValueMap.h:151
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:1032
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:592
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
Definition: PatternMatch.h:189
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:612
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:540
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2067
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
Definition: LoopInfo.cpp:1043
Op::Description Desc
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
Definition: GuardUtils.cpp:18
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&...args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
Definition: STLExtras.h:876
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:94
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:76
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool VerifyLoopInfo
Enable verification of loop info.
Definition: LoopInfo.cpp:50
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:263
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:84
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:57
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
Definition: LoopInfo.cpp:1146
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2051
auto predecessors(const MachineBasicBlock *BB)
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
Definition: LoopUtils.cpp:1915
bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition: LCSSA.cpp:363
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
Definition: ValueMapper.h:281
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: Analysis.h:26
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:71
Description of the encoding of one expression Op.
Struct to hold information about a partially invariant condition.
Definition: LoopUtils.h:530
SmallVector< Instruction * > InstToDuplicate
Instructions that need to be duplicated and checked for the unswitching condition.
Definition: LoopUtils.h:533
Constant * KnownValue
Constant to indicate for which value the condition is invariant.
Definition: LoopUtils.h:536
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Direction
An enum for the direction of the loop.
Definition: LoopInfo.h:220
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:68