SimpleLoopUnswitch.cpp
1///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/DenseMap.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/ADT/Sequence.h"
13#include "llvm/ADT/SetVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/Twine.h"
20#include "llvm/Analysis/CFG.h"
35#include "llvm/IR/BasicBlock.h"
36#include "llvm/IR/Constant.h"
37#include "llvm/IR/Constants.h"
38#include "llvm/IR/Dominators.h"
39#include "llvm/IR/Function.h"
40#include "llvm/IR/IRBuilder.h"
41#include "llvm/IR/InstrTypes.h"
42#include "llvm/IR/Instruction.h"
47#include "llvm/IR/Use.h"
48#include "llvm/IR/Value.h"
50#include "llvm/Pass.h"
53#include "llvm/Support/Debug.h"
64#include <algorithm>
65#include <cassert>
66#include <iterator>
67#include <numeric>
68#include <optional>
69#include <utility>
70
71#define DEBUG_TYPE "simple-loop-unswitch"
72
73using namespace llvm;
74using namespace llvm::PatternMatch;
75
76STATISTIC(NumBranches, "Number of branches unswitched");
77STATISTIC(NumSwitches, "Number of switches unswitched");
78STATISTIC(NumSelects, "Number of selects turned into branches for unswitching");
79STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
80STATISTIC(NumTrivial, "Number of unswitches that are trivial");
81STATISTIC(
82 NumCostMultiplierSkipped,
83 "Number of unswitch candidates that had their cost multiplier skipped");
84STATISTIC(NumInvariantConditionsInjected,
85 "Number of invariant conditions injected and unswitched");
86
87static cl::opt<bool> EnableNonTrivialUnswitch(
88 "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
89 cl::desc("Forcibly enables non-trivial loop unswitching rather than "
90 "following the configuration passed into the pass."));
91
92static cl::opt<int>
93 UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
94 cl::desc("The cost threshold for unswitching a loop."));
95
96static cl::opt<bool> EnableUnswitchCostMultiplier(
97 "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
98 cl::desc("Enable unswitch cost multiplier that prohibits exponential "
99 "explosion in nontrivial unswitch."));
100static cl::opt<int> UnswitchSiblingsToplevelDiv(
101 "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
102 cl::desc("Toplevel siblings divisor for cost multiplier."));
103static cl::opt<int> UnswitchNumInitialUnscaledCandidates(
104 "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
105 cl::desc("Number of unswitch candidates that are ignored when calculating "
106 "cost multiplier."));
107static cl::opt<bool> UnswitchGuards(
108 "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
109 cl::desc("If enabled, simple loop unswitching will also consider "
110 "llvm.experimental.guard intrinsics as unswitch candidates."));
111static cl::opt<bool> DropNonTrivialImplicitNullChecks(
112 "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
113 cl::init(false), cl::Hidden,
114 cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
115 "null checks to save time analyzing if we can keep it."));
116static cl::opt<unsigned>
117 MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
118 cl::desc("Max number of memory uses to explore during "
119 "partial unswitching analysis"),
120 cl::init(100), cl::Hidden);
121static cl::opt<bool> FreezeLoopUnswitchCond(
122 "freeze-loop-unswitch-cond", cl::init(true), cl::Hidden,
123 cl::desc("If enabled, the freeze instruction will be added to condition "
124 "of loop unswitch to prevent miscompilation."));
125
126static cl::opt<bool> InjectInvariantConditions(
127 "simple-loop-unswitch-inject-invariant-conditions", cl::Hidden,
128 cl::desc("Whether we should inject new invariants and unswitch them to "
129 "eliminate some existing (non-invariant) conditions."),
130 cl::init(true));
131
132static cl::opt<unsigned> InjectInvariantConditionHotnesThreshold(
133 "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
134 cl::Hidden, cl::desc("Only try to inject loop invariant conditions and "
135 "unswitch on them to eliminate branches that are "
136 "not-taken 1/<this option> times or less."),
137 cl::init(16));
138
139namespace {
140struct CompareDesc {
141 BranchInst *Term;
142 Value *Invariant;
143 BasicBlock *InLoopSucc;
144
145 CompareDesc(BranchInst *Term, Value *Invariant, BasicBlock *InLoopSucc)
146 : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
147};
148
149struct InjectedInvariant {
150 ICmpInst::Predicate Pred;
151 Value *LHS;
152 Value *RHS;
153 BasicBlock *InLoopSucc;
154
155 InjectedInvariant(ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
156 BasicBlock *InLoopSucc)
157 : Pred(Pred), LHS(LHS), RHS(RHS), InLoopSucc(InLoopSucc) {}
158};
159
160struct NonTrivialUnswitchCandidate {
161 Instruction *TI = nullptr;
162 TinyPtrVector<Value *> Invariants;
163 std::optional<InstructionCost> Cost;
164 std::optional<InjectedInvariant> PendingInjection;
165 NonTrivialUnswitchCandidate(
166 Instruction *TI, ArrayRef<Value *> Invariants,
167 std::optional<InstructionCost> Cost = std::nullopt,
168 std::optional<InjectedInvariant> PendingInjection = std::nullopt)
169 : TI(TI), Invariants(Invariants), Cost(Cost),
170 PendingInjection(PendingInjection) {};
171
172 bool hasPendingInjection() const { return PendingInjection.has_value(); }
173};
174} // end anonymous namespace.
175
176// Helper to skip (select x, true, false), which matches both a logical AND and
177// OR and can confuse code that tries to determine if \p Cond is either a
178// logical AND or OR but not both.
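// For illustration (hypothetical IR, not from this pass): a canonical i1
// identity select such as
//   %c = select i1 %x, i1 true, i1 false
// is peeled back to %x so the callers below can classify the underlying
// condition as a logical AND or OR without being confused by the select.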
179static Value *skipTrivialSelect(Value *Cond) {
180 Value *CondNext;
181 while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
182 Cond = CondNext;
183 return Cond;
184}
185
186/// Collect all of the loop invariant input values transitively used by the
187/// homogeneous instruction graph from a given root.
188///
189/// This essentially walks from a root recursively through loop variant operands
190/// which perform the same logical operation (AND or OR) and finds all
191/// inputs which are loop invariant. For some operations these can be
192/// re-associated and unswitched out of the loop entirely.
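///
/// As a rough sketch (hypothetical IR): for a root `%c = and i1 %t, %inv1`
/// where `%t = and i1 %var, %inv2`, the walk visits both `and`s and collects
/// {%inv1, %inv2}, leaving the loop-variant `%var` behind.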
193static TinyPtrVector<Value *>
194collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root,
195 const LoopInfo &LI) {
196 assert(!L.isLoopInvariant(&Root) &&
197 "Only need to walk the graph if root itself is not invariant.");
198 TinyPtrVector<Value *> Invariants;
199
200 bool IsRootAnd = match(&Root, m_LogicalAnd());
201 bool IsRootOr = match(&Root, m_LogicalOr());
202
203 // Build a worklist and recurse through operators collecting invariants.
204 SmallVector<Instruction *, 4> Worklist;
205 SmallPtrSet<Instruction *, 8> Visited;
206 Worklist.push_back(&Root);
207 Visited.insert(&Root);
208 do {
209 Instruction &I = *Worklist.pop_back_val();
210 for (Value *OpV : I.operand_values()) {
211 // Skip constants as unswitching isn't interesting for them.
212 if (isa<Constant>(OpV))
213 continue;
214
215 // Add it to our result if loop invariant.
216 if (L.isLoopInvariant(OpV)) {
217 Invariants.push_back(OpV);
218 continue;
219 }
220
221 // If not an instruction with the same opcode, nothing we can do.
222 Instruction *OpI = dyn_cast<Instruction>(skipTrivialSelect(OpV));
223
224 if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
225 (IsRootOr && match(OpI, m_LogicalOr())))) {
226 // Visit this operand.
227 if (Visited.insert(OpI).second)
228 Worklist.push_back(OpI);
229 }
230 }
231 } while (!Worklist.empty());
232
233 return Invariants;
234}
235
236static void replaceLoopInvariantUses(const Loop &L, Value *Invariant,
237 Constant &Replacement) {
238 assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");
239
240 // Replace uses of LIC in the loop with the given constant.
241 // We use make_early_inc_range as set invalidates the iterator.
242 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
243 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
244
245 // Replace this use within the loop body.
246 if (UserI && L.contains(UserI))
247 U.set(&Replacement);
248 }
249}
250
251/// Check that all the LCSSA PHI nodes in the loop exit block have trivial
252/// incoming values along this edge.
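///
/// For example (hypothetical IR), an LCSSA node in the exit block such as
///   %lcssa = phi i32 [ %v, %exiting ]
/// only keeps the unswitch trivial if %v is loop invariant.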
253static bool areLoopExitPHIsLoopInvariant(const Loop &L,
254 const BasicBlock &ExitingBB,
255 const BasicBlock &ExitBB) {
256 for (const Instruction &I : ExitBB) {
257 auto *PN = dyn_cast<PHINode>(&I);
258 if (!PN)
259 // No more PHIs to check.
260 return true;
261
262 // If the incoming value for this edge isn't loop invariant the unswitch
263 // won't be trivial.
264 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
265 return false;
266 }
267 llvm_unreachable("Basic blocks should never be empty!");
268}
269
270/// Copy a set of loop invariant values \p Invariants and insert them at the
271/// end of \p BB and conditionally branch on the copied condition. We only
272/// branch on a single value.
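///
/// A minimal sketch of the emitted IR (names hypothetical): for invariants
/// {%a, %b} and Direction == true this appends
///   %cond = or i1 %a, %b
///   br i1 %cond, label %unswitched, label %normal
/// to \p BB, optionally freezing each invariant first.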
273static void buildPartialUnswitchConditionalBranch(
274 BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
275 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
276 const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) {
277 IRBuilder<> IRB(&BB);
278
279 SmallVector<Value *> FrozenInvariants;
280 for (Value *Inv : Invariants) {
281 if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT))
282 Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr");
283 FrozenInvariants.push_back(Inv);
284 }
285
286 Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
287 : IRB.CreateAnd(FrozenInvariants);
288 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
289 Direction ? &NormalSucc : &UnswitchedSucc);
290}
291
292/// Copy a set of loop invariant values, and conditionally branch on them.
293static void buildPartialInvariantUnswitchConditionalBranch(
294 BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
295 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
296 MemorySSAUpdater *MSSAU) {
297 ValueToValueMapTy VMap;
298 for (auto *Val : reverse(ToDuplicate)) {
299 Instruction *Inst = cast<Instruction>(Val);
300 Instruction *NewInst = Inst->clone();
301 NewInst->insertInto(&BB, BB.end());
302 RemapInstruction(NewInst, VMap,
303 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
304 VMap[Val] = NewInst;
305
306 if (!MSSAU)
307 continue;
308
309 MemorySSA *MSSA = MSSAU->getMemorySSA();
310 if (auto *MemUse =
311 dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) {
312 auto *DefiningAccess = MemUse->getDefiningAccess();
313 // Get the first defining access before the loop.
314 while (L.contains(DefiningAccess->getBlock())) {
315 // If the defining access is a MemoryPhi, get the incoming
316 // value for the pre-header as defining access.
317 if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
318 DefiningAccess =
319 MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
320 else
321 DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
322 }
323 MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
324 NewInst->getParent(),
325 MemorySSA::BeforeTerminator);
326 }
327 }
328
329 IRBuilder<> IRB(&BB);
330 Value *Cond = VMap[ToDuplicate[0]];
331 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
332 Direction ? &NormalSucc : &UnswitchedSucc);
333}
334
335/// Rewrite the PHI nodes in an unswitched loop exit basic block.
336///
337/// Requires that the loop exit and unswitched basic block are the same, and
338/// that the exiting block was a unique predecessor of that block. Rewrites the
339/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
340/// PHI nodes from the old preheader that now contains the unswitched
341/// terminator.
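///
/// Sketch (hypothetical IR): an LCSSA node
///   %p = phi i32 [ %v, %old.exiting ]
/// simply becomes
///   %p = phi i32 [ %v, %old.ph ]
/// since the unswitched block is now reached from the old preheader instead.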
342static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,
343 BasicBlock &OldExitingBB,
344 BasicBlock &OldPH) {
345 for (PHINode &PN : UnswitchedBB.phis()) {
346 // When the loop exit is directly unswitched we just need to update the
347 // incoming basic block. We loop to handle weird cases with repeated
348 // incoming blocks, but expect to typically only have one operand here.
349 for (auto i : seq<int>(0, PN.getNumOperands())) {
350 assert(PN.getIncomingBlock(i) == &OldExitingBB &&
351 "Found incoming block different from unique predecessor!");
352 PN.setIncomingBlock(i, &OldPH);
353 }
354 }
355}
356
357/// Rewrite the PHI nodes in the loop exit basic block and the split off
358/// unswitched block.
359///
360/// Because the exit block remains an exit from the loop, this rewrites the
361/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
362/// nodes into the unswitched basic block to select between the value in the
363/// old preheader and the loop exit.
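///
/// Sketch (hypothetical IR): given an exit block PHI
///   %p = phi i32 [ %v, %exiting ], ...
/// a new node is created in the unswitched block,
///   %p.split = phi i32 [ %v, %old.ph ], [ %p, %exit ]
/// all uses of %p are redirected to %p.split, and on a full unswitch the
/// [ %v, %exiting ] entry is dropped from %p.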
364static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
365 BasicBlock &UnswitchedBB,
366 BasicBlock &OldExitingBB,
367 BasicBlock &OldPH,
368 bool FullUnswitch) {
369 assert(&ExitBB != &UnswitchedBB &&
370 "Must have different loop exit and unswitched blocks!");
371 BasicBlock::iterator InsertPt = UnswitchedBB.begin();
372 for (PHINode &PN : ExitBB.phis()) {
373 auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
374 PN.getName() + ".split");
375 NewPN->insertBefore(InsertPt);
376
377 // Walk backwards over the old PHI node's inputs to minimize the cost of
378 // removing each one. We have to do this weird loop manually so that we
379 // create the same number of new incoming edges in the new PHI as we expect
380 // each case-based edge to be included in the unswitched switch in some
381 // cases.
382 // FIXME: This is really, really gross. It would be much cleaner if LLVM
383 // allowed us to create a single entry for a predecessor block without
384 // having separate entries for each "edge" even though these edges are
385 // required to produce identical results.
386 for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
387 if (PN.getIncomingBlock(i) != &OldExitingBB)
388 continue;
389
390 Value *Incoming = PN.getIncomingValue(i);
391 if (FullUnswitch)
392 // No more edge from the old exiting block to the exit block.
393 PN.removeIncomingValue(i);
394
395 NewPN->addIncoming(Incoming, &OldPH);
396 }
397
398 // Now replace the old PHI with the new one and wire the old one in as an
399 // input to the new one.
400 PN.replaceAllUsesWith(NewPN);
401 NewPN->addIncoming(&PN, &ExitBB);
402 }
403}
404
405/// Hoist the current loop up to the innermost loop containing a remaining exit.
406///
407/// Because we've removed an exit from the loop, we may have changed the set of
408/// loops reachable and need to move the current loop up the loop nest or even
409/// to an entirely separate nest.
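///
/// For example, if unswitching removed the only exit from this loop into its
/// immediate parent, and the remaining exits all land in the grandparent (or
/// outside any loop), the loop must be re-parented to that outer loop or
/// become a top-level loop.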
410static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
411 DominatorTree &DT, LoopInfo &LI,
412 MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
413 // If the loop is already at the top level, we can't hoist it anywhere.
414 Loop *OldParentL = L.getParentLoop();
415 if (!OldParentL)
416 return;
417
418 SmallVector<BasicBlock *, 4> Exits;
419 L.getExitBlocks(Exits);
420 Loop *NewParentL = nullptr;
421 for (auto *ExitBB : Exits)
422 if (Loop *ExitL = LI.getLoopFor(ExitBB))
423 if (!NewParentL || NewParentL->contains(ExitL))
424 NewParentL = ExitL;
425
426 if (NewParentL == OldParentL)
427 return;
428
429 // The new parent loop (if different) should always contain the old one.
430 if (NewParentL)
431 assert(NewParentL->contains(OldParentL) &&
432 "Can only hoist this loop up the nest!");
433
434 // The preheader will need to move with the body of this loop. However,
435 // because it isn't in this loop we also need to update the primary loop map.
436 assert(OldParentL == LI.getLoopFor(&Preheader) &&
437 "Parent loop of this loop should contain this loop's preheader!");
438 LI.changeLoopFor(&Preheader, NewParentL);
439
440 // Remove this loop from its old parent.
441 OldParentL->removeChildLoop(&L);
442
443 // Add the loop either to the new parent or as a top-level loop.
444 if (NewParentL)
445 NewParentL->addChildLoop(&L);
446 else
447 LI.addTopLevelLoop(&L);
448
449 // Remove this loops blocks from the old parent and every other loop up the
450 // nest until reaching the new parent. Also update all of these
451 // no-longer-containing loops to reflect the nesting change.
452 for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
453 OldContainingL = OldContainingL->getParentLoop()) {
454 llvm::erase_if(OldContainingL->getBlocksVector(),
455 [&](const BasicBlock *BB) {
456 return BB == &Preheader || L.contains(BB);
457 });
458
459 OldContainingL->getBlocksSet().erase(&Preheader);
460 for (BasicBlock *BB : L.blocks())
461 OldContainingL->getBlocksSet().erase(BB);
462
463 // Because we just hoisted a loop out of this one, we have essentially
464 // created new exit paths from it. That means we need to form LCSSA PHI
465 // nodes for values used in the no-longer-nested loop.
466 formLCSSA(*OldContainingL, DT, &LI, SE);
467
468 // We shouldn't need to form dedicated exits because the exit introduced
469 // here is the (just split by unswitching) preheader. However, after trivial
470 // unswitching it is possible to get new non-dedicated exits out of parent
471 // loop so let's conservatively form dedicated exit blocks and figure out
472 // if we can optimize later.
473 formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
474 /*PreserveLCSSA*/ true);
475 }
476}
477
478// Return the top-most loop that has ExitBB as an exiting block, or the loop
479// immediately containing ExitBB if no enclosing loop has ExitBB as an
480// exiting block.
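// E.g. for a nest L1 > L2 > L3 where ExitBB lies in L3 and is an exiting
// block of L3 and L2 but not of L1, this returns L2 (the outermost loop that
// still exits through ExitBB).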
481static Loop *getTopMostExitingLoop(const BasicBlock *ExitBB,
482 const LoopInfo &LI) {
483 Loop *TopMost = LI.getLoopFor(ExitBB);
484 Loop *Current = TopMost;
485 while (Current) {
486 if (Current->isLoopExiting(ExitBB))
487 TopMost = Current;
488 Current = Current->getParentLoop();
489 }
490 return TopMost;
491}
492
493/// Unswitch a trivial branch if the condition is loop invariant.
494///
495/// This routine should only be called when loop code leading to the branch has
496/// been validated as trivial (no side effects). This routine checks if the
497/// condition is invariant and one of the successors is a loop exit. This
498/// allows us to unswitch without duplicating the loop, making it trivial.
499///
500/// If this routine fails to unswitch the branch it returns false.
501///
502/// If the branch can be unswitched, this routine splits the preheader and
503/// hoists the branch above that split. Preserves loop simplified form
504/// (splitting the exit block as necessary). It simplifies the branch within
505/// the loop to an unconditional branch but doesn't remove it entirely. Further
506/// cleanup can be done with some simplifycfg like pass.
507///
508/// If `SE` is not null, it will be updated based on the potential loop SCEVs
509/// invalidated by this.
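///
/// As an illustration (hypothetical CFG): for an in-loop block ending in
///   br i1 %inv, label %exit, label %latch
/// with %inv loop invariant, the preheader is split and the branch is hoisted
/// so the old preheader branches on %inv to the (possibly split) exit or to
/// the new preheader, while the block inside the loop falls through to %latch
/// unconditionally.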
510static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
511 LoopInfo &LI, ScalarEvolution *SE,
512 MemorySSAUpdater *MSSAU) {
513 assert(BI.isConditional() && "Can only unswitch a conditional branch!");
514 LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
515
516 // The loop invariant values that we want to unswitch.
517 TinyPtrVector<Value *> Invariants;
518
519 // When true, we're fully unswitching the branch rather than just unswitching
520 // some input conditions to the branch.
521 bool FullUnswitch = false;
522
523 Value *Cond = skipTrivialSelect(BI.getCondition());
524 if (L.isLoopInvariant(Cond)) {
525 Invariants.push_back(Cond);
526 FullUnswitch = true;
527 } else {
528 if (auto *CondInst = dyn_cast<Instruction>(Cond))
529 Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
530 if (Invariants.empty()) {
531 LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n");
532 return false;
533 }
534 }
535
536 // Check that one of the branch's successors exits, and which one.
537 bool ExitDirection = true;
538 int LoopExitSuccIdx = 0;
539 auto *LoopExitBB = BI.getSuccessor(0);
540 if (L.contains(LoopExitBB)) {
541 ExitDirection = false;
542 LoopExitSuccIdx = 1;
543 LoopExitBB = BI.getSuccessor(1);
544 if (L.contains(LoopExitBB)) {
545 LLVM_DEBUG(dbgs() << " Branch doesn't exit the loop!\n");
546 return false;
547 }
548 }
549 auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
550 auto *ParentBB = BI.getParent();
551 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {
552 LLVM_DEBUG(dbgs() << " Loop exit PHI's aren't loop-invariant!\n");
553 return false;
554 }
555
556 // When unswitching only part of the branch's condition, we need the exit
557 // block to be reached directly from the partially unswitched input. This can
558 // be done when the exit block is along the true edge and the branch condition
559 // is a graph of `or` operations, or the exit block is along the false edge
560 // and the condition is a graph of `and` operations.
561 if (!FullUnswitch) {
562 if (ExitDirection ? !match(Cond, m_LogicalOr())
563 : !match(Cond, m_LogicalAnd())) {
564 LLVM_DEBUG(dbgs() << " Branch condition is in improper form for "
565 "non-full unswitch!\n");
566 return false;
567 }
568 }
569
570 LLVM_DEBUG({
571 dbgs() << " unswitching trivial invariant conditions for: " << BI
572 << "\n";
573 for (Value *Invariant : Invariants) {
574 dbgs() << " " << *Invariant << " == true";
575 if (Invariant != Invariants.back())
576 dbgs() << " ||";
577 dbgs() << "\n";
578 }
579 });
580
581 // If we have scalar evolutions, we need to invalidate them including this
582 // loop, the loop containing the exit block and the topmost parent loop
583 // exiting via LoopExitBB.
584 if (SE) {
585 if (const Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
586 SE->forgetLoop(ExitL);
587 else
588 // Forget the entire nest as this exits the entire nest.
589 SE->forgetTopmostLoop(&L);
590 SE->forgetBlockAndLoopDispositions();
591 }
592
593 if (MSSAU && VerifyMemorySSA)
594 MSSAU->getMemorySSA()->verifyMemorySSA();
595
596 // Split the preheader, so that we know that there is a safe place to insert
597 // the conditional branch. We will change the preheader to have a conditional
598 // branch on LoopCond.
599 BasicBlock *OldPH = L.getLoopPreheader();
600 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
601
602 // Now that we have a place to insert the conditional branch, create a place
603 // to branch to: this is the exit block out of the loop that we are
604 // unswitching. We need to split this if there are other loop predecessors.
605 // Because the loop is in simplified form, *any* other predecessor is enough.
606 BasicBlock *UnswitchedBB;
607 if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
608 assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
609 "A branch's parent isn't a predecessor!");
610 UnswitchedBB = LoopExitBB;
611 } else {
612 UnswitchedBB =
613 SplitBlock(LoopExitBB, LoopExitBB->begin(), &DT, &LI, MSSAU, "", false);
614 }
615
616 if (MSSAU && VerifyMemorySSA)
617 MSSAU->getMemorySSA()->verifyMemorySSA();
618
619 // Actually move the invariant uses into the unswitched position. If possible,
620 // we do this by moving the instructions, but when doing partial unswitching
621 // we do it by building a new merge of the values in the unswitched position.
622 OldPH->getTerminator()->eraseFromParent();
623 if (FullUnswitch) {
624 // If fully unswitching, we can use the existing branch instruction.
625 // Splice it into the old PH to gate reaching the new preheader and re-point
626 // its successors.
627 BI.moveBefore(*OldPH, OldPH->end());
628 BI.setCondition(Cond);
629 if (MSSAU) {
630 // Temporarily clone the terminator, to make MSSA update cheaper by
631 // separating "insert edge" updates from "remove edge" ones.
632 BI.clone()->insertInto(ParentBB, ParentBB->end());
633 } else {
634 // Create a new unconditional branch that will continue the loop as a new
635 // terminator.
636 BranchInst::Create(ContinueBB, ParentBB);
637 }
638 BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
639 BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
640 } else {
641 // Only unswitching a subset of inputs to the condition, so we will need to
642 // build a new branch that merges the invariant inputs.
643 if (ExitDirection)
644 assert(match(Cond, m_LogicalOr()) &&
645 "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
646 "condition!");
647 else
648 assert(match(Cond, m_LogicalAnd()) &&
649 "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
650 " condition!");
651 buildPartialUnswitchConditionalBranch(
652 *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
653 FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT);
654 }
655
656 // Update the dominator tree with the added edge.
657 DT.insertEdge(OldPH, UnswitchedBB);
658
659 // After the dominator tree was updated with the added edge, update MemorySSA
660 // if available.
661 if (MSSAU) {
662 SmallVector<CFGUpdate, 1> Updates;
663 Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
664 MSSAU->applyInsertUpdates(Updates, DT);
665 }
666
667 // Finish updating dominator tree and memory ssa for full unswitch.
668 if (FullUnswitch) {
669 if (MSSAU) {
670 // Remove the cloned branch instruction.
671 ParentBB->getTerminator()->eraseFromParent();
672 // Create unconditional branch now.
673 BranchInst::Create(ContinueBB, ParentBB);
674 MSSAU->removeEdge(ParentBB, LoopExitBB);
675 }
676 DT.deleteEdge(ParentBB, LoopExitBB);
677 }
678
679 if (MSSAU && VerifyMemorySSA)
680 MSSAU->getMemorySSA()->verifyMemorySSA();
681
682 // Rewrite the relevant PHI nodes.
683 if (UnswitchedBB == LoopExitBB)
684 rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
685 else
686 rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
687 *ParentBB, *OldPH, FullUnswitch);
688
689 // The constant we can replace all of our invariants with inside the loop
690 // body. If any of the invariants have a value other than this the loop won't
691 // be entered.
692 ConstantInt *Replacement = ExitDirection
693 ? ConstantInt::getFalse(BI.getContext())
694 : ConstantInt::getTrue(BI.getContext());
695
696 // Since this is an i1 condition we can also trivially replace uses of it
697 // within the loop with a constant.
698 for (Value *Invariant : Invariants)
699 replaceLoopInvariantUses(L, Invariant, *Replacement);
700
701 // If this was full unswitching, we may have changed the nesting relationship
702 // for this loop so hoist it to its correct parent if needed.
703 if (FullUnswitch)
704 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
705
706 if (MSSAU && VerifyMemorySSA)
707 MSSAU->getMemorySSA()->verifyMemorySSA();
708
709 LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
710 ++NumTrivial;
711 ++NumBranches;
712 return true;
713}
714
715/// Unswitch a trivial switch if the condition is loop invariant.
716///
717/// This routine should only be called when loop code leading to the switch has
718/// been validated as trivial (no side effects). This routine checks if the
719/// condition is invariant and that at least one of the successors is a loop
720/// exit. This allows us to unswitch without duplicating the loop, making it
721/// trivial.
722///
723/// If this routine fails to unswitch the switch it returns false.
724///
725/// If the switch can be unswitched, this routine splits the preheader and
726/// copies the switch above that split. If the default case is one of the
727/// exiting cases, it copies the non-exiting cases and points them at the new
728/// preheader. If the default case is not exiting, it copies the exiting cases
729/// and points the default at the preheader. It preserves loop simplified form
730/// (splitting the exit blocks as necessary). It simplifies the switch within
731/// the loop by removing now-dead cases. If the default case is one of those
732/// unswitched, it replaces its destination with a new basic block containing
733/// only unreachable. Such basic blocks, while technically loop exits, are not
734/// considered for unswitching so this is a stable transform and the same
735/// switch will not be revisited. If after unswitching there is only a single
736/// in-loop successor, the switch is further simplified to an unconditional
737/// branch. Still more cleanup can be done with some simplifycfg like pass.
738///
739/// If `SE` is not null, it will be updated based on the potential loop SCEVs
740/// invalidated by this.
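///
/// As an illustration (hypothetical IR): for
///   switch i32 %inv, label %default [ i32 0, label %exit0
///                                      i32 1, label %body ]
/// where only %exit0 leaves the loop, the copy of the switch placed in the old
/// preheader sends case 0 to the (possibly split) exit and everything else to
/// the new preheader, while the switch left in the loop drops the now-dead
/// case 0.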
741static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
742 LoopInfo &LI, ScalarEvolution *SE,
743 MemorySSAUpdater *MSSAU) {
744 LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
745 Value *LoopCond = SI.getCondition();
746
747 // If this isn't switching on an invariant condition, we can't unswitch it.
748 if (!L.isLoopInvariant(LoopCond))
749 return false;
750
751 auto *ParentBB = SI.getParent();
752
753 // The same check must be used both for the default and the exit cases. We
754 // should never leave edges from the switch instruction to a basic block that
755 // we are unswitching, hence the condition used to determine the default case
756 // needs to also be used to populate ExitCaseIndices, which is then used to
757 // remove cases from the switch.
758 auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
759 // BBToCheck is not an exit block if it is inside loop L.
760 if (L.contains(&BBToCheck))
761 return false;
762 // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
763 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
764 return false;
765 // We do not unswitch a block that only has an unreachable statement, as
766 // it's possible this is a previously unswitched block. Only unswitch if
767 // either the terminator is not unreachable, or, if it is, it's not the only
768 // instruction in the block.
769 auto *TI = BBToCheck.getTerminator();
770 bool isUnreachable = isa<UnreachableInst>(TI);
771 return !isUnreachable ||
772 (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI));
773 };
774
775 SmallVector<int, 4> ExitCaseIndices;
776 for (auto Case : SI.cases())
777 if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
778 ExitCaseIndices.push_back(Case.getCaseIndex());
779 BasicBlock *DefaultExitBB = nullptr;
780 SwitchInstProfUpdateWrapper::CaseWeightOpt DefaultCaseWeight =
781 SwitchInstProfUpdateWrapper::getSuccessorWeight(SI, 0);
782 if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
783 DefaultExitBB = SI.getDefaultDest();
784 } else if (ExitCaseIndices.empty())
785 return false;
786
787 LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
788
789 if (MSSAU && VerifyMemorySSA)
790 MSSAU->getMemorySSA()->verifyMemorySSA();
791
792 // We may need to invalidate SCEVs for the outermost loop reached by any of
793 // the exits.
794 Loop *OuterL = &L;
795
796 if (DefaultExitBB) {
797 // Check the loop containing this exit.
798 Loop *ExitL = getTopMostExitingLoop(DefaultExitBB, LI);
799 if (!ExitL || ExitL->contains(OuterL))
800 OuterL = ExitL;
801 }
802 for (unsigned Index : ExitCaseIndices) {
803 auto CaseI = SI.case_begin() + Index;
804 // Compute the outer loop from this exit.
805 Loop *ExitL = getTopMostExitingLoop(CaseI->getCaseSuccessor(), LI);
806 if (!ExitL || ExitL->contains(OuterL))
807 OuterL = ExitL;
808 }
809
810 if (SE) {
811 if (OuterL)
812 SE->forgetLoop(OuterL);
813 else
814 SE->forgetTopmostLoop(&L);
815 }
816
817 if (DefaultExitBB) {
818 // Clear out the default destination temporarily to allow accurate
819 // predecessor lists to be examined below.
820 SI.setDefaultDest(nullptr);
821 }
822
823 // Store the exit cases into a separate data structure and remove them from
824 // the switch.
825 SmallVector<std::tuple<ConstantInt *, BasicBlock *,
826 SwitchInstProfUpdateWrapper::CaseWeightOpt>,
827 4> ExitCases;
828 ExitCases.reserve(ExitCaseIndices.size());
829 SwitchInstProfUpdateWrapper SIW(SI);
830 // We walk the case indices backwards so that we remove the last case first
831 // and don't disrupt the earlier indices.
832 for (unsigned Index : reverse(ExitCaseIndices)) {
833 auto CaseI = SI.case_begin() + Index;
834 // Save the value of this case.
835 auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
836 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
837 // Delete the unswitched cases.
838 SIW.removeCase(CaseI);
839 }
840
841 // Check if after this all of the remaining cases point at the same
842 // successor.
843 BasicBlock *CommonSuccBB = nullptr;
844 if (SI.getNumCases() > 0 &&
845 all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
846 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
847 }))
848 CommonSuccBB = SI.case_begin()->getCaseSuccessor();
849 if (!DefaultExitBB) {
850 // If we're not unswitching the default, we need it to match any cases to
851 // have a common successor or if we have no cases it is the common
852 // successor.
853 if (SI.getNumCases() == 0)
854 CommonSuccBB = SI.getDefaultDest();
855 else if (SI.getDefaultDest() != CommonSuccBB)
856 CommonSuccBB = nullptr;
857 }
858
859 // Split the preheader, so that we know that there is a safe place to insert
860 // the switch.
861 BasicBlock *OldPH = L.getLoopPreheader();
862 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
863 OldPH->getTerminator()->eraseFromParent();
864
865 // Now add the unswitched switch.
866 auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
867 SwitchInstProfUpdateWrapper NewSIW(*NewSI);
868
869 // Rewrite the IR for the unswitched basic blocks. This requires two steps.
870 // First, we split any exit blocks with remaining in-loop predecessors. Then
871 // we update the PHIs in one of two ways depending on if there was a split.
872 // We walk in reverse so that we split in the same order as the cases
873 // appeared. This is purely for convenience of reading the resulting IR, but
874 // it doesn't cost anything really.
875 SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
876 SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap;
877 // Handle the default exit if necessary.
878 // FIXME: It'd be great if we could merge this with the loop below but LLVM's
879 // ranges aren't quite powerful enough yet.
880 if (DefaultExitBB) {
881 if (pred_empty(DefaultExitBB)) {
882 UnswitchedExitBBs.insert(DefaultExitBB);
883 rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
884 } else {
885 auto *SplitBB =
886 SplitBlock(DefaultExitBB, DefaultExitBB->begin(), &DT, &LI, MSSAU);
887 rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
888 *ParentBB, *OldPH,
889 /*FullUnswitch*/ true);
890 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
891 }
892 }
893 // Note that we must use a reference in the for loop so that we update the
894 // container.
895 for (auto &ExitCase : reverse(ExitCases)) {
896 // Grab a reference to the exit block in the pair so that we can update it.
897 BasicBlock *ExitBB = std::get<1>(ExitCase);
898
899 // If this case is the last edge into the exit block, we can simply reuse it
900 // as it will no longer be a loop exit. No mapping necessary.
901 if (pred_empty(ExitBB)) {
902 // Only rewrite once.
903 if (UnswitchedExitBBs.insert(ExitBB).second)
904 rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
905 continue;
906 }
907
908 // Otherwise we need to split the exit block so that we retain an exit
909 // block from the loop and a target for the unswitched condition.
910 BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
911 if (!SplitExitBB) {
912 // If this is the first time we see this, do the split and remember it.
913 SplitExitBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
914 rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
915 *ParentBB, *OldPH,
916 /*FullUnswitch*/ true);
917 }
918 // Update the case pair to point to the split block.
919 std::get<1>(ExitCase) = SplitExitBB;
920 }
921
922 // Now add the unswitched cases. We do this in reverse order as we built them
923 // in reverse order.
924 for (auto &ExitCase : reverse(ExitCases)) {
925 ConstantInt *CaseVal = std::get<0>(ExitCase);
926 BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
927
928 NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
929 }
930
931 // If the default was unswitched, re-point it and add explicit cases for
932 // entering the loop.
933 if (DefaultExitBB) {
934 NewSIW->setDefaultDest(DefaultExitBB);
935 NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
936
937 // We removed all the exit cases, so we just copy the cases to the
938 // unswitched switch.
939 for (const auto &Case : SI.cases())
940 NewSIW.addCase(Case.getCaseValue(), NewPH,
941 SIW.getSuccessorWeight(Case.getSuccessorIndex()));
942 } else if (DefaultCaseWeight) {
943 // We have to set branch weight of the default case.
944 uint64_t SW = *DefaultCaseWeight;
945 for (const auto &Case : SI.cases()) {
946 auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
947 assert(W &&
948 "case weight must be defined as default case weight is defined");
949 SW += *W;
950 }
951 NewSIW.setSuccessorWeight(0, SW);
952 }
953
954 // If we ended up with a common successor for every path through the switch
955 // after unswitching, rewrite it to an unconditional branch to make it easy
956 // to recognize. Otherwise we potentially have to recognize the default case
957 // pointing at unreachable and other complexity.
958 if (CommonSuccBB) {
959 BasicBlock *BB = SI.getParent();
960 // We may have had multiple edges to this common successor block, so remove
961 // them as predecessors. We skip the first one, either the default or the
962 // actual first case.
963 bool SkippedFirst = DefaultExitBB == nullptr;
964 for (auto Case : SI.cases()) {
965 assert(Case.getCaseSuccessor() == CommonSuccBB &&
966 "Non-common successor!");
967 (void)Case;
968 if (!SkippedFirst) {
969 SkippedFirst = true;
970 continue;
971 }
972 CommonSuccBB->removePredecessor(BB,
973 /*KeepOneInputPHIs*/ true);
974 }
975 // Now nuke the switch and replace it with a direct branch.
976 SIW.eraseFromParent();
977 BranchInst::Create(CommonSuccBB, BB);
978 } else if (DefaultExitBB) {
979 assert(SI.getNumCases() > 0 &&
980 "If we had no cases we'd have a common successor!");
981 // Move the last case to the default successor. This is valid as if the
982 // default got unswitched it cannot be reached. This has the advantage of
983 // being simple and keeping the number of edges from this switch to
984 // successors the same, and avoiding any PHI update complexity.
985 auto LastCaseI = std::prev(SI.case_end());
986
987 SI.setDefaultDest(LastCaseI->getCaseSuccessor());
988 SIW.setSuccessorWeight(
989 0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
990 SIW.removeCase(LastCaseI);
991 }
992
993 // Walk the unswitched exit blocks and the unswitched split blocks and update
994 // the dominator tree based on the CFG edits. While we are walking unordered
995 // containers here, the API for applyUpdates takes an unordered list of
996 // updates and requires them to not contain duplicates.
997 SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
998 for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
999 DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
1000 DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
1001 }
1002 for (auto SplitUnswitchedPair : SplitExitBBMap) {
1003 DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
1004 DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
1005 }
1006
1007 if (MSSAU) {
1008 MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
1009 if (VerifyMemorySSA)
1010 MSSAU->getMemorySSA()->verifyMemorySSA();
1011 } else {
1012 DT.applyUpdates(DTUpdates);
1013 }
1014
1015 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1016
1017 // We may have changed the nesting relationship for this loop so hoist it to
1018 // its correct parent if needed.
1019 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
1020
1021 if (MSSAU && VerifyMemorySSA)
1022 MSSAU->getMemorySSA()->verifyMemorySSA();
1023
1024 ++NumTrivial;
1025 ++NumSwitches;
1026 LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
1027 return true;
1028}
1029
1030/// This routine scans the loop to find a branch or switch which occurs before
1031/// any side effects occur. These can potentially be unswitched without
1032/// duplicating the loop. If a branch or switch is successfully unswitched the
1033/// scanning continues to see if subsequent branches or switches have become
1034/// trivial. Once all trivial candidates have been unswitched, this routine
1035/// returns.
1036///
1037/// The return value indicates whether anything was unswitched (and therefore
1038/// changed).
1039///
1040/// If `SE` is not null, it will be updated based on the potential loop SCEVs
1041/// invalidated by this.
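///
/// For example, after unswitching an invariant branch in the header, the
/// header ends in an unconditional branch; its successor is then scanned for
/// another trivial candidate, and so on, stopping at the first side effect,
/// unknown terminator, or already-visited block.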
1042static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
1043 LoopInfo &LI, ScalarEvolution *SE,
1044 MemorySSAUpdater *MSSAU) {
1045 bool Changed = false;
1046
1047 // If loop header has only one reachable successor we should keep looking for
1048 // trivial condition candidates in the successor as well. An alternative is
1049 // to constant fold conditions and merge successors into loop header (then we
1050 // only need to check header's terminator). The reason for not doing this in
1051 // LoopUnswitch pass is that it could potentially break LoopPassManager's
1052 // invariants. Folding dead branches could either eliminate the current loop
1053 // or make other loops unreachable. LCSSA form might also not be preserved
1054 // after deleting branches. The following code keeps traversing loop header's
1055 // successors until it finds the trivial condition candidate (condition that
1056 // is not a constant). Since unswitching generates branches with constant
1057 // conditions, this scenario could be very common in practice.
1058 BasicBlock *CurrentBB = L.getHeader();
1059 SmallPtrSet<BasicBlock *, 8> Visited;
1060 Visited.insert(CurrentBB);
1061 do {
1062 // Check if there are any side-effecting instructions (e.g. stores, calls,
1063 // volatile loads) in the part of the loop that the code *would* execute
1064 // without unswitching.
1065 if (MSSAU) // Possible early exit with MSSA
1066 if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
1067 if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
1068 return Changed;
1069 if (llvm::any_of(*CurrentBB,
1070 [](Instruction &I) { return I.mayHaveSideEffects(); }))
1071 return Changed;
1072
1073 Instruction *CurrentTerm = CurrentBB->getTerminator();
1074
1075 if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1076 // Don't bother trying to unswitch past a switch with a constant
1077 // condition. This should be removed prior to running this pass by
1078 // simplifycfg.
1079 if (isa<Constant>(SI->getCondition()))
1080 return Changed;
1081
1082 if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
1083 // Couldn't unswitch this one so we're done.
1084 return Changed;
1085
1086 // Mark that we managed to unswitch something.
1087 Changed = true;
1088
1089 // If unswitching turned the terminator into an unconditional branch then
1090 // we can continue. The unswitching logic specifically works to fold any
1091 // cases it can into an unconditional branch to make it easier to
1092 // recognize here.
1093 auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
1094 if (!BI || BI->isConditional())
1095 return Changed;
1096
1097 CurrentBB = BI->getSuccessor(0);
1098 continue;
1099 }
1100
1101 auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1102 if (!BI)
1103 // We do not understand other terminator instructions.
1104 return Changed;
1105
1106 // Don't bother trying to unswitch past an unconditional branch or a branch
1107 // with a constant value. These should be removed by simplifycfg prior to
1108 // running this pass.
1109 if (!BI->isConditional() ||
1110 isa<Constant>(skipTrivialSelect(BI->getCondition())))
1111 return Changed;
1112
1113 // Found a trivial condition candidate: non-foldable conditional branch. If
1114 // we fail to unswitch this, we can't do anything else that is trivial.
1115 if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
1116 return Changed;
1117
1118 // Mark that we managed to unswitch something.
1119 Changed = true;
1120
1121 // If we only unswitched some of the conditions feeding the branch, we won't
1122 // have collapsed it to a single successor.
1123 BI = cast<BranchInst>(CurrentBB->getTerminator());
1124 if (BI->isConditional())
1125 return Changed;
1126
1127 // Follow the newly unconditional branch into its successor.
1128 CurrentBB = BI->getSuccessor(0);
1129
1130 // When continuing, if we exit the loop or reach a previous visited block,
1131 // then we can not reach any trivial condition candidates (unfoldable
1132 // branch instructions or switch instructions) and no unswitch can happen.
1133 } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
1134
1135 return Changed;
1136}
1137
1138/// Build the cloned blocks for an unswitched copy of the given loop.
1139///
1140/// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
1141/// after the split block (`SplitBB`) that will be used to select between the
1142/// cloned and original loop.
1143///
1144/// This routine handles cloning all of the necessary loop blocks and exit
1145/// blocks including rewriting their instructions and the relevant PHI nodes.
1146/// Any loop blocks or exit blocks which are dominated by a different successor
1147/// than the one for this clone of the loop blocks can be trivially skipped. We
1148/// use the `DominatingSucc` map to determine whether a block satisfies that
1149/// property with a simple map lookup.
1150///
1151/// It also correctly creates the unconditional branch in the cloned
1152/// unswitched parent block to only point at the unswitched successor.
1153///
1154/// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1155/// block splitting is correctly reflected in `LoopInfo`, essentially all of
1156/// the cloned blocks (and their loops) are left without full `LoopInfo`
1157/// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1158/// blocks to them but doesn't create the cloned `DominatorTree` structure and
1159/// instead the caller must recompute an accurate DT. It *does* correctly
1160/// update the `AssumptionCache` provided in `AC`.
1161static BasicBlock *buildClonedLoopBlocks(
1162 Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
1163 ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
1164 BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
1165 const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
1166 ValueToValueMapTy &VMap,
1167 SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates, AssumptionCache &AC,
1168 DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU,
1169 ScalarEvolution *SE) {
1170 SmallVector<BasicBlock *, 4> NewBlocks;
1171 NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
1172
1173 // We will need to clone a bunch of blocks, wrap up the clone operation in
1174 // a helper.
1175 auto CloneBlock = [&](BasicBlock *OldBB) {
1176 // Clone the basic block and insert it before the new preheader.
1177 BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
1178 NewBB->moveBefore(LoopPH);
1179
1180 // Record this block and the mapping.
1181 NewBlocks.push_back(NewBB);
1182 VMap[OldBB] = NewBB;
1183
1184 return NewBB;
1185 };
1186
1187 // We skip cloning blocks when they have a dominating succ that is not the
1188 // succ we are cloning for.
1189 auto SkipBlock = [&](BasicBlock *BB) {
1190 auto It = DominatingSucc.find(BB);
1191 return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
1192 };
1193
1194 // First, clone the preheader.
1195 auto *ClonedPH = CloneBlock(LoopPH);
1196
1197 // Then clone all the loop blocks, skipping the ones that aren't necessary.
1198 for (auto *LoopBB : L.blocks())
1199 if (!SkipBlock(LoopBB))
1200 CloneBlock(LoopBB);
1201
1202 // Split all the loop exit edges so that when we clone the exit blocks, if
1203 // any of the exit blocks are *also* a preheader for some other loop, we
1204 // don't create multiple predecessors entering the loop header.
1205 for (auto *ExitBB : ExitBlocks) {
1206 if (SkipBlock(ExitBB))
1207 continue;
1208
1209 // When we are going to clone an exit, we don't need to clone all the
1210 // instructions in the exit block and we want to ensure we have an easy
1211 // place to merge the CFG, so split the exit first. This is always safe to
1212 // do because there cannot be any non-loop predecessors of a loop exit in
1213 // loop simplified form.
1214 auto *MergeBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
1215
1216 // Rearrange the names to make it easier to write test cases by having the
1217 // exit block carry the suffix rather than the merge block carrying the
1218 // suffix.
1219 MergeBB->takeName(ExitBB);
1220 ExitBB->setName(Twine(MergeBB->getName()) + ".split");
1221
1222 // Now clone the original exit block.
1223 auto *ClonedExitBB = CloneBlock(ExitBB);
1224 assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1225 "Exit block should have been split to have one successor!");
1226 assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1227 "Cloned exit block has the wrong successor!");
1228
1229 // Remap any cloned instructions and create a merge phi node for them.
1230 for (auto ZippedInsts : llvm::zip_first(
1231 llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
1232 llvm::make_range(ClonedExitBB->begin(),
1233 std::prev(ClonedExitBB->end())))) {
1234 Instruction &I = std::get<0>(ZippedInsts);
1235 Instruction &ClonedI = std::get<1>(ZippedInsts);
1236
1237 // The only instructions in the exit block should be PHI nodes and
1238 // potentially a landing pad.
1239 assert(
1240 (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&
1241 "Bad instruction in exit block!");
1242 // We should have a value map between the instruction and its clone.
1243 assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
1244
1245 // Forget SCEVs based on exit phis in case SCEV looked through the phi.
1246 if (SE && isa<PHINode>(I))
1247 SE->forgetValue(&I);
1248
1249 auto *MergePN =
1250 PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi");
1251 MergePN->insertBefore(MergeBB->getFirstInsertionPt());
1252 I.replaceAllUsesWith(MergePN);
1253 MergePN->addIncoming(&I, ExitBB);
1254 MergePN->addIncoming(&ClonedI, ClonedExitBB);
1255 }
1256 }
1257
1258 // Rewrite the instructions in the cloned blocks to refer to the instructions
1259 // in the cloned blocks. We have to do this as a second pass so that we have
1260 // everything available. Also, we have inserted new instructions which may
1261 // include assume intrinsics, so we update the assumption cache while
1262 // processing this.
1263 for (auto *ClonedBB : NewBlocks)
1264 for (Instruction &I : *ClonedBB) {
1265 RemapInstruction(&I, VMap,
1266 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1267 if (auto *II = dyn_cast<AssumeInst>(&I))
1268 AC.registerAssumption(II);
1269 }
1270
1271 // Update any PHI nodes in the cloned successors of the skipped blocks to not
1272 // have spurious incoming values.
1273 for (auto *LoopBB : L.blocks())
1274 if (SkipBlock(LoopBB))
1275 for (auto *SuccBB : successors(LoopBB))
1276 if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
1277 for (PHINode &PN : ClonedSuccBB->phis())
1278 PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
1279
1280 // Remove the cloned parent as a predecessor of any successor we ended up
1281 // cloning other than the unswitched one.
1282 auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
1283 for (auto *SuccBB : successors(ParentBB)) {
1284 if (SuccBB == UnswitchedSuccBB)
1285 continue;
1286
1287 auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
1288 if (!ClonedSuccBB)
1289 continue;
1290
1291 ClonedSuccBB->removePredecessor(ClonedParentBB,
1292 /*KeepOneInputPHIs*/ true);
1293 }
1294
1295 // Replace the cloned branch with an unconditional branch to the cloned
1296 // unswitched successor.
1297 auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
1298 Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1299 // Trivial Simplification. If Terminator is a conditional branch and
1300 // condition becomes dead - erase it.
1301 Value *ClonedConditionToErase = nullptr;
1302 if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1303 ClonedConditionToErase = BI->getCondition();
1304 else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1305 ClonedConditionToErase = SI->getCondition();
1306
1307 ClonedTerminator->eraseFromParent();
1308 BranchInst::Create(ClonedSuccBB, ClonedParentBB);
1309
1310 if (ClonedConditionToErase)
1311 RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
1312 MSSAU);
1313
1314 // If there are duplicate entries in the PHI nodes because of multiple edges
1315 // to the unswitched successor, we need to nuke all but one as we replaced it
1316 // with a direct branch.
1317 for (PHINode &PN : ClonedSuccBB->phis()) {
1318 bool Found = false;
1319 // Loop over the incoming operands backwards so we can easily delete as we
1320 // go without invalidating the index.
1321 for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
1322 if (PN.getIncomingBlock(i) != ClonedParentBB)
1323 continue;
1324 if (!Found) {
1325 Found = true;
1326 continue;
1327 }
1328 PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
1329 }
1330 }
1331
1332 // Record the domtree updates for the new blocks.
1333 SmallPtrSet<BasicBlock *, 4> SuccSet;
1334 for (auto *ClonedBB : NewBlocks) {
1335 for (auto *SuccBB : successors(ClonedBB))
1336 if (SuccSet.insert(SuccBB).second)
1337 DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1338 SuccSet.clear();
1339 }
1340
1341 return ClonedPH;
1342}
1343
1344/// Recursively clone the specified loop and all of its children.
1345///
1346/// The target parent loop for the clone should be provided, or can be null if
1347/// the clone is a top-level loop. While cloning, all the blocks are mapped
1348/// with the provided value map. The entire original loop must be present in
1349/// the value map. The cloned loop is returned.
1350static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
1351 const ValueToValueMapTy &VMap, LoopInfo &LI) {
1352 auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
1353 assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
1354 ClonedL.reserveBlocks(OrigL.getNumBlocks());
1355 for (auto *BB : OrigL.blocks()) {
1356 auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
1357 ClonedL.addBlockEntry(ClonedBB);
1358 if (LI.getLoopFor(BB) == &OrigL)
1359 LI.changeLoopFor(ClonedBB, &ClonedL);
1360 }
1361 };
1362
1363 // We specially handle the first loop because it may get cloned into
1364 // a different parent and because we most commonly are cloning leaf loops.
1365 Loop *ClonedRootL = LI.AllocateLoop();
1366 if (RootParentL)
1367 RootParentL->addChildLoop(ClonedRootL);
1368 else
1369 LI.addTopLevelLoop(ClonedRootL);
1370 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1371
1372 if (OrigRootL.isInnermost())
1373 return ClonedRootL;
1374
1375 // If we have a nest, we can quickly clone the entire loop nest using an
1376 // iterative approach because it is a tree. We keep the cloned parent in the
1377 // data structure to avoid repeatedly querying through a map to find it.
1378 SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
1379 // Build up the loops to clone in reverse order as we'll clone them from the
1380 // back.
1381 for (Loop *ChildL : llvm::reverse(OrigRootL))
1382 LoopsToClone.push_back({ClonedRootL, ChildL});
1383 do {
1384 Loop *ClonedParentL, *L;
1385 std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
1386 Loop *ClonedL = LI.AllocateLoop();
1387 ClonedParentL->addChildLoop(ClonedL);
1388 AddClonedBlocksToLoop(*L, *ClonedL);
1389 for (Loop *ChildL : llvm::reverse(*L))
1390 LoopsToClone.push_back({ClonedL, ChildL});
1391 } while (!LoopsToClone.empty());
1392
1393 return ClonedRootL;
1394}
1395
1396/// Build the cloned loops of an original loop from unswitching.
1397///
1398/// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1399/// operation. We need to re-verify that there even is a loop (as the backedge
1400/// may not have been cloned), and even if there are remaining backedges the
1401/// backedge set may be different. However, we know that each child loop is
1402/// undisturbed, we only need to find where to place each child loop within
1403/// either any parent loop or within a cloned version of the original loop.
1404///
1405/// Because child loops may end up cloned outside of any cloned version of the
1406/// original loop, multiple cloned sibling loops may be created. All of them
1407/// are returned so that the newly introduced loop nest roots can be
1408/// identified.
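///
/// For example, if unswitching removed every backedge from the cloned blocks,
/// no cloned copy of the original loop is created at all, and each cloned
/// child loop is placed directly into an enclosing parent loop (or becomes a
/// top-level loop), producing the cloned sibling loops mentioned above.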
1409static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1410 const ValueToValueMapTy &VMap, LoopInfo &LI,
1411 SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1412 Loop *ClonedL = nullptr;
1413
1414 auto *OrigPH = OrigL.getLoopPreheader();
1415 auto *OrigHeader = OrigL.getHeader();
1416
1417 auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1418 auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1419
1420 // We need to know the loops of the cloned exit blocks to even compute the
1421 // accurate parent loop. If we only clone exits to some parent of the
1422 // original parent, we want to clone into that outer loop. We also keep track
1423 // of the loops that our cloned exit blocks participate in.
1424 Loop *ParentL = nullptr;
1425 SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1426 SmallDenseMap<BasicBlock *, Loop *, 16> ExitLoopMap;
1427 ClonedExitsInLoops.reserve(ExitBlocks.size());
1428 for (auto *ExitBB : ExitBlocks)
1429 if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1430 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1431 ExitLoopMap[ClonedExitBB] = ExitL;
1432 ClonedExitsInLoops.push_back(ClonedExitBB);
1433 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1434 ParentL = ExitL;
1435 }
1436 assert((!ParentL || ParentL == OrigL.getParentLoop() ||
1437 ParentL->contains(OrigL.getParentLoop())) &&
1438 "The computed parent loop should always contain (or be) the parent of "
1439 "the original loop.");
1440
1441 // We build the set of blocks dominated by the cloned header from the set of
1442 // cloned blocks out of the original loop. While not all of these will
1443 // necessarily be in the cloned loop, it is enough to establish that they
1444 // aren't in unreachable cycles, etc.
1445 SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1446 for (auto *BB : OrigL.blocks())
1447 if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1448 ClonedLoopBlocks.insert(ClonedBB);
1449
1450 // Rebuild the set of blocks that will end up in the cloned loop. We may have
1451 // skipped cloning some region of this loop which can in turn skip some of
1452 // the backedges so we have to rebuild the blocks in the loop based on the
1453 // backedges that remain after cloning.
1454 SmallVector<BasicBlock *, 16> Worklist;
1455 SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1456 for (auto *Pred : predecessors(ClonedHeader)) {
1457 // The only possible non-loop header predecessor is the preheader because
1458 // we know we cloned the loop in simplified form.
1459 if (Pred == ClonedPH)
1460 continue;
1461
1462 // Because the loop was in simplified form, the only non-loop predecessor
1463 // should be the preheader.
1464 assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
1465 "header other than the preheader "
1466 "that is not part of the loop!");
1467
1468 // Insert this block into the loop set and on the first visit (and if it
1469 // isn't the header we're currently walking) put it into the worklist to
1470 // recurse through.
1471 if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1472 Worklist.push_back(Pred);
1473 }
1474
1475 // If we had any backedges then there *is* a cloned loop. Put the header into
1476 // the loop set and then walk the worklist backwards to find all the blocks
1477 // that remain within the loop after cloning.
1478 if (!BlocksInClonedLoop.empty()) {
1479 BlocksInClonedLoop.insert(ClonedHeader);
1480
1481 while (!Worklist.empty()) {
1482 BasicBlock *BB = Worklist.pop_back_val();
1483 assert(BlocksInClonedLoop.count(BB) &&
1484 "Didn't put block into the loop set!");
1485
1486 // Insert any predecessors that are in the possible set into the cloned
1487 // set, and if the insert is successful, add them to the worklist. Note
1488 // that we filter on the blocks that are definitely reachable via the
1489 // backedge to the loop header so we may prune out dead code within the
1490 // cloned loop.
1491 for (auto *Pred : predecessors(BB))
1492 if (ClonedLoopBlocks.count(Pred) &&
1493 BlocksInClonedLoop.insert(Pred).second)
1494 Worklist.push_back(Pred);
1495 }
1496
1497 ClonedL = LI.AllocateLoop();
1498 if (ParentL) {
1499 ParentL->addBasicBlockToLoop(ClonedPH, LI);
1500 ParentL->addChildLoop(ClonedL);
1501 } else {
1502 LI.addTopLevelLoop(ClonedL);
1503 }
1504 NonChildClonedLoops.push_back(ClonedL);
1505
1506 ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1507 // We don't want to just add the cloned loop blocks based on how we
1508 // discovered them. The original order of blocks was carefully built in
1509 // a way that doesn't rely on predecessor ordering. Rather than re-invent
1510 // that logic, we just re-walk the original blocks (and those of the child
1511 // loops) and filter them as we add them into the cloned loop.
1512 for (auto *BB : OrigL.blocks()) {
1513 auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1514 if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1515 continue;
1516
1517 // Directly add the blocks that are only in this loop.
1518 if (LI.getLoopFor(BB) == &OrigL) {
1519 ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1520 continue;
1521 }
1522
1523 // We want to manually add it to this loop and parents.
1524 // Registering it with LoopInfo will happen when we clone the top
1525 // loop for this block.
1526 for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1527 PL->addBlockEntry(ClonedBB);
1528 }
1529
1530 // Now add each child loop whose header remains within the cloned loop. All
1531 // of the blocks within the loop must satisfy the same constraints as the
1532 // header so once we pass the header checks we can just clone the entire
1533 // child loop nest.
1534 for (Loop *ChildL : OrigL) {
1535 auto *ClonedChildHeader =
1536 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1537 if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1538 continue;
1539
1540#ifndef NDEBUG
1541 // We should never have a cloned child loop header but fail to have
1542 // all of the blocks for that child loop.
1543 for (auto *ChildLoopBB : ChildL->blocks())
1544 assert(BlocksInClonedLoop.count(
1545 cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
1546 "Child cloned loop has a header within the cloned outer "
1547 "loop but not all of its blocks!");
1548#endif
1549
1550 cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1551 }
1552 }
1553
1554 // Now that we've handled all the components of the original loop that were
1555 // cloned into a new loop, we still need to handle anything from the original
1556 // loop that wasn't in a cloned loop.
1557
1558 // Figure out what blocks are left to place within any loop nest containing
1559 // the unswitched loop. If we never formed a loop, the cloned PH is one of
1560 // them.
1561 SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1562 if (BlocksInClonedLoop.empty())
1563 UnloopedBlockSet.insert(ClonedPH);
1564 for (auto *ClonedBB : ClonedLoopBlocks)
1565 if (!BlocksInClonedLoop.count(ClonedBB))
1566 UnloopedBlockSet.insert(ClonedBB);
1567
1568 // Copy the cloned exits and sort them in ascending loop depth, we'll work
1569 // backwards across these to process them inside out. The order shouldn't
1570 // matter as we're just trying to build up the map from inside-out; we use
1571 // the map in a more stably ordered way below.
1572 auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1573 llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1574 return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1575 ExitLoopMap.lookup(RHS)->getLoopDepth();
1576 });
1577
1578 // Populate the existing ExitLoopMap with everything reachable from each
1579 // exit, starting from the inner most exit.
1580 while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1581 assert(Worklist.empty() && "Didn't clear worklist!");
1582
1583 BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1584 Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1585
1586 // Walk the CFG back until we hit the cloned PH adding everything reachable
1587 // and in the unlooped set to this exit block's loop.
1588 Worklist.push_back(ExitBB);
1589 do {
1590 BasicBlock *BB = Worklist.pop_back_val();
1591 // We can stop recursing at the cloned preheader (if we get there).
1592 if (BB == ClonedPH)
1593 continue;
1594
1595 for (BasicBlock *PredBB : predecessors(BB)) {
1596 // If this pred has already been moved to our set or is part of some
1597 // (inner) loop, no update needed.
1598 if (!UnloopedBlockSet.erase(PredBB)) {
1599 assert(
1600 (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
1601 "Predecessor not mapped to a loop!");
1602 continue;
1603 }
1604
1605 // We just insert into the loop set here. We'll add these blocks to the
1606 // exit loop after we build up the set in an order that doesn't rely on
1607 // predecessor order (which in turn relies on use list order).
1608 bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1609 (void)Inserted;
1610 assert(Inserted && "Should only visit an unlooped block once!");
1611
1612 // And recurse through to its predecessors.
1613 Worklist.push_back(PredBB);
1614 }
1615 } while (!Worklist.empty());
1616 }
1617
1618 // Now that the ExitLoopMap gives us a mapping for all the non-looping cloned
1619 // blocks to their outer loops, walk the cloned blocks and the cloned exits
1620 // in their original order adding them to the correct loop.
1621
1622 // We need a stable insertion order. We use the order of the original loop
1623 // order and map into the correct parent loop.
1624 for (auto *BB : llvm::concat<BasicBlock *const>(
1625 ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1626 if (Loop *OuterL = ExitLoopMap.lookup(BB))
1627 OuterL->addBasicBlockToLoop(BB, LI);
1628
1629#ifndef NDEBUG
1630 for (auto &BBAndL : ExitLoopMap) {
1631 auto *BB = BBAndL.first;
1632 auto *OuterL = BBAndL.second;
1633 assert(LI.getLoopFor(BB) == OuterL &&
1634 "Failed to put all blocks into outer loops!");
1635 }
1636#endif
1637
1638 // Now that all the blocks are placed into the correct containing loop in the
1639 // absence of child loops, find all the potentially cloned child loops and
1640 // clone them into whatever outer loop we placed their header into.
1641 for (Loop *ChildL : OrigL) {
1642 auto *ClonedChildHeader =
1643 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1644 if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1645 continue;
1646
1647#ifndef NDEBUG
1648 for (auto *ChildLoopBB : ChildL->blocks())
1649 assert(VMap.count(ChildLoopBB) &&
1650 "Cloned a child loop header but not all of that loop's blocks!");
1651#endif
1652
1653 NonChildClonedLoops.push_back(cloneLoopNest(
1654 *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1655 }
1656}
1657
1658static void
1659 deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
1660 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1661 DominatorTree &DT, MemorySSAUpdater *MSSAU) {
1662 SmallVector<BasicBlock *, 16> DeadBlocks;
1662 // Find all the dead clones, and remove them from their successors.
1664 for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1665 for (const auto &VMap : VMaps)
1666 if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1667 if (!DT.isReachableFromEntry(ClonedBB)) {
1668 for (BasicBlock *SuccBB : successors(ClonedBB))
1669 SuccBB->removePredecessor(ClonedBB);
1670 DeadBlocks.push_back(ClonedBB);
1671 }
1672
1673 // Remove all MemorySSA in the dead blocks
1674 if (MSSAU) {
1675 SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1676 DeadBlocks.end());
1677 MSSAU->removeBlocks(DeadBlockSet);
1678 }
1679
1680 // Drop any remaining references to break cycles.
1681 for (BasicBlock *BB : DeadBlocks)
1682 BB->dropAllReferences();
1683 // Erase them from the IR.
1684 for (BasicBlock *BB : DeadBlocks)
1685 BB->eraseFromParent();
1686}
1687
1688static void
1689 deleteDeadBlocksFromLoop(Loop &L,
1690 SmallVectorImpl<BasicBlock *> &ExitBlocks,
1691 DominatorTree &DT, LoopInfo &LI,
1692 MemorySSAUpdater *MSSAU,
1693 ScalarEvolution *SE,
1694 function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
1695 // Find all the dead blocks tied to this loop, and remove them from their
1696 // successors.
1697 SmallSetVector<BasicBlock *, 8> DeadBlockSet;
1698
1699 // Start with loop/exit blocks and get a transitive closure of reachable dead
1700 // blocks.
1701 SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1702 ExitBlocks.end());
1703 DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1704 while (!DeathCandidates.empty()) {
1705 auto *BB = DeathCandidates.pop_back_val();
1706 if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1707 for (BasicBlock *SuccBB : successors(BB)) {
1708 SuccBB->removePredecessor(BB);
1709 DeathCandidates.push_back(SuccBB);
1710 }
1711 DeadBlockSet.insert(BB);
1712 }
1713 }
1714
1715 // Remove all MemorySSA in the dead blocks
1716 if (MSSAU)
1717 MSSAU->removeBlocks(DeadBlockSet);
1718
1719 // Filter out the dead blocks from the exit blocks list so that it can be
1720 // used in the caller.
1721 llvm::erase_if(ExitBlocks,
1722 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1723
1724 // Walk from this loop up through its parents removing all of the dead blocks.
1725 for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1726 for (auto *BB : DeadBlockSet)
1727 ParentL->getBlocksSet().erase(BB);
1728 llvm::erase_if(ParentL->getBlocksVector(),
1729 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1730 }
1731
1732 // Now delete the dead child loops. This raw delete will clear them
1733 // recursively.
1734 llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1735 if (!DeadBlockSet.count(ChildL->getHeader()))
1736 return false;
1737
1738 assert(llvm::all_of(ChildL->blocks(),
1739 [&](BasicBlock *ChildBB) {
1740 return DeadBlockSet.count(ChildBB);
1741 }) &&
1742 "If the child loop header is dead all blocks in the child loop must "
1743 "be dead as well!");
1744 DestroyLoopCB(*ChildL, ChildL->getName());
1745 if (SE)
1746 SE->forgetBlockAndLoopDispositions();
1747 LI.destroy(ChildL);
1748 return true;
1749 });
1750
1751 // Remove the loop mappings for the dead blocks and drop all the references
1752 // from these blocks to others to handle cyclic references as we start
1753 // deleting the blocks themselves.
1754 for (auto *BB : DeadBlockSet) {
1755 // Check that the dominator tree has already been updated.
1756 assert(!DT.getNode(BB) && "Should already have cleared domtree!");
1757 LI.changeLoopFor(BB, nullptr);
1758 // Drop all uses of the instructions to make sure we won't have dangling
1759 // uses in other blocks.
1760 for (auto &I : *BB)
1761 if (!I.use_empty())
1762 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
1763 BB->dropAllReferences();
1764 }
1765
1766 // Actually delete the blocks now that they've been fully unhooked from the
1767 // IR.
1768 for (auto *BB : DeadBlockSet)
1769 BB->eraseFromParent();
1770}
1771
1772/// Recompute the set of blocks in a loop after unswitching.
1773///
1774 /// This walks from the original header's predecessors to rebuild the loop. We
1775/// take advantage of the fact that new blocks can't have been added, and so we
1776/// filter by the original loop's blocks. This also handles potentially
1777/// unreachable code that we don't want to explore but might be found examining
1778/// the predecessors of the header.
1779///
1780/// If the original loop is no longer a loop, this will return an empty set. If
1781/// it remains a loop, all the blocks within it will be added to the set
1782/// (including those blocks in inner loops).
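///
/// For illustration (editorial addition): if unswitching redirected the only
/// latch of the loop out to an exit block, the header is left with no in-loop
/// predecessor, the backedge walk below finds nothing, and the returned set is
/// empty, signalling to the caller that the original loop has dissolved.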
1783 static SmallPtrSet<const BasicBlock *, 16> recomputeLoopBlockSet(Loop &L,
1784 LoopInfo &LI) {
1785 SmallPtrSet<const BasicBlock *, 16> LoopBlockSet;
1786
1787 auto *PH = L.getLoopPreheader();
1788 auto *Header = L.getHeader();
1789
1790 // A worklist to use while walking backwards from the header.
1791 SmallVector<BasicBlock *, 16> Worklist;
1792
1793 // First walk the predecessors of the header to find the backedges. This will
1794 // form the basis of our walk.
1795 for (auto *Pred : predecessors(Header)) {
1796 // Skip the preheader.
1797 if (Pred == PH)
1798 continue;
1799
1800 // Because the loop was in simplified form, the only non-loop predecessor
1801 // is the preheader.
1802 assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1803 "than the preheader that is not part of the "
1804 "loop!");
1805
1806 // Insert this block into the loop set and, on the first visit (and if it
1807 // isn't the header we're currently walking), put it into the worklist to
1808 // recurse through.
1809 if (LoopBlockSet.insert(Pred).second && Pred != Header)
1810 Worklist.push_back(Pred);
1811 }
1812
1813 // If no backedges were found, we're done.
1814 if (LoopBlockSet.empty())
1815 return LoopBlockSet;
1816
1817 // We found backedges, recurse through them to identify the loop blocks.
1818 while (!Worklist.empty()) {
1819 BasicBlock *BB = Worklist.pop_back_val();
1820 assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1821
1822 // No need to walk past the header.
1823 if (BB == Header)
1824 continue;
1825
1826 // Because we know the inner loop structure remains valid we can use the
1827 // loop structure to jump immediately across the entire nested loop.
1828 // Further, because it is in loop simplified form, we can directly jump
1829 // to its preheader afterward.
1830 if (Loop *InnerL = LI.getLoopFor(BB))
1831 if (InnerL != &L) {
1832 assert(L.contains(InnerL) &&
1833 "Should not reach a loop *outside* this loop!");
1834 // The preheader is the only possible predecessor of the loop so
1835 // insert it into the set and check whether it was already handled.
1836 auto *InnerPH = InnerL->getLoopPreheader();
1837 assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1838 "but not contain the inner loop "
1839 "preheader!");
1840 if (!LoopBlockSet.insert(InnerPH).second)
1841 // The only way to reach the preheader is through the loop body
1842 // itself so if it has been visited the loop is already handled.
1843 continue;
1844
1845 // Insert all of the blocks (other than those already present) into
1846 // the loop set. We expect at least the block that led us to find the
1847 // inner loop to be in the block set, but we may also have other loop
1848 // blocks if they were already enqueued as predecessors of some other
1849 // outer loop block.
1850 for (auto *InnerBB : InnerL->blocks()) {
1851 if (InnerBB == BB) {
1852 assert(LoopBlockSet.count(InnerBB) &&
1853 "Block should already be in the set!");
1854 continue;
1855 }
1856
1857 LoopBlockSet.insert(InnerBB);
1858 }
1859
1860 // Add the preheader to the worklist so we will continue past the
1861 // loop body.
1862 Worklist.push_back(InnerPH);
1863 continue;
1864 }
1865
1866 // Insert any predecessors that were in the original loop into the new
1867 // set, and if the insert is successful, add them to the worklist.
1868 for (auto *Pred : predecessors(BB))
1869 if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1870 Worklist.push_back(Pred);
1871 }
1872
1873 assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
1874
1875 // We've found all the blocks participating in the loop, return our completed
1876 // set.
1877 return LoopBlockSet;
1878}
1879
1880/// Rebuild a loop after unswitching removes some subset of blocks and edges.
1881///
1882/// The removal may have removed some child loops entirely but cannot have
1883/// disturbed any remaining child loops. However, they may need to be hoisted
1884/// to the parent loop (or to be top-level loops). The original loop may be
1885/// completely removed.
1886///
1887/// The sibling loops resulting from this update are returned. If the original
1888/// loop remains a valid loop, it will be the first entry in this list with all
1889/// of the newly sibling loops following it.
1890///
1891/// Returns true if the loop remains a loop after unswitching, and false if it
1892/// is no longer a loop after unswitching (and should not continue to be
1893/// referenced).
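///
/// Rough example (editorial addition): if the unswitched edge removed the last
/// remaining backedge, this returns false and any intact child loops are handed
/// back through \p HoistedLoops so the caller can re-parent them; if a backedge
/// survives, the loop is kept and merely re-parented to the innermost loop that
/// still contains its remaining exits.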
1894 static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
1895 LoopInfo &LI,
1896 SmallVectorImpl<Loop *> &HoistedLoops,
1897 ScalarEvolution *SE) {
1898 auto *PH = L.getLoopPreheader();
1899
1900 // Compute the actual parent loop from the exit blocks. Because we may have
1901 // pruned some exits the loop may be different from the original parent.
1902 Loop *ParentL = nullptr;
1903 SmallVector<Loop *, 4> ExitLoops;
1904 SmallVector<BasicBlock *, 4> ExitsInLoops;
1905 ExitsInLoops.reserve(ExitBlocks.size());
1906 for (auto *ExitBB : ExitBlocks)
1907 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1908 ExitLoops.push_back(ExitL);
1909 ExitsInLoops.push_back(ExitBB);
1910 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1911 ParentL = ExitL;
1912 }
1913
1914 // Recompute the blocks participating in this loop. This may be empty if it
1915 // is no longer a loop.
1916 auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1917
1918 // If we still have a loop, we need to re-set the loop's parent as the exit
1919 // block set changing may have moved it within the loop nest. Note that this
1920 // can only happen when this loop has a parent as it can only hoist the loop
1921 // *up* the nest.
1922 if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1923 // Remove this loop's (original) blocks from all of the intervening loops.
1924 for (Loop *IL = L.getParentLoop(); IL != ParentL;
1925 IL = IL->getParentLoop()) {
1926 IL->getBlocksSet().erase(PH);
1927 for (auto *BB : L.blocks())
1928 IL->getBlocksSet().erase(BB);
1929 llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1930 return BB == PH || L.contains(BB);
1931 });
1932 }
1933
1934 LI.changeLoopFor(PH, ParentL);
1935 L.getParentLoop()->removeChildLoop(&L);
1936 if (ParentL)
1937 ParentL->addChildLoop(&L);
1938 else
1939 LI.addTopLevelLoop(&L);
1940 }
1941
1942 // Now we update all the blocks which are no longer within the loop.
1943 auto &Blocks = L.getBlocksVector();
1944 auto BlocksSplitI =
1945 LoopBlockSet.empty()
1946 ? Blocks.begin()
1947 : std::stable_partition(
1948 Blocks.begin(), Blocks.end(),
1949 [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
1950
1951 // Before we erase the list of unlooped blocks, build a set of them.
1952 SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
1953 if (LoopBlockSet.empty())
1954 UnloopedBlocks.insert(PH);
1955
1956 // Now erase these blocks from the loop.
1957 for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
1958 L.getBlocksSet().erase(BB);
1959 Blocks.erase(BlocksSplitI, Blocks.end());
1960
1961 // Sort the exits in ascending loop depth, we'll work backwards across these
1962 // to process them inside out.
1963 llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1964 return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
1965 });
1966
1967 // We'll build up a set for each exit loop.
1968 SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
1969 Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
1970
1971 auto RemoveUnloopedBlocksFromLoop =
1972 [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
1973 for (auto *BB : UnloopedBlocks)
1974 L.getBlocksSet().erase(BB);
1975 llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
1976 return UnloopedBlocks.count(BB);
1977 });
1978 };
1979
1980 SmallVector<BasicBlock *, 16> Worklist;
1981 while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
1982 assert(Worklist.empty() && "Didn't clear worklist!");
1983 assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
1984
1985 // Grab the next exit block, in decreasing loop depth order.
1986 BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
1987 Loop &ExitL = *LI.getLoopFor(ExitBB);
1988 assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
1989
1990 // Erase all of the unlooped blocks from the loops between the previous
1991 // exit loop and this exit loop. This works because the ExitInLoops list is
1992 // sorted in increasing order of loop depth and thus we visit loops in
1993 // decreasing order of loop depth.
1994 for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
1995 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1996
1997 // Walk the CFG back until we hit the cloned PH adding everything reachable
1998 // and in the unlooped set to this exit block's loop.
1999 Worklist.push_back(ExitBB);
2000 do {
2001 BasicBlock *BB = Worklist.pop_back_val();
2002 // We can stop recursing at the cloned preheader (if we get there).
2003 if (BB == PH)
2004 continue;
2005
2006 for (BasicBlock *PredBB : predecessors(BB)) {
2007 // If this pred has already been moved to our set or is part of some
2008 // (inner) loop, no update needed.
2009 if (!UnloopedBlocks.erase(PredBB)) {
2010 assert((NewExitLoopBlocks.count(PredBB) ||
2011 ExitL.contains(LI.getLoopFor(PredBB))) &&
2012 "Predecessor not in a nested loop (or already visited)!");
2013 continue;
2014 }
2015
2016 // We just insert into the loop set here. We'll add these blocks to the
2017 // exit loop after we build up the set in a deterministic order rather
2018 // than the predecessor-influenced visit order.
2019 bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
2020 (void)Inserted;
2021 assert(Inserted && "Should only visit an unlooped block once!");
2022
2023 // And recurse through to its predecessors.
2024 Worklist.push_back(PredBB);
2025 }
2026 } while (!Worklist.empty());
2027
2028 // If blocks in this exit loop were directly part of the original loop (as
2029 // opposed to a child loop) update the map to point to this exit loop. This
2030 // just updates a map and so the fact that the order is unstable is fine.
2031 for (auto *BB : NewExitLoopBlocks)
2032 if (Loop *BBL = LI.getLoopFor(BB))
2033 if (BBL == &L || !L.contains(BBL))
2034 LI.changeLoopFor(BB, &ExitL);
2035
2036 // We will remove the remaining unlooped blocks from this loop in the next
2037 // iteration or below.
2038 NewExitLoopBlocks.clear();
2039 }
2040
2041 // Any remaining unlooped blocks are no longer part of any loop unless they
2042 // are part of some child loop.
2043 for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
2044 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2045 for (auto *BB : UnloopedBlocks)
2046 if (Loop *BBL = LI.getLoopFor(BB))
2047 if (BBL == &L || !L.contains(BBL))
2048 LI.changeLoopFor(BB, nullptr);
2049
2050 // Sink all the child loops whose headers are no longer in the loop set to
2051 // the parent (or to be top level loops). We reach into the loop and directly
2052 // update its subloop vector to make this batch update efficient.
2053 auto &SubLoops = L.getSubLoopsVector();
2054 auto SubLoopsSplitI =
2055 LoopBlockSet.empty()
2056 ? SubLoops.begin()
2057 : std::stable_partition(
2058 SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
2059 return LoopBlockSet.count(SubL->getHeader());
2060 });
2061 for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
2062 HoistedLoops.push_back(HoistedL);
2063 HoistedL->setParentLoop(nullptr);
2064
2065 // To compute the new parent of this hoisted loop we look at where we
2066 // placed the preheader above. We can't lookup the header itself because we
2067 // retained the mapping from the header to the hoisted loop. But the
2068 // preheader and header should have the exact same new parent computed
2069 // based on the set of exit blocks from the original loop as the preheader
2070 // is a predecessor of the header and so reached in the reverse walk. And
2071 // because the loops were all in simplified form the preheader of the
2072 // hoisted loop can't be part of some *other* loop.
2073 if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
2074 NewParentL->addChildLoop(HoistedL);
2075 else
2076 LI.addTopLevelLoop(HoistedL);
2077 }
2078 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
2079
2080 // Actually delete the loop if nothing remained within it.
2081 if (Blocks.empty()) {
2082 assert(SubLoops.empty() &&
2083 "Failed to remove all subloops from the original loop!");
2084 if (Loop *ParentL = L.getParentLoop())
2085 ParentL->removeChildLoop(llvm::find(*ParentL, &L));
2086 else
2087 LI.removeLoop(llvm::find(LI, &L));
2088 // markLoopAsDeleted for L should be triggered by the caller (it is typically
2089 // done by using the UnswitchCB callback).
2090 if (SE)
2091 SE->forgetBlockAndLoopDispositions();
2092 LI.destroy(&L);
2093 return false;
2094 }
2095
2096 return true;
2097}
2098
2099/// Helper to visit a dominator subtree, invoking a callable on each node.
2100///
2101/// Returning false at any point will stop walking past that node of the tree.
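///
/// Minimal usage sketch (editorial addition; `Root` is a hypothetical block):
///
///   SmallVector<BasicBlock *, 8> SubtreeBlocks;
///   visitDomSubTree(DT, Root, [&](BasicBlock *BB) {
///     SubtreeBlocks.push_back(BB);
///     return true; // keep walking into dominated blocks
///   });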
2102template <typename CallableT>
2103void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
2104 SmallVector<DomTreeNode *, 4> DomWorklist;
2105 DomWorklist.push_back(DT[BB]);
2106#ifndef NDEBUG
2107 SmallPtrSet<DomTreeNode *, 4> Visited;
2108 Visited.insert(DT[BB]);
2109#endif
2110 do {
2111 DomTreeNode *N = DomWorklist.pop_back_val();
2112
2113 // Visit this node.
2114 if (!Callable(N->getBlock()))
2115 continue;
2116
2117 // Accumulate the child nodes.
2118 for (DomTreeNode *ChildN : *N) {
2119 assert(Visited.insert(ChildN).second &&
2120 "Cannot visit a node twice when walking a tree!");
2121 DomWorklist.push_back(ChildN);
2122 }
2123 } while (!DomWorklist.empty());
2124}
2125
2126 static void unswitchNontrivialInvariants(
2127 Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
2128 IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI,
2129 AssumptionCache &AC,
2130 function_ref<void(bool, bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2131 ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
2132 function_ref<void(Loop &, StringRef)> DestroyLoopCB, bool InsertFreeze,
2133 bool InjectedCondition) {
2134 auto *ParentBB = TI.getParent();
2135 BranchInst *BI = dyn_cast<BranchInst>(&TI);
2136 SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2137
2138 // We can only unswitch switches, conditional branches with an invariant
2139 // condition, instructions that combine invariant conditions (`and`/`or`), or
2140 // partially invariant instructions.
2141 assert((SI || (BI && BI->isConditional())) &&
2142 "Can only unswitch switches and conditional branch!");
2143 bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
2144 bool FullUnswitch =
2145 SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] &&
2146 !PartiallyInvariant);
2147 if (FullUnswitch)
2148 assert(Invariants.size() == 1 &&
2149 "Cannot have other invariants with full unswitching!");
2150 else
2151 assert(isa<Instruction>(skipTrivialSelect(BI->getCondition())) &&
2152 "Partial unswitching requires an instruction as the condition!");
2153
2154 if (MSSAU && VerifyMemorySSA)
2155 MSSAU->getMemorySSA()->verifyMemorySSA();
2156
2157 // Constant and BBs tracking the cloned and continuing successor. When we are
2158 // unswitching the entire condition, this can just be trivially chosen to
2159 // unswitch towards `true`. However, when we are unswitching a set of
2160 // invariants combined with `and` or `or` or partially invariant instructions,
2161 // the combining operation determines the best direction to unswitch: we want
2162 // to unswitch the direction that will collapse the branch.
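// For example (editorial illustration): for a branch on `%c = and i1 %inv, %x`,
// learning that %inv is false collapses %c to false, so the false direction is
// unswitched (ClonedSucc == 1); for a logical `or`, the collapsing direction is
// true, which is the default set just below.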
2163 bool Direction = true;
2164 int ClonedSucc = 0;
2165 if (!FullUnswitch) {
2166 Value *Cond = skipTrivialSelect(BI->getCondition());
2167 (void)Cond;
2168 assert((match(Cond, m_LogicalAnd()) || match(Cond, m_LogicalOr()) ||
2169 PartiallyInvariant) &&
2170 "Only `or`, `and`, a `select`, or partially invariant instructions "
2171 "can combine invariants being unswitched.");
2172 if (!match(Cond, m_LogicalOr())) {
2173 if (match(Cond, m_LogicalAnd()) ||
2174 (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
2175 Direction = false;
2176 ClonedSucc = 1;
2177 }
2178 }
2179 }
2180
2181 BasicBlock *RetainedSuccBB =
2182 BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2183 SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2184 if (BI)
2185 UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2186 else
2187 for (auto Case : SI->cases())
2188 if (Case.getCaseSuccessor() != RetainedSuccBB)
2189 UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2190
2191 assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
2192 "Should not unswitch the same successor we are retaining!");
2193
2194 // The branch should be in this exact loop. Any inner loop's invariant branch
2195 // should be handled by unswitching that inner loop. The caller of this
2196 // routine should filter out any candidates that remain (but were skipped for
2197 // whatever reason).
2198 assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
2199
2200 // Compute the parent loop now before we start hacking on things.
2201 Loop *ParentL = L.getParentLoop();
2202 // Get blocks in RPO order for MSSA update, before changing the CFG.
2203 LoopBlocksRPO LBRPO(&L);
2204 if (MSSAU)
2205 LBRPO.perform(&LI);
2206
2207 // Compute the outer-most loop containing one of our exit blocks. This is the
2208 // furthest up our loopnest which can be mutated, which we will use below to
2209 // update things.
2210 Loop *OuterExitL = &L;
2212 L.getUniqueExitBlocks(ExitBlocks);
2213 for (auto *ExitBB : ExitBlocks) {
2214 // ExitBB can be an exit block for several levels in the loop nest. Make
2215 // sure we find the top most.
2216 Loop *NewOuterExitL = getTopMostExitingLoop(ExitBB, LI);
2217 if (!NewOuterExitL) {
2218 // We exited the entire nest with this block, so we're done.
2219 OuterExitL = nullptr;
2220 break;
2221 }
2222 if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2223 OuterExitL = NewOuterExitL;
2224 }
2225
2226 // At this point, we're definitely going to unswitch something so invalidate
2227 // any cached information in ScalarEvolution for the outer most loop
2228 // containing an exit block and all nested loops.
2229 if (SE) {
2230 if (OuterExitL)
2231 SE->forgetLoop(OuterExitL);
2232 else
2233 SE->forgetTopmostLoop(&L);
2234 SE->forgetBlockAndLoopDispositions();
2235 }
2236
2237 // If the edge from this terminator to a successor dominates that successor,
2238 // store a map from each block in its dominator subtree to it. This lets us
2239 // tell when cloning for a particular successor if a block is dominated by
2240 // some *other* successor with a single data structure. We use this to
2241 // significantly reduce cloning.
2242 SmallDenseMap<BasicBlock *, BasicBlock *, 16> DominatingSucc;
2243 for (auto *SuccBB : llvm::concat<BasicBlock *const>(ArrayRef(RetainedSuccBB),
2244 UnswitchedSuccBBs))
2245 if (SuccBB->getUniquePredecessor() ||
2246 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2247 return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2248 }))
2249 visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2250 DominatingSucc[BB] = SuccBB;
2251 return true;
2252 });
2253
2254 // Split the preheader, so that we know that there is a safe place to insert
2255 // the conditional branch. We will change the preheader to have a conditional
2256 // branch on LoopCond. The original preheader will become the split point
2257 // between the unswitched versions, and we will have a new preheader for the
2258 // original loop.
2259 BasicBlock *SplitBB = L.getLoopPreheader();
2260 BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2261
2262 // Keep track of the dominator tree updates needed.
2263 SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
2264
2265 // Clone the loop for each unswitched successor.
2266 SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
2267 VMaps.reserve(UnswitchedSuccBBs.size());
2268 SmallDenseMap<BasicBlock *, BasicBlock *, 4> ClonedPHs;
2269 for (auto *SuccBB : UnswitchedSuccBBs) {
2270 VMaps.emplace_back(new ValueToValueMapTy());
2271 ClonedPHs[SuccBB] = buildClonedLoopBlocks(
2272 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2273 DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU, SE);
2274 }
2275
2276 // Drop metadata if we may break its semantics by moving this instr into the
2277 // split block.
2278 if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2279 if (DropNonTrivialImplicitNullChecks)
2280 // Do not spend time trying to understand if we can keep it, just drop it
2281 // to save compile time.
2282 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2283 else {
2284 // It is only legal to preserve make.implicit metadata if we are
2285 // guaranteed not to reach an implicit null check after following this branch.
2286 ICFLoopSafetyInfo SafetyInfo;
2287 SafetyInfo.computeLoopSafetyInfo(&L);
2288 if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2289 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2290 }
2291 }
2292
2293 // The stitching of the branched code back together depends on whether we're
2294 // doing full unswitching or not with the exception that we always want to
2295 // nuke the initial terminator placed in the split block.
2296 SplitBB->getTerminator()->eraseFromParent();
2297 if (FullUnswitch) {
2298 // Splice the terminator from the original loop and rewrite its
2299 // successors.
2300 TI.moveBefore(*SplitBB, SplitBB->end());
2301
2302 // Keep a clone of the terminator for MSSA updates.
2303 Instruction *NewTI = TI.clone();
2304 NewTI->insertInto(ParentBB, ParentBB->end());
2305
2306 // First wire up the moved terminator to the preheaders.
2307 if (BI) {
2308 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2309 BI->setSuccessor(ClonedSucc, ClonedPH);
2310 BI->setSuccessor(1 - ClonedSucc, LoopPH);
2311 Value *Cond = skipTrivialSelect(BI->getCondition());
2312 if (InsertFreeze)
2313 Cond = new FreezeInst(
2314 Cond, Cond->getName() + ".fr", BI);
2315 BI->setCondition(Cond);
2316 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2317 } else {
2318 assert(SI && "Must either be a branch or switch!");
2319
2320 // Walk the cases and directly update their successors.
2321 assert(SI->getDefaultDest() == RetainedSuccBB &&
2322 "Not retaining default successor!");
2323 SI->setDefaultDest(LoopPH);
2324 for (const auto &Case : SI->cases())
2325 if (Case.getCaseSuccessor() == RetainedSuccBB)
2326 Case.setSuccessor(LoopPH);
2327 else
2328 Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2329
2330 if (InsertFreeze)
2331 SI->setCondition(new FreezeInst(
2332 SI->getCondition(), SI->getCondition()->getName() + ".fr", SI));
2333
2334 // We need to use the set to populate domtree updates as even when there
2335 // are multiple cases pointing at the same successor we only want to
2336 // remove and insert one edge in the domtree.
2337 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2338 DTUpdates.push_back(
2339 {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2340 }
2341
2342 if (MSSAU) {
2343 DT.applyUpdates(DTUpdates);
2344 DTUpdates.clear();
2345
2346 // Remove all but one edge to the retained block and all unswitched
2347 // blocks. This is to avoid having duplicate entries in the cloned Phis,
2348 // when we know we only keep a single edge for each case.
2349 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2350 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2351 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2352
2353 for (auto &VMap : VMaps)
2354 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2355 /*IgnoreIncomingWithNoClones=*/true);
2356 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2357
2358 // Remove all edges to unswitched blocks.
2359 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2360 MSSAU->removeEdge(ParentBB, SuccBB);
2361 }
2362
2363 // Now unhook the successor relationship as we'll be replacing
2364 // the terminator with a direct branch. This is much simpler for branches
2365 // than switches so we handle those first.
2366 if (BI) {
2367 // Remove the parent as a predecessor of the unswitched successor.
2368 assert(UnswitchedSuccBBs.size() == 1 &&
2369 "Only one possible unswitched block for a branch!");
2370 BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2371 UnswitchedSuccBB->removePredecessor(ParentBB,
2372 /*KeepOneInputPHIs*/ true);
2373 DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2374 } else {
2375 // Note that we actually want to remove the parent block as a predecessor
2376 // of *every* case successor. The case successor is either unswitched,
2377 // completely eliminating an edge from the parent to that successor, or it
2378 // is a duplicate edge to the retained successor as the retained successor
2379 // is always the default successor and as we'll replace this with a direct
2380 // branch we no longer need the duplicate entries in the PHI nodes.
2381 SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2382 assert(NewSI->getDefaultDest() == RetainedSuccBB &&
2383 "Not retaining default successor!");
2384 for (const auto &Case : NewSI->cases())
2385 Case.getCaseSuccessor()->removePredecessor(
2386 ParentBB,
2387 /*KeepOneInputPHIs*/ true);
2388
2389 // We need to use the set to populate domtree updates as even when there
2390 // are multiple cases pointing at the same successor we only want to
2391 // remove and insert one edge in the domtree.
2392 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2393 DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2394 }
2395
2396 // After MSSAU update, remove the cloned terminator instruction NewTI.
2397 ParentBB->getTerminator()->eraseFromParent();
2398
2399 // Create a new unconditional branch to the continuing block (as opposed to
2400 // the one cloned).
2401 BranchInst::Create(RetainedSuccBB, ParentBB);
2402 } else {
2403 assert(BI && "Only branches have partial unswitching.");
2404 assert(UnswitchedSuccBBs.size() == 1 &&
2405 "Only one possible unswitched block for a branch!");
2406 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2407 // When doing a partial unswitch, we have to do a bit more work to build up
2408 // the branch in the split block.
2409 if (PartiallyInvariant)
2410 buildPartialInvariantUnswitchConditionalBranch(
2411 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
2412 else {
2413 buildPartialUnswitchConditionalBranch(
2414 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
2415 FreezeLoopUnswitchCond, BI, &AC, DT);
2416 }
2417 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2418
2419 if (MSSAU) {
2420 DT.applyUpdates(DTUpdates);
2421 DTUpdates.clear();
2422
2423 // Perform MSSA cloning updates.
2424 for (auto &VMap : VMaps)
2425 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2426 /*IgnoreIncomingWithNoClones=*/true);
2427 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2428 }
2429 }
2430
2431 // Apply the updates accumulated above to get an up-to-date dominator tree.
2432 DT.applyUpdates(DTUpdates);
2433
2434 // Now that we have an accurate dominator tree, first delete the dead cloned
2435 // blocks so that we can accurately build any cloned loops. It is important to
2436 // not delete the blocks from the original loop yet because we still want to
2437 // reference the original loop to understand the cloned loop's structure.
2438 deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2439
2440 // Build the cloned loop structure itself. This may be substantially
2441 // different from the original structure due to the simplified CFG. This also
2442 // handles inserting all the cloned blocks into the correct loops.
2443 SmallVector<Loop *, 4> NonChildClonedLoops;
2444 for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2445 buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2446
2447 // Now that our cloned loops have been built, we can update the original loop.
2448 // First we delete the dead blocks from it and then we rebuild the loop
2449 // structure taking these deletions into account.
2450 deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, SE,DestroyLoopCB);
2451
2452 if (MSSAU && VerifyMemorySSA)
2453 MSSAU->getMemorySSA()->verifyMemorySSA();
2454
2455 SmallVector<Loop *, 4> HoistedLoops;
2456 bool IsStillLoop =
2457 rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops, SE);
2458
2459 if (MSSAU && VerifyMemorySSA)
2460 MSSAU->getMemorySSA()->verifyMemorySSA();
2461
2462 // This transformation has a high risk of corrupting the dominator tree, and
2463 // the below steps to rebuild loop structures will result in hard to debug
2464 // errors in that case so verify that the dominator tree is sane first.
2465 // FIXME: Remove this when the bugs stop showing up and rely on existing
2466 // verification steps.
2467 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
2468
2469 if (BI && !PartiallyInvariant) {
2470 // If we unswitched a branch which collapses the condition to a known
2471 // constant we want to replace all the uses of the invariants within both
2472 // the original and cloned blocks. We do this here so that we can use the
2473 // now updated dominator tree to identify which side the users are on.
2474 assert(UnswitchedSuccBBs.size() == 1 &&
2475 "Only one possible unswitched block for a branch!");
2476 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2477
2478 // When considering multiple partially-unswitched invariants
2479 // we can't just go and replace them with constants in both branches.
2480 //
2481 // For 'AND' we infer that true branch ("continue") means true
2482 // for each invariant operand.
2483 // For 'OR' we can infer that false branch ("continue") means false
2484 // for each invariant operand.
2485 // So it happens that for the multiple-partial case we don't replace
2486 // in the unswitched branch.
2487 bool ReplaceUnswitched =
2488 FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
2489
2490 ConstantInt *UnswitchedReplacement =
2491 Direction ? ConstantInt::getTrue(BI->getContext())
2492 : ConstantInt::getFalse(BI->getContext());
2493 ConstantInt *ContinueReplacement =
2494 Direction ? ConstantInt::getFalse(BI->getContext())
2495 : ConstantInt::getTrue(BI->getContext());
2496 for (Value *Invariant : Invariants) {
2497 assert(!isa<Constant>(Invariant) &&
2498 "Should not be replacing constant values!");
2499 // Use make_early_inc_range here as set invalidates the iterator.
2500 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
2501 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2502 if (!UserI)
2503 continue;
2504
2505 // Replace it with the 'continue' side if in the main loop body, and the
2506 // unswitched if in the cloned blocks.
2507 if (DT.dominates(LoopPH, UserI->getParent()))
2508 U.set(ContinueReplacement);
2509 else if (ReplaceUnswitched &&
2510 DT.dominates(ClonedPH, UserI->getParent()))
2511 U.set(UnswitchedReplacement);
2512 }
2513 }
2514 }
2515
2516 // We can change which blocks are exit blocks of all the cloned sibling
2517 // loops, the current loop, and any parent loops which shared exit blocks
2518 // with the current loop. As a consequence, we need to re-form LCSSA for
2519 // them. But we shouldn't need to re-form LCSSA for any child loops.
2520 // FIXME: This could be made more efficient by tracking which exit blocks are
2521 // new, and focusing on them, but that isn't likely to be necessary.
2522 //
2523 // In order to reasonably rebuild LCSSA we need to walk inside-out across the
2524 // loop nest and update every loop that could have had its exits changed. We
2525 // also need to cover any intervening loops. We add all of these loops to
2526 // a list and sort them by loop depth to achieve this without updating
2527 // unnecessary loops.
2528 auto UpdateLoop = [&](Loop &UpdateL) {
2529#ifndef NDEBUG
2530 UpdateL.verifyLoop();
2531 for (Loop *ChildL : UpdateL) {
2532 ChildL->verifyLoop();
2533 assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2534 "Perturbed a child loop's LCSSA form!");
2535 }
2536#endif
2537 // First build LCSSA for this loop so that we can preserve it when
2538 // forming dedicated exits. We don't want to perturb some other loop's
2539 // LCSSA while doing that CFG edit.
2540 formLCSSA(UpdateL, DT, &LI, SE);
2541
2542 // For loops reached by this loop's original exit blocks we may
2543 // have introduced new, non-dedicated exits. At least try to re-form dedicated
2544 // exits for these loops. This may fail if they couldn't have dedicated
2545 // exits to start with.
2546 formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
2547 };
2548
2549 // For non-child cloned loops and hoisted loops, we just need to update LCSSA
2550 // and we can do it in any order as they don't nest relative to each other.
2551 //
2552 // Also check if any of the loops we have updated have become top-level loops
2553 // as that will necessitate widening the outer loop scope.
2554 for (Loop *UpdatedL :
2555 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2556 UpdateLoop(*UpdatedL);
2557 if (UpdatedL->isOutermost())
2558 OuterExitL = nullptr;
2559 }
2560 if (IsStillLoop) {
2561 UpdateLoop(L);
2562 if (L.isOutermost())
2563 OuterExitL = nullptr;
2564 }
2565
2566 // If the original loop had exit blocks, walk up through the outer most loop
2567 // of those exit blocks to update LCSSA and form updated dedicated exits.
2568 if (OuterExitL != &L)
2569 for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2570 OuterL = OuterL->getParentLoop())
2571 UpdateLoop(*OuterL);
2572
2573#ifndef NDEBUG
2574 // Verify the entire loop structure to catch any incorrect updates before we
2575 // progress in the pass pipeline.
2576 LI.verify(DT);
2577#endif
2578
2579 // Now that we've unswitched something, make callbacks to report the changes.
2580 // For that we need to merge together the updated loops and the cloned loops
2581 // and check whether the original loop survived.
2582 SmallVector<Loop *, 4> SibLoops;
2583 for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2584 if (UpdatedL->getParentLoop() == ParentL)
2585 SibLoops.push_back(UpdatedL);
2586 UnswitchCB(IsStillLoop, PartiallyInvariant, InjectedCondition, SibLoops);
2587
2588 if (MSSAU && VerifyMemorySSA)
2589 MSSAU->getMemorySSA()->verifyMemorySSA();
2590
2591 if (BI)
2592 ++NumBranches;
2593 else
2594 ++NumSwitches;
2595}
2596
2597/// Recursively compute the cost of a dominator subtree based on the per-block
2598/// cost map provided.
2599///
2600 /// The recursive computation is memoized into the provided DT-indexed cost map
2601/// to allow querying it for most nodes in the domtree without it becoming
2602/// quadratic.
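///
/// Informally (editorial addition): Cost(N) = BBCostMap[block(N)] + the sum of
/// Cost(C) over the domtree children C of N, with each node's result cached in
/// DTCostMap so shared subtrees are only costed once.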
2603 static InstructionCost computeDomSubtreeCost(
2604 DomTreeNode &N,
2605 const SmallDenseMap<BasicBlock *, InstructionCost, 4> &BBCostMap,
2606 SmallDenseMap<DomTreeNode *, InstructionCost, 4> &DTCostMap) {
2607 // Don't accumulate cost (or recurse through) blocks not in our block cost
2608 // map and thus not part of the duplication cost being considered.
2609 auto BBCostIt = BBCostMap.find(N.getBlock());
2610 if (BBCostIt == BBCostMap.end())
2611 return 0;
2612
2613 // Lookup this node to see if we already computed its cost.
2614 auto DTCostIt = DTCostMap.find(&N);
2615 if (DTCostIt != DTCostMap.end())
2616 return DTCostIt->second;
2617
2618 // If not, we have to compute it. We can't use insert above and update
2619 // because computing the cost may insert more things into the map.
2620 InstructionCost Cost = std::accumulate(
2621 N.begin(), N.end(), BBCostIt->second,
2622 [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
2623 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2624 });
2625 bool Inserted = DTCostMap.insert({&N, Cost}).second;
2626 (void)Inserted;
2627 assert(Inserted && "Should not insert a node while visiting children!");
2628 return Cost;
2629}
2630
2631 /// Turns a select instruction into an implicit control flow branch,
2632/// making the following replacement:
2633///
2634/// head:
2635/// --code before select--
2636/// select %cond, %trueval, %falseval
2637/// --code after select--
2638///
2639/// into
2640///
2641/// head:
2642/// --code before select--
2643/// br i1 %cond, label %then, label %tail
2644///
2645/// then:
2646/// br %tail
2647///
2648/// tail:
2649/// phi [ %trueval, %then ], [ %falseval, %head]
2650 /// --code after select--
2651///
2652/// It also makes all relevant DT and LI updates, so that all structures are in
2653/// valid state after this transform.
2654 static BranchInst *turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT,
2655 LoopInfo &LI, MemorySSAUpdater *MSSAU,
2656 AssumptionCache *AC) {
2657 LLVM_DEBUG(dbgs() << "Turning " << *SI << " into a branch.\n");
2658 BasicBlock *HeadBB = SI->getParent();
2659
2660 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2661 SplitBlockAndInsertIfThen(SI->getCondition(), SI, false,
2662 SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2663 auto *CondBr = cast<BranchInst>(HeadBB->getTerminator());
2664 BasicBlock *ThenBB = CondBr->getSuccessor(0),
2665 *TailBB = CondBr->getSuccessor(1);
2666 if (MSSAU)
2667 MSSAU->moveAllAfterSpliceBlocks(HeadBB, TailBB, SI);
2668
2669 PHINode *Phi = PHINode::Create(SI->getType(), 2, "unswitched.select", SI);
2670 Phi->addIncoming(SI->getTrueValue(), ThenBB);
2671 Phi->addIncoming(SI->getFalseValue(), HeadBB);
2672 SI->replaceAllUsesWith(Phi);
2673 SI->eraseFromParent();
2674
2675 if (MSSAU && VerifyMemorySSA)
2676 MSSAU->getMemorySSA()->verifyMemorySSA();
2677
2678 ++NumSelects;
2679 return CondBr;
2680}
2681
2682 /// Turns an llvm.experimental.guard intrinsic into an implicit control flow branch,
2683/// making the following replacement:
2684///
2685/// --code before guard--
2686/// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2687/// --code after guard--
2688///
2689/// into
2690///
2691/// --code before guard--
2692/// br i1 %cond, label %guarded, label %deopt
2693///
2694/// guarded:
2695/// --code after guard--
2696///
2697/// deopt:
2698/// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2699/// unreachable
2700///
2701/// It also makes all relevant DT and LI updates, so that all structures are in
2702/// valid state after this transform.
2703 static BranchInst *turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
2704 DominatorTree &DT, LoopInfo &LI,
2705 MemorySSAUpdater *MSSAU) {
2707 LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
2708 BasicBlock *CheckBB = GI->getParent();
2709
2710 if (MSSAU && VerifyMemorySSA)
2711 MSSAU->getMemorySSA()->verifyMemorySSA();
2712
2713 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2714 Instruction *DeoptBlockTerm =
2715 SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true,
2716 GI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2717 BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2718 // SplitBlockAndInsertIfThen inserts control flow that branches to
2719 // DeoptBlockTerm if the condition is true. We want the opposite.
2720 CheckBI->swapSuccessors();
2721
2722 BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2723 GuardedBlock->setName("guarded");
2724 CheckBI->getSuccessor(1)->setName("deopt");
2725 BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2726
2727 if (MSSAU)
2728 MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2729
2730 GI->moveBefore(DeoptBlockTerm);
2731 GI->setArgOperand(0, ConstantInt::getFalse(GI->getContext()));
2732
2733 if (MSSAU) {
2734 MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
2735 MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2736 if (VerifyMemorySSA)
2737 MSSAU->getMemorySSA()->verifyMemorySSA();
2738 }
2739
2740 if (VerifyLoopInfo)
2741 LI.verify(DT);
2742 ++NumGuards;
2743 return CheckBI;
2744}
2745
2746/// Cost multiplier is a way to limit potentially exponential behavior
2747 /// of loop-unswitch. Cost is multiplied in proportion to 2^number of unswitch
2748 /// candidates available. It also accounts for the number of "sibling" loops as
2749 /// a proxy for previous unswitches that have already happened on this
2750/// cluster of loops. There was an attempt to keep this formula simple,
2751/// just enough to limit the worst case behavior. Even if it is not that simple
2752/// now it is still not an attempt to provide a detailed heuristic size
2753/// prediction.
2754///
2755/// TODO: Make a proper accounting of "explosion" effect for all kinds of
2756/// unswitch candidates, making adequate predictions instead of wild guesses.
2757/// That requires knowing not just the number of "remaining" candidates but
2758/// also costs of unswitching for each of these candidates.
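///
/// Worked example with made-up numbers (editorial addition): for a top-level
/// loop with a sibling count of 3, an estimated 10 clones from the remaining
/// candidates, UnswitchNumInitialUnscaledCandidates of 8 and
/// UnswitchSiblingsToplevelDiv of 2, ClonesPower is max(10 - 8, 0) = 2 and
/// SiblingsMultiplier is max(3 / 2, 1) = 1, giving min(1 * 2^2,
/// UnswitchThreshold) = 4 as the multiplier.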
2760 const Instruction &TI, const Loop &L, const LoopInfo &LI,
2761 const DominatorTree &DT,
2762 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates) {
2763
2764 // Guards and other exiting conditions do not contribute to exponential
2765 // explosion as soon as they dominate the latch (otherwise there might be
2766 // another path to the latch remaining that does not allow to eliminate the
2767 // loop copy on unswitch).
2768 const BasicBlock *Latch = L.getLoopLatch();
2769 const BasicBlock *CondBlock = TI.getParent();
2770 if (DT.dominates(CondBlock, Latch) &&
2771 (isGuard(&TI) ||
2772 (TI.isTerminator() &&
2773 llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
2774 return L.contains(SuccBB);
2775 }) <= 1))) {
2776 NumCostMultiplierSkipped++;
2777 return 1;
2778 }
2779
2780 auto *ParentL = L.getParentLoop();
2781 int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2782 : std::distance(LI.begin(), LI.end()));
2783 // Count the number of clones that all the candidates might cause during
2784 // unswitching. Branch/guard/select counts as 1, switch counts as log2 of its
2785 // cases.
2786 int UnswitchedClones = 0;
2787 for (const auto &Candidate : UnswitchCandidates) {
2788 const Instruction *CI = Candidate.TI;
2789 const BasicBlock *CondBlock = CI->getParent();
2790 bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2791 if (isa<SelectInst>(CI)) {
2792 UnswitchedClones++;
2793 continue;
2794 }
2795 if (isGuard(CI)) {
2796 if (!SkipExitingSuccessors)
2797 UnswitchedClones++;
2798 continue;
2799 }
2800 int NonExitingSuccessors =
2801 llvm::count_if(successors(CondBlock),
2802 [SkipExitingSuccessors, &L](const BasicBlock *SuccBB) {
2803 return !SkipExitingSuccessors || L.contains(SuccBB);
2804 });
2805 UnswitchedClones += Log2_32(NonExitingSuccessors);
2806 }
2807
2808 // Ignore up to the "unscaled candidates" number of unswitch candidates
2809 // when calculating the power-of-two scaling of the cost. The main idea
2810 // with this control is to allow a small number of unswitches to happen
2811 // and rely more on siblings multiplier (see below) when the number
2812 // of candidates is small.
2813 unsigned ClonesPower =
2814 std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2815
2816 // Allowing top-level loops to spread a bit more than nested ones.
2817 int SiblingsMultiplier =
2818 std::max((ParentL ? SiblingsCount
2819 : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2820 1);
2821 // Compute the cost multiplier in a way that won't overflow by saturating
2822 // at an upper bound.
2823 int CostMultiplier;
2824 if (ClonesPower > Log2_32(UnswitchThreshold) ||
2825 SiblingsMultiplier > UnswitchThreshold)
2826 CostMultiplier = UnswitchThreshold;
2827 else
2828 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2829 (int)UnswitchThreshold);
2830
2831 LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier
2832 << " (siblings " << SiblingsMultiplier << " * clones "
2833 << (1 << ClonesPower) << ")"
2834 << " for unswitch candidate: " << TI << "\n");
2835 return CostMultiplier;
2836}
2837
2838 static bool collectUnswitchCandidates(
2839 SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,
2840 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch,
2841 const Loop &L, const LoopInfo &LI, AAResults &AA,
2842 const MemorySSAUpdater *MSSAU) {
2843 assert(UnswitchCandidates.empty() && "Should be!");
2844
2845 auto AddUnswitchCandidatesForInst = [&](Instruction *I, Value *Cond) {
2846 Cond = skipTrivialSelect(Cond);
2847 if (isa<Constant>(Cond))
2848 return;
2849 if (L.isLoopInvariant(Cond)) {
2850 UnswitchCandidates.push_back({I, {Cond}});
2851 return;
2852 }
2853 if (match(Cond, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
2854 TinyPtrVector<Value *> Invariants =
2855 collectHomogenousInstGraphLoopInvariants(
2856 L, *static_cast<Instruction *>(Cond), LI);
2857 if (!Invariants.empty())
2858 UnswitchCandidates.push_back({I, std::move(Invariants)});
2859 }
2860 };
2861
2862 // Whether or not we should also collect guards in the loop.
2863 bool CollectGuards = false;
2864 if (UnswitchGuards) {
2865 auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
2866 Intrinsic::getName(Intrinsic::experimental_guard));
2867 if (GuardDecl && !GuardDecl->use_empty())
2868 CollectGuards = true;
2869 }
2870
2871 for (auto *BB : L.blocks()) {
2872 if (LI.getLoopFor(BB) != &L)
2873 continue;
2874
2875 for (auto &I : *BB) {
2876 if (auto *SI = dyn_cast<SelectInst>(&I)) {
2877 auto *Cond = SI->getCondition();
2878 // Do not unswitch vector selects and logical and/or selects
2879 if (Cond->getType()->isIntegerTy(1) && !SI->getType()->isIntegerTy(1))
2880 AddUnswitchCandidatesForInst(SI, Cond);
2881 } else if (CollectGuards && isGuard(&I)) {
2882 auto *Cond =
2883 skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
2884 // TODO: Support AND, OR conditions and partial unswitching.
2885 if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2886 UnswitchCandidates.push_back({&I, {Cond}});
2887 }
2888 }
2889
2890 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2891 // We can only consider fully loop-invariant switch conditions as we need
2892 // to completely eliminate the switch after unswitching.
2893 if (!isa<Constant>(SI->getCondition()) &&
2894 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
2895 UnswitchCandidates.push_back({SI, {SI->getCondition()}});
2896 continue;
2897 }
2898
2899 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2900 if (!BI || !BI->isConditional() ||
2901 BI->getSuccessor(0) == BI->getSuccessor(1))
2902 continue;
2903
2904 AddUnswitchCandidatesForInst(BI, BI->getCondition());
2905 }
2906
2907 if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
2908 !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
2909 return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
2910 })) {
2911 MemorySSA *MSSA = MSSAU->getMemorySSA();
2912 if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
2913 LLVM_DEBUG(
2914 dbgs() << "simple-loop-unswitch: Found partially invariant condition "
2915 << *Info->InstToDuplicate[0] << "\n");
2916 PartialIVInfo = *Info;
2917 PartialIVCondBranch = L.getHeader()->getTerminator();
2918 TinyPtrVector<Value *> ValsToDuplicate;
2919 llvm::append_range(ValsToDuplicate, Info->InstToDuplicate);
2920 UnswitchCandidates.push_back(
2921 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
2922 }
2923 }
2924 return !UnswitchCandidates.empty();
2925}
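// For illustration only (not part of the original source), the simplest kind
// of candidate collected above is a conditional branch whose condition is
// defined outside the loop, e.g.:
//
//   entry:
//     %inv = icmp ult i32 %a, %b      ; loop-invariant condition
//     br label %loop
//   loop:
//     ...
//     br i1 %inv, label %then, label %else
//
// The branch in %loop would be recorded as a candidate with {%inv} as its
// invariant set.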
2926
2927/// Tries to canonicalize condition described by:
2928///
2929/// br (LHS pred RHS), label IfTrue, label IfFalse
2930///
2931/// into its equivalent where `Pred` is something that we support for injected
2932/// invariants (so far it is limited to ult), LHS in canonicalized form is
2933/// non-invariant and RHS is an invariant.
2934static void canonicalizeForInvariantConditionInjection(
2935 ICmpInst::Predicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue,
2936 BasicBlock *&IfFalse, const Loop &L) {
2937 if (!L.contains(IfTrue)) {
2938 Pred = ICmpInst::getInversePredicate(Pred);
2939 std::swap(IfTrue, IfFalse);
2940 }
2941
2942 // Move loop-invariant argument to RHS position.
2943 if (L.isLoopInvariant(LHS)) {
2944 Pred = ICmpInst::getSwappedPredicate(Pred);
2945 std::swap(LHS, RHS);
2946 }
2947
2948 if (Pred == ICmpInst::ICMP_SGE && match(RHS, m_Zero())) {
2949 // Turn "x >=s 0" into "x <u SIGNED_MIN" (INT_MIN reinterpreted as unsigned).
2950 Pred = ICmpInst::ICMP_ULT;
2951 RHS = ConstantInt::get(
2952 RHS->getContext(),
2953 APInt::getSignedMinValue(RHS->getType()->getIntegerBitWidth()));
2954 }
2955}
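// A concrete illustration of the rewrite above (not from the original source):
// for an i8 value, "%x >=s 0" holds exactly for 0..127, which as unsigned
// values is the range below 128 (0x80, the signed-minimum bit pattern), i.e.
// "%x <u 128".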
2956
2957/// Returns true if the predicate described by (\p Pred, \p LHS, \p RHS),
2958/// branching into the blocks (\p IfTrue, \p IfFalse), can be optimized by
2959/// injecting a loop-invariant condition.
2960static bool shouldTryInjectInvariantCondition(
2961 const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS,
2962 const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L) {
2963 if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
2964 return false;
2965 // TODO: Support other predicates.
2966 if (Pred != ICmpInst::ICMP_ULT)
2967 return false;
2968 // TODO: Support non-loop-exiting branches?
2969 if (!L.contains(IfTrue) || L.contains(IfFalse))
2970 return false;
2971 // FIXME: For some reason this causes problems with MSSA updates, need to
2972 // investigate why. So far, just don't unswitch latch.
2973 if (L.getHeader() == IfTrue)
2974 return false;
2975 return true;
2976}
2977
2978/// Returns true if metadata on \p BI allows us to optimize branching into \p
2979/// TakenSucc via injection of invariant conditions. The branch into \p TakenSucc
2980/// should be taken often enough according to profile data and should not have
2981/// been unswitched before; this information comes from the metadata.
2982static bool shouldTryInjectBasingOnMetadata(const BranchInst *BI,
2983 const BasicBlock *TakenSucc) {
2984 SmallVector<uint32_t> Weights;
2985 if (!extractBranchWeights(*BI, Weights))
2986 return false;
2987 unsigned T = InjectInvariantConditionHotnesThreshold;
2988 BranchProbability LikelyTaken(T - 1, T);
2989
2990 assert(Weights.size() == 2 && "Unexpected profile data!");
2991 size_t Idx = BI->getSuccessor(0) == TakenSucc ? 0 : 1;
2992 auto Num = Weights[Idx];
2993 auto Denom = Weights[0] + Weights[1];
2994 // Degenerate or overflowed metadata.
2995 if (Denom == 0 || Num > Denom)
2996 return false;
2997 BranchProbability ActualTaken(Num, Denom);
2998 if (LikelyTaken > ActualTaken)
2999 return false;
3000 return true;
3001}
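// A worked example (illustrative only): with the default value 16 of
// simple-loop-unswitch-inject-invariant-condition-hotness-threshold,
// LikelyTaken is 15/16. A branch with branch_weights {124, 4} whose first
// successor is TakenSucc gives ActualTaken = 124/128 > 15/16, so it qualifies;
// weights of {64, 64} would be rejected.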
3002
3003/// Materialize pending invariant condition of the given candidate into IR. The
3004/// injected loop-invariant condition implies the original loop-variant branch
3005/// condition, so the materialization turns
3006///
3007/// loop_block:
3008/// ...
3009/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3010///
3011/// into
3012///
3013/// preheader:
3014/// %invariant_cond = LHS pred RHS
3015/// ...
3016/// loop_block:
3017/// br i1 %invariant_cond, label InLoopSucc, label OriginalCheck
3018/// OriginalCheck:
3019/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3020/// ...
3021static NonTrivialUnswitchCandidate
3022injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
3023 DominatorTree &DT, LoopInfo &LI,
3024 AssumptionCache &AC, MemorySSAUpdater *MSSAU) {
3025 assert(Candidate.hasPendingInjection() && "Nothing to inject!");
3026 BasicBlock *Preheader = L.getLoopPreheader();
3027 assert(Preheader && "Loop is not in simplified form?");
3028 assert(LI.getLoopFor(Candidate.TI->getParent()) == &L &&
3029 "Unswitching branch of inner loop!");
3030
3031 auto Pred = Candidate.PendingInjection->Pred;
3032 auto *LHS = Candidate.PendingInjection->LHS;
3033 auto *RHS = Candidate.PendingInjection->RHS;
3034 auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;
3035 auto *TI = cast<BranchInst>(Candidate.TI);
3036 auto *BB = Candidate.TI->getParent();
3037 auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1)
3038 : TI->getSuccessor(0);
3039 // FIXME: Remove this once limitation on successors is lifted.
3040 assert(L.contains(InLoopSucc) && "Not supported yet!");
3041 assert(!L.contains(OutOfLoopSucc) && "Not supported yet!");
3042 auto &Ctx = BB->getContext();
3043
3044 IRBuilder<> Builder(Preheader->getTerminator());
3045 assert(ICmpInst::isUnsigned(Pred) && "Not supported yet!");
3046 if (LHS->getType() != RHS->getType()) {
3047 if (LHS->getType()->getIntegerBitWidth() <
3048 RHS->getType()->getIntegerBitWidth())
3049 LHS = Builder.CreateZExt(LHS, RHS->getType(), LHS->getName() + ".wide");
3050 else
3051 RHS = Builder.CreateZExt(RHS, LHS->getType(), RHS->getName() + ".wide");
3052 }
3053 // Do not use builder here: CreateICmp may simplify this into a constant and
3054 // unswitching will break. Better optimize it away later.
3055 auto *InjectedCond =
3056 ICmpInst::Create(Instruction::ICmp, Pred, LHS, RHS, "injected.cond",
3057 Preheader->getTerminator());
3058
3059 BasicBlock *CheckBlock = BasicBlock::Create(Ctx, BB->getName() + ".check",
3060 BB->getParent(), InLoopSucc);
3061 Builder.SetInsertPoint(TI);
3062 auto *InvariantBr =
3063 Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
3064
3065 Builder.SetInsertPoint(CheckBlock);
3066 Builder.CreateCondBr(TI->getCondition(), TI->getSuccessor(0),
3067 TI->getSuccessor(1));
3068 TI->eraseFromParent();
3069
3070 // Fixup phis.
3071 for (auto &I : *InLoopSucc) {
3072 auto *PN = dyn_cast<PHINode>(&I);
3073 if (!PN)
3074 break;
3075 auto *Inc = PN->getIncomingValueForBlock(BB);
3076 PN->addIncoming(Inc, CheckBlock);
3077 }
3078 OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);
3079
3081 { DominatorTree::Insert, BB, CheckBlock },
3082 { DominatorTree::Insert, CheckBlock, InLoopSucc },
3083 { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
3084 { DominatorTree::Delete, BB, OutOfLoopSucc }
3085 };
3086
3087 DT.applyUpdates(DTUpdates);
3088 if (MSSAU)
3089 MSSAU->applyUpdates(DTUpdates, DT);
3090 L.addBasicBlockToLoop(CheckBlock, LI);
3091
3092#ifndef NDEBUG
3093 DT.verify();
3094 LI.verify(DT);
3095 if (MSSAU && VerifyMemorySSA)
3096 MSSAU->getMemorySSA()->verifyMemorySSA();
3097#endif
3098
3099 // TODO: In fact, cost of unswitching a new invariant candidate is *slightly*
3100 // higher because we have just inserted a new block. Need to think how to
3101 // adjust the cost of injected candidates when it was first computed.
3102 LLVM_DEBUG(dbgs() << "Injected a new loop-invariant branch " << *InvariantBr
3103 << " and considering it for unswitching.");
3104 ++NumInvariantConditionsInjected;
3105 return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },
3106 Candidate.Cost);
3107}
3108
3109/// Given chain of loop branch conditions looking like:
3110/// br (Variant < Invariant1)
3111/// br (Variant < Invariant2)
3112/// br (Variant < Invariant3)
3113/// ...
3114/// collect set of invariant conditions on which we want to unswitch, which
3115/// look like:
3116/// Invariant1 <= Invariant2
3117/// Invariant2 <= Invariant3
3118/// ...
3119/// Though they might not immediately exist in the IR, we can still inject them.
3120static bool insertCandidatesWithPendingInjections(
3121 SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, Loop &L,
3122 ICmpInst::Predicate Pred, ArrayRef<CompareDesc> Compares,
3123 const DominatorTree &DT) {
3124
3125 assert(ICmpInst::isRelational(Pred));
3126 assert(ICmpInst::isStrictPredicate(Pred));
3127 if (Compares.size() < 2)
3128 return false;
3129 ICmpInst::Predicate NonStrictPred = ICmpInst::getNonStrictPredicate(Pred);
3130 for (auto Prev = Compares.begin(), Next = Compares.begin() + 1;
3131 Next != Compares.end(); ++Prev, ++Next) {
3132 Value *LHS = Next->Invariant;
3133 Value *RHS = Prev->Invariant;
3134 BasicBlock *InLoopSucc = Prev->InLoopSucc;
3135 InjectedInvariant ToInject(NonStrictPred, LHS, RHS, InLoopSucc);
3136 NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },
3137 std::nullopt, std::move(ToInject));
3138 UnswitchCandidates.push_back(std::move(Candidate));
3139 }
3140 return true;
3141}
3142
3143/// Collect unswitch candidates by invariant conditions that are not immediately
3144/// present in the loop. However, they can be injected into the code if we
3145/// decide it's profitable.
3146/// An example of such a condition is the following:
3147///
3148/// for (...) {
3149/// x = load ...
3150/// if (! x <u C1) break;
3151/// if (! x <u C2) break;
3152/// <do something>
3153/// }
3154///
3155/// We can unswitch on the condition "C1 <=u C2". If that is true, then "x <u
3156/// C1 <=u C2" automatically implies "x <u C2", so we can get rid of one of the
3157/// loop-variant checks in the unswitched loop version.
3158static bool collectUnswitchCandidatesWithInjections(
3159 SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,
3160 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L,
3161 const DominatorTree &DT, const LoopInfo &LI, AAResults &AA,
3162 const MemorySSAUpdater *MSSAU) {
3163 if (!InjectInvariantConditions)
3164 return false;
3165
3166 if (!DT.isReachableFromEntry(L.getHeader()))
3167 return false;
3168 auto *Latch = L.getLoopLatch();
3169 // Need to have a single latch and a preheader.
3170 if (!Latch)
3171 return false;
3172 assert(L.getLoopPreheader() && "Must have a preheader!");
3173
3175 // Traverse the conditions that dominate latch (and therefore dominate each
3176 // other).
3174 DenseMap<Value *, SmallVector<CompareDesc, 4>> CandidatesULT;
3177 for (auto *DTN = DT.getNode(Latch); L.contains(DTN->getBlock());
3178 DTN = DTN->getIDom()) {
3179 ICmpInst::Predicate Pred;
3180 Value *LHS = nullptr, *RHS = nullptr;
3181 BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
3182 auto *BB = DTN->getBlock();
3183 // Ignore inner loops.
3184 if (LI.getLoopFor(BB) != &L)
3185 continue;
3186 auto *Term = BB->getTerminator();
3187 if (!match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
3188 m_BasicBlock(IfTrue), m_BasicBlock(IfFalse))))
3189 continue;
3190 if (!LHS->getType()->isIntegerTy())
3191 continue;
3192 canonicalizeForInvariantConditionInjection(Pred, LHS, RHS, IfTrue, IfFalse,
3193 L);
3194 if (!shouldTryInjectInvariantCondition(Pred, LHS, RHS, IfTrue, IfFalse, L))
3195 continue;
3196 if (!shouldTryInjectBasingOnMetadata(cast<BranchInst>(Term), IfTrue))
3197 continue;
3198 // Strip ZEXT for unsigned predicate.
3199 // TODO: once signed predicates are supported, also strip SEXT.
3200 CompareDesc Desc(cast<BranchInst>(Term), RHS, IfTrue);
3201 while (auto *Zext = dyn_cast<ZExtInst>(LHS))
3202 LHS = Zext->getOperand(0);
3203 CandidatesULT[LHS].push_back(Desc);
3204 }
3205
3206 bool Found = false;
3207 for (auto &It : CandidatesULT)
3208 Found |= insertCandidatesWithPendingInjections(
3209 UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);
3210 return Found;
3211}
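// To make the C1/C2 example from the function comment above concrete
// (illustrative only): with C1 = 16 and C2 = 100, the injected condition is
// "16 <=u 100". In the loop version specialized for that condition being true,
// "x <u 16" already implies "x <u 100", so the second loop-variant check
// becomes redundant.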
3212
3213static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI) {
3214 if (!L.isSafeToClone())
3215 return false;
3216 for (auto *BB : L.blocks())
3217 for (auto &I : *BB) {
3218 if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
3219 return false;
3220 if (auto *CB = dyn_cast<CallBase>(&I)) {
3221 assert(!CB->cannotDuplicate() && "Checked by L.isSafeToClone().");
3222 if (CB->isConvergent())
3223 return false;
3224 }
3225 }
3226
3227 // Check if there are irreducible CFG cycles in this loop. If so, we cannot
3228 // easily unswitch non-trivial edges out of the loop. Doing so might turn the
3229 // irreducible control flow into reducible control flow and introduce new
3230 // loops "out of thin air". If we ever discover important use cases for doing
3231 // this, we can add support to loop unswitch, but it is a lot of complexity
3232 // for what seems little or no real world benefit.
3233 LoopBlocksRPO RPOT(&L);
3234 RPOT.perform(&LI);
3235 if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
3236 return false;
3237
3238 SmallVector<BasicBlock *, 4> ExitBlocks;
3239 L.getUniqueExitBlocks(ExitBlocks);
3240 // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
3241 // instruction as we don't know how to split those exit blocks.
3242 // FIXME: We should teach SplitBlock to handle this and remove this
3243 // restriction.
3244 for (auto *ExitBB : ExitBlocks) {
3245 auto *I = ExitBB->getFirstNonPHI();
3246 if (isa<CleanupPadInst>(I) || isa<CatchSwitchInst>(I)) {
3247 LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "
3248 "in exit block\n");
3249 return false;
3250 }
3251 }
3252
3253 return true;
3254}
3255
3256static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
3257 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates, const Loop &L,
3258 const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC,
3259 const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo) {
3260 // Unswitching these terminators will require duplicating parts of the loop,
3261 // so we need to be able to model that cost. Compute the ephemeral
3262 // values and set up a data structure to hold per-BB costs. We cache each
3263 // block's cost so that we don't recompute this when considering different
3264 // subsets of the loop for duplication during unswitching.
3265 SmallPtrSet<const Value *, 4> EphValues;
3266 CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
3267 SmallDenseMap<BasicBlock *, InstructionCost, 4> BBCostMap;
3268
3269 // Compute the cost of each block, as well as the total loop cost. Also, bail
3270 // out if we see instructions which are incompatible with loop unswitching
3271 // (convergent, noduplicate, or cross-basic-block tokens).
3272 // FIXME: We might be able to safely handle some of these in non-duplicated
3273 // regions.
3274 TargetTransformInfo::TargetCostKind CostKind =
3275 L.getHeader()->getParent()->hasMinSize()
3276 ? TargetTransformInfo::TCK_CodeSize
3277 : TargetTransformInfo::TCK_SizeAndLatency;
3278 InstructionCost LoopCost = 0;
3279 for (auto *BB : L.blocks()) {
3280 InstructionCost Cost = 0;
3281 for (auto &I : *BB) {
3282 if (EphValues.count(&I))
3283 continue;
3284 Cost += TTI.getInstructionCost(&I, CostKind);
3285 }
3286 assert(Cost >= 0 && "Must not have negative costs!");
3287 LoopCost += Cost;
3288 assert(LoopCost >= 0 && "Must not have negative loop costs!");
3289 BBCostMap[BB] = Cost;
3290 }
3291 LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");
3292
3293 // Now we find the best candidate by searching for the one with the following
3294 // properties in order:
3295 //
3296 // 1) An unswitching cost below the threshold
3297 // 2) The smallest number of duplicated unswitch candidates (to avoid
3298 // creating redundant subsequent unswitching)
3299 // 3) The smallest cost after unswitching.
3300 //
3301 // We prioritize reducing fanout of unswitch candidates provided the cost
3302 // remains below the threshold because this has a multiplicative effect.
3303 //
3304 // This requires memoizing each dominator subtree to avoid redundant work.
3305 //
3306 // FIXME: Need to actually do the number of candidates part above.
3307 SmallDenseMap<DomTreeNode *, InstructionCost, 4> DTCostMap;
3308 // Given a terminator which might be unswitched, computes the non-duplicated
3309 // cost for that terminator.
3310 auto ComputeUnswitchedCost = [&](Instruction &TI,
3311 bool FullUnswitch) -> InstructionCost {
3312 // Unswitching selects unswitches the entire loop.
3313 if (isa<SelectInst>(TI))
3314 return LoopCost;
3315
3316 BasicBlock &BB = *TI.getParent();
3317 SmallPtrSet<BasicBlock *, 4> Visited;
3318 InstructionCost Cost = 0;
3320 for (BasicBlock *SuccBB : successors(&BB)) {
3321 // Don't count successors more than once.
3322 if (!Visited.insert(SuccBB).second)
3323 continue;
3324
3325 // If this is a partial unswitch candidate, then it must be a conditional
3326 // branch with a condition of either `or`, `and`, their corresponding
3327 // select forms or partially invariant instructions. In that case, one of
3328 // the successors is necessarily duplicated, so don't even try to remove
3329 // its cost.
3330 if (!FullUnswitch) {
3331 auto &BI = cast<BranchInst>(TI);
3332 Value *Cond = skipTrivialSelect(BI.getCondition());
3333 if (match(Cond, m_LogicalAnd())) {
3334 if (SuccBB == BI.getSuccessor(1))
3335 continue;
3336 } else if (match(Cond, m_LogicalOr())) {
3337 if (SuccBB == BI.getSuccessor(0))
3338 continue;
3339 } else if ((PartialIVInfo.KnownValue->isOneValue() &&
3340 SuccBB == BI.getSuccessor(0)) ||
3341 (!PartialIVInfo.KnownValue->isOneValue() &&
3342 SuccBB == BI.getSuccessor(1)))
3343 continue;
3344 }
3345
3346 // This successor's domtree will not need to be duplicated after
3347 // unswitching if the edge to the successor dominates it (and thus the
3348 // entire tree). This essentially means there is no other path into this
3349 // subtree and so it will end up live in only one clone of the loop.
3350 if (SuccBB->getUniquePredecessor() ||
3351 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
3352 return PredBB == &BB || DT.dominates(SuccBB, PredBB);
3353 })) {
3354 Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
3355 assert(Cost <= LoopCost &&
3356 "Non-duplicated cost should never exceed total loop cost!");
3357 }
3358 }
3359
3360 // Now scale the cost by the number of unique successors minus one. We
3361 // subtract one because there is already at least one copy of the entire
3362 // loop. This is computing the new cost of unswitching a condition.
3363 // Note that guards always have 2 unique successors that are implicit and
3364 // will be materialized if we decide to unswitch it.
3365 int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
3366 assert(SuccessorsCount > 1 &&
3367 "Cannot unswitch a condition without multiple distinct successors!");
3368 return (LoopCost - Cost) * (SuccessorsCount - 1);
3369 };
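  // An illustrative instance of the formula above (not from the original
  // source): for a conditional branch with two unique successors in a loop of
  // total cost 40, where one successor heads a dominated subtree of cost 10
  // that will live in only one clone, the estimate is (40 - 10) * (2 - 1) = 30.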
3370
3371 std::optional<NonTrivialUnswitchCandidate> Best;
3372 for (auto &Candidate : UnswitchCandidates) {
3373 Instruction &TI = *Candidate.TI;
3374 ArrayRef<Value *> Invariants = Candidate.Invariants;
3375 BranchInst *BI = dyn_cast<BranchInst>(&TI);
3376 bool FullUnswitch =
3377 !BI || Candidate.hasPendingInjection() ||
3378 (Invariants.size() == 1 &&
3379 Invariants[0] == skipTrivialSelect(BI->getCondition()));
3380 InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
3381 // Calculate cost multiplier which is a tool to limit potentially
3382 // exponential behavior of loop-unswitch.
3383 if (EnableUnswitchCostMultiplier) {
3384 int CostMultiplier =
3385 CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
3386 assert(
3387 (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
3388 "cost multiplier needs to be in the range of 1..UnswitchThreshold");
3389 CandidateCost *= CostMultiplier;
3390 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3391 << " (multiplier: " << CostMultiplier << ")"
3392 << " for unswitch candidate: " << TI << "\n");
3393 } else {
3394 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3395 << " for unswitch candidate: " << TI << "\n");
3396 }
3397
3398 if (!Best || CandidateCost < Best->Cost) {
3399 Best = Candidate;
3400 Best->Cost = CandidateCost;
3401 }
3402 }
3403 assert(Best && "Must be!");
3404 return *Best;
3405}
3406
3407// Insert a freeze on an unswitched branch if all of the following are true:
3408// 1. The freeze-loop-unswitch-cond option is enabled.
3409// 2. The branch may not execute in the loop pre-transformation. If such a
3410// branch could cause UB when executed, hoisting it outside the loop would make
3411// it execute unconditionally and thus always cause UB. A freeze prevents this.
3412// 3. The branch condition may be poison or undef.
3413static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT,
3414 AssumptionCache &AC) {
3415 assert(isa<BranchInst>(TI) || isa<SwitchInst>(TI));
3416 if (!FreezeLoopUnswitchCond)
3417 return false;
3418
3419 ICFLoopSafetyInfo SafetyInfo;
3420 SafetyInfo.computeLoopSafetyInfo(&L);
3421 if (SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
3422 return false;
3423
3424 Value *Cond;
3425 if (BranchInst *BI = dyn_cast<BranchInst>(&TI))
3426 Cond = skipTrivialSelect(BI->getCondition());
3427 else
3428 Cond = skipTrivialSelect(cast<SwitchInst>(&TI)->getCondition());
3429 return !isGuaranteedNotToBeUndefOrPoison(
3430 Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
3431}
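// Sketch of the hazard this guards against (illustrative, not from the
// original source): if the unswitched condition may be poison or undef and the
// original branch was not guaranteed to execute, hoisting the branch to the
// preheader makes it execute unconditionally:
//
//   preheader:
//     %c.fr = freeze i1 %c    ; inserted when shouldInsertFreeze returns true
//     br i1 %c.fr, ...        ; branching on raw poison/undef would be UB
//
// The freeze turns a possibly-poison %c into an arbitrary but well-defined
// value, making the hoisted branch safe.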
3432
3433static bool unswitchBestCondition(
3434 Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
3435 AAResults &AA, TargetTransformInfo &TTI,
3436 function_ref<void(bool, bool, bool, ArrayRef<Loop *>)> UnswitchCB,
3437 ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
3438 function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
3439 // Collect all invariant conditions within this loop (as opposed to an inner
3440 // loop which would be handled when visiting that inner loop).
3441 SmallVector<NonTrivialUnswitchCandidate, 4> UnswitchCandidates;
3442 IVConditionInfo PartialIVInfo;
3443 Instruction *PartialIVCondBranch = nullptr;
3444 collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
3445 PartialIVCondBranch, L, LI, AA, MSSAU);
3446 if (!findOptionMDForLoop(&L, "llvm.loop.unswitch.injection.disable"))
3447 collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo,
3448 PartialIVCondBranch, L, DT, LI, AA,
3449 MSSAU);
3450 // If we didn't find any candidates, we're done.
3451 if (UnswitchCandidates.empty())
3452 return false;
3453
3454 LLVM_DEBUG(
3455 dbgs() << "Considering " << UnswitchCandidates.size()
3456 << " non-trivial loop invariant conditions for unswitching.\n");
3457
3458 NonTrivialUnswitchCandidate Best = findBestNonTrivialUnswitchCandidate(
3459 UnswitchCandidates, L, DT, LI, AC, TTI, PartialIVInfo);
3460
3461 assert(Best.TI && "Failed to find loop unswitch candidate");
3462 assert(Best.Cost && "Failed to compute cost");
3463
3464 if (*Best.Cost >= UnswitchThreshold) {
3465 LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << *Best.Cost
3466 << "\n");
3467 return false;
3468 }
3469
3470 bool InjectedCondition = false;
3471 if (Best.hasPendingInjection()) {
3472 Best = injectPendingInvariantConditions(Best, L, DT, LI, AC, MSSAU);
3473 InjectedCondition = true;
3474 }
3475 assert(!Best.hasPendingInjection() &&
3476 "All injections should have been done by now!");
3477
3478 if (Best.TI != PartialIVCondBranch)
3479 PartialIVInfo.InstToDuplicate.clear();
3480
3481 bool InsertFreeze;
3482 if (auto *SI = dyn_cast<SelectInst>(Best.TI)) {
3483 // If the best candidate is a select, turn it into a branch. Select
3484 // instructions with a poison conditional do not propagate poison, but
3485 // branching on poison causes UB. Insert a freeze on the select
3486 // conditional to prevent UB after turning the select into a branch.
3487 InsertFreeze = !isGuaranteedNotToBeUndefOrPoison(
3488 SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT);
3489 Best.TI = turnSelectIntoBranch(SI, DT, LI, MSSAU, &AC);
3490 } else {
3491 // If the best candidate is a guard, turn it into a branch.
3492 if (isGuard(Best.TI))
3493 Best.TI =
3494 turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
3495 InsertFreeze = shouldInsertFreeze(L, *Best.TI, DT, AC);
3496 }
3497
3498 LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Cost
3499 << ") terminator: " << *Best.TI << "\n");
3500 unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT,
3501 LI, AC, UnswitchCB, SE, MSSAU, DestroyLoopCB,
3502 InsertFreeze, InjectedCondition);
3503 return true;
3504}
3505
3506/// Unswitch control flow predicated on loop invariant conditions.
3507///
3508/// This first hoists all branches or switches which are trivial (i.e., do not
3509/// require duplicating any part of the loop) out of the loop body. It then
3510/// looks at other loop invariant control flows and tries to unswitch those as
3511/// well by cloning the loop if the result is small enough.
3512///
3513/// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
3514/// also updated based on the unswitch. The `MSSA` analysis is also updated if
3515/// valid (i.e. its use is enabled).
3516///
3517/// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
3518/// true, we will attempt to do non-trivial unswitching as well as trivial
3519/// unswitching.
3520///
3521/// The `UnswitchCB` callback provided will be run after unswitching is
3522/// complete, with the first parameter set to `true` if the provided loop
3523/// remains a loop, and a list of new sibling loops created.
3524///
3525/// If `SE` is non-null, we will update that analysis based on the unswitching
3526/// done.
3527static bool
3528unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
3529 AAResults &AA, TargetTransformInfo &TTI, bool Trivial,
3530 bool NonTrivial,
3531 function_ref<void(bool, bool, bool, ArrayRef<Loop *>)> UnswitchCB,
3532 ScalarEvolution *SE, MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI,
3533 BlockFrequencyInfo *BFI,
3534 function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
3535 assert(L.isRecursivelyLCSSAForm(DT, LI) &&
3536 "Loops must be in LCSSA form before unswitching.");
3537
3538 // Must be in loop simplified form: we need a preheader and dedicated exits.
3539 if (!L.isLoopSimplifyForm())
3540 return false;
3541
3542 // Try trivial unswitching first, before looping over the other basic blocks
3543 // in the loop.
3543 if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
3544 // If we unswitched successfully we will want to clean up the loop before
3545 // processing it further so just mark it as unswitched and return.
3546 UnswitchCB(/*CurrentLoopValid*/ true, /*PartiallyInvariant*/ false,
3547 /*InjectedCondition*/ false, {});
3548 return true;
3549 }
3550
3551 const Function *F = L.getHeader()->getParent();
3552
3553 // Check whether we should continue with non-trivial conditions.
3554 // EnableNonTrivialUnswitch: Global variable that forces non-trivial
3555 // unswitching for testing and debugging.
3556 // NonTrivial: Parameter that enables non-trivial unswitching for this
3557 // invocation of the transform. But this should be allowed only
3558 // for targets without branch divergence.
3559 //
3560 // FIXME: If divergence analysis becomes available to a loop
3561 // transform, we should allow unswitching for non-trivial uniform
3562 // branches even on targets that have divergence.
3563 // https://bugs.llvm.org/show_bug.cgi?id=48819
3564 bool ContinueWithNonTrivial =
3565 EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence(F));
3566 if (!ContinueWithNonTrivial)
3567 return false;
3568
3569 // Skip non-trivial unswitching for optsize functions.
3570 if (F->hasOptSize())
3571 return false;
3572
3573 // Returns true if Loop L's loop nest is cold, i.e. if the headers of L,
3574 // of the loops L is nested in, and of the loops nested in L are all cold.
3575 auto IsLoopNestCold = [&](const Loop *L) {
3576 // Check L and all of its parent loops.
3577 auto *Parent = L;
3578 while (Parent) {
3579 if (!PSI->isColdBlock(Parent->getHeader(), BFI))
3580 return false;
3581 Parent = Parent->getParentLoop();
3582 }
3583 // Next check all loops nested within L.
3584 SmallVector<const Loop *, 4> Worklist;
3585 Worklist.insert(Worklist.end(), L->getSubLoops().begin(),
3586 L->getSubLoops().end());
3587 while (!Worklist.empty()) {
3588 auto *CurLoop = Worklist.pop_back_val();
3589 if (!PSI->isColdBlock(CurLoop->getHeader(), BFI))
3590 return false;
3591 Worklist.insert(Worklist.end(), CurLoop->getSubLoops().begin(),
3592 CurLoop->getSubLoops().end());
3593 }
3594 return true;
3595 };
3596
3597 // Skip cold loops in cold loop nests, as unswitching them brings little
3598 // benefit but increases the code size
3599 if (PSI && PSI->hasProfileSummary() && BFI && IsLoopNestCold(&L)) {
3600 LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
3601 return false;
3602 }
3603
3604 // Perform legality checks.
3605 if (!isSafeForNoNTrivialUnswitching(L, LI))
3606 return false;
3607
3608 // For non-trivial unswitching, because it often creates new loops, we rely on
3609 // the pass manager to iterate on the loops rather than trying to immediately
3610 // reach a fixed point. There is no substantial advantage to iterating
3611 // internally, and if any of the new loops are simplified enough to contain
3612 // trivial unswitching we want to prefer those.
3613
3614 // Try to unswitch the best invariant condition. We prefer this full unswitch to
3615 // a partial unswitch when possible below the threshold.
3616 if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU,
3617 DestroyLoopCB))
3618 return true;
3619
3620 // No other opportunities to unswitch.
3621 return false;
3622}
3623
3624PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
3625 LoopStandardAnalysisResults &AR,
3626 LPMUpdater &U) {
3627 Function &F = *L.getHeader()->getParent();
3628 (void)F;
3629 ProfileSummaryInfo *PSI = nullptr;
3630 if (auto OuterProxy =
3632 .getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))
3633 PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
3634 LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
3635 << "\n");
3636
3637 // Save the current loop name in a variable so that we can report it even
3638 // after it has been deleted.
3639 std::string LoopName = std::string(L.getName());
3640
3641 auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
3642 bool PartiallyInvariant,
3643 bool InjectedCondition,
3644 ArrayRef<Loop *> NewLoops) {
3645 // If we did a non-trivial unswitch, we have added new (cloned) loops.
3646 if (!NewLoops.empty())
3647 U.addSiblingLoops(NewLoops);
3648
3649 // If the current loop remains valid, we should revisit it to catch any
3650 // other unswitch opportunities. Otherwise, we need to mark it as deleted.
3651 if (CurrentLoopValid) {
3652 if (PartiallyInvariant) {
3653 // Mark the new loop as partially unswitched, to avoid unswitching on
3654 // the same condition again.
3655 auto &Context = L.getHeader()->getContext();
3656 MDNode *DisableUnswitchMD = MDNode::get(
3657 Context,
3658 MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
3659 MDNode *NewLoopID = makePostTransformationMetadata(
3660 Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
3661 {DisableUnswitchMD});
3662 L.setLoopID(NewLoopID);
3663 } else if (InjectedCondition) {
3664 // Do the same for injection of invariant conditions.
3665 auto &Context = L.getHeader()->getContext();
3666 MDNode *DisableUnswitchMD = MDNode::get(
3667 Context,
3668 MDString::get(Context, "llvm.loop.unswitch.injection.disable"));
3669 MDNode *NewLoopID = makePostTransformationMetadata(
3670 Context, L.getLoopID(), {"llvm.loop.unswitch.injection"},
3671 {DisableUnswitchMD});
3672 L.setLoopID(NewLoopID);
3673 } else
3674 U.revisitCurrentLoop();
3675 } else
3676 U.markLoopAsDeleted(L, LoopName);
3677 };
3678
3679 auto DestroyLoopCB = [&U](Loop &L, StringRef Name) {
3680 U.markLoopAsDeleted(L, Name);
3681 };
3682
3683 std::optional<MemorySSAUpdater> MSSAU;
3684 if (AR.MSSA) {
3685 MSSAU = MemorySSAUpdater(AR.MSSA);
3686 if (VerifyMemorySSA)
3687 AR.MSSA->verifyMemorySSA();
3688 }
3689 if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
3690 UnswitchCB, &AR.SE, MSSAU ? &*MSSAU : nullptr, PSI, AR.BFI,
3691 DestroyLoopCB))
3692 return PreservedAnalyses::all();
3693
3694 if (AR.MSSA && VerifyMemorySSA)
3695 AR.MSSA->verifyMemorySSA();
3696
3697 // Historically this pass has had issues with the dominator tree so verify it
3698 // in asserts builds.
3699 assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
3700
3701 auto PA = getLoopPassPreservedAnalyses();
3702 if (AR.MSSA)
3703 PA.preserve<MemorySSAAnalysis>();
3704 return PA;
3705}
3706
3707void SimpleLoopUnswitchPass::printPipeline(
3708 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
3709 static_cast<PassInfoMixin<SimpleLoopUnswitchPass> *>(this)->printPipeline(
3710 OS, MapClassName2PassName);
3711
3712 OS << '<';
3713 OS << (NonTrivial ? "" : "no-") << "nontrivial;";
3714 OS << (Trivial ? "" : "no-") << "trivial";
3715 OS << '>';
3716}
3717
3718namespace {
3719
3720class SimpleLoopUnswitchLegacyPass : public LoopPass {
3721 bool NonTrivial;
3722
3723public:
3724 static char ID; // Pass ID, replacement for typeid
3725
3726 explicit SimpleLoopUnswitchLegacyPass(bool NonTrivial = false)
3727 : LoopPass(ID), NonTrivial(NonTrivial) {
3728 initializeSimpleLoopUnswitchLegacyPassPass(
3729 *PassRegistry::getPassRegistry());
3730 }
3731
3732 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
3733
3734 void getAnalysisUsage(AnalysisUsage &AU) const override {
3735 AU.addRequired<AssumptionCacheTracker>();
3736 AU.addRequired<TargetTransformInfoWrapperPass>();
3737 AU.addRequired<MemorySSAWrapperPass>();
3738 AU.addPreserved<MemorySSAWrapperPass>();
3739 getLoopAnalysisUsage(AU);
3740 }
3741};
3742
3743} // end anonymous namespace
3744
3745bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
3746 if (skipLoop(L))
3747 return false;
3748
3749 Function &F = *L->getHeader()->getParent();
3750
3751 LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L
3752 << "\n");
3753 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3754 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
3755 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
3756 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
3757 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
3758 MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
3759 MemorySSAUpdater MSSAU(MSSA);
3760
3761 auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
3762 auto *SE = SEWP ? &SEWP->getSE() : nullptr;
3763
3764 auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid, bool PartiallyInvariant,
3765 bool InjectedCondition,
3766 ArrayRef<Loop *> NewLoops) {
3767 // If we did a non-trivial unswitch, we have added new (cloned) loops.
3768 for (auto *NewL : NewLoops)
3769 LPM.addLoop(*NewL);
3770
3771 // If the current loop remains valid, re-add it to the queue. This is
3772 // a little wasteful as we'll finish processing the current loop as well,
3773 // but it is the best we can do in the old PM.
3774 if (CurrentLoopValid) {
3775 // If the current loop has been unswitched using a partially invariant
3776 // condition or injected invariant condition, we should not re-add the
3777 // current loop to avoid unswitching on the same condition again.
3778 if (!PartiallyInvariant && !InjectedCondition)
3779 LPM.addLoop(*L);
3780 } else
3781 LPM.markLoopAsDeleted(*L);
3782 };
3783
3784 auto DestroyLoopCB = [&LPM](Loop &L, StringRef /* Name */) {
3785 LPM.markLoopAsDeleted(L);
3786 };
3787
3788 if (VerifyMemorySSA)
3789 MSSA->verifyMemorySSA();
3790 bool Changed =
3791 unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE,
3792 &MSSAU, nullptr, nullptr, DestroyLoopCB);
3793
3794 if (VerifyMemorySSA)
3795 MSSA->verifyMemorySSA();
3796
3797 // Historically this pass has had issues with the dominator tree so verify it
3798 // in asserts builds.
3799 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
3800
3801 return Changed;
3802}
3803
3804char SimpleLoopUnswitchLegacyPass::ID = 0;
3805INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
3806 "Simple unswitch loops", false, false)
3807INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
3808INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
3809INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
3810INITIALIZE_PASS_DEPENDENCY(LoopPass)
3811INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
3812INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
3813INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
3814 "Simple unswitch loops", false, false)
3815
3816Pass *llvm::createSimpleLoopUnswitchLegacyPass(bool NonTrivial) {
3817 return new SimpleLoopUnswitchLegacyPass(NonTrivial);
3818}
assume Assume Builder
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
std::string Name
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:496
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
This header provides classes for managing per-loop analyses.
loops
Definition: LoopInfo.cpp:1176
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
LLVMContext & Context
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
Provides some synthesis utilities to produce sequences of values.
This file implements a set that has insertion order iteration characteristics.
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
static void canonicalizeForInvariantConditionInjection(ICmpInst::Predicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue, BasicBlock *&IfFalse, const Loop &L)
Tries to canonicalize condition described by:
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
static int CalculateUnswitchCostMultiplier(const Instruction &TI, const Loop &L, const LoopInfo &LI, const DominatorTree &DT, ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU)
Copy a set of loop invariant values, and conditionally branch on them.
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root, const LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy > > VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, function_ref< void(bool, bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
static BranchInst * turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, AssumptionCache *AC)
Turns a select instruction into implicit control flow branch, making the following replacement:
static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, const Instruction *I, AssumptionCache *AC, const DominatorTree &DT)
Copy a set of loop invariant values ToDuplicate and insert them at the end of BB and conditionally br...
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
static bool shouldTryInjectInvariantCondition(const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L)
Returns true, if predicate described by ( Pred, LHS, RHS ) succeeding into blocks ( IfTrue,...
static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates, const Loop &L, const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC, const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo)
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, function_ref< void(bool, bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, function_ref< void(Loop &, StringRef)> DestroyLoopCB, bool InsertFreeze, bool InjectedCondition)
static Value * skipTrivialSelect(Value *Cond)
static Loop * getTopMostExitingLoop(const BasicBlock *ExitBB, const LoopInfo &LI)
static bool collectUnswitchCandidatesWithInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, const DominatorTree &DT, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
Collect unswitch candidates by invariant conditions that are not immediately present in the loop.
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
static void replaceLoopInvariantUses(const Loop &L, Value *Invariant, Constant &Replacement)
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
static bool collectUnswitchCandidates(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, const Loop &L, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
static cl::opt< bool > InjectInvariantConditions("simple-loop-unswitch-inject-invariant-conditions", cl::Hidden, cl::desc("Whether we should inject new invariants and unswitch them to " "eliminate some existing (non-invariant) conditions."), cl::init(true))
static cl::opt< bool > FreezeLoopUnswitchCond("freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation."))
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, AssumptionCache &AC)
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
static bool areLoopExitPHIsLoopInvariant(const Loop &L, const BasicBlock &ExitingBB, const BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
static bool insertCandidatesWithPendingInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, Loop &L, ICmpInst::Predicate Pred, ArrayRef< CompareDesc > Compares, const DominatorTree &DT)
Given chain of loop branch conditions looking like: br (Variant < Invariant1) br (Variant < Invariant...
static NonTrivialUnswitchCandidate injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, MemorySSAUpdater *MSSAU)
Materialize pending invariant condition of the given candidate into IR.
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
static cl::opt< unsigned > InjectInvariantConditionHotnesThreshold("simple-loop-unswitch-inject-invariant-condition-hotness-threshold", cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " "unswitch on them to eliminate branches that are " "not-taken 1/<this option> times or less."), cl::init(16))
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops, ScalarEvolution *SE)
Rebuild a loop after unswitching removes some subset of blocks and edges.
simple loop unswitch
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, function_ref< void(bool, bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
Unswitch control flow predicated on loop invariant conditions.
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Build the cloned blocks for an unswitched copy of the given loop.
bool shouldTryInjectBasingOnMetadata(const BranchInst *BI, const BasicBlock *TakenSucc)
Returns true, if metadata on BI allows us to optimize branching into TakenSucc via injection of invar...
static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, function_ref< void(Loop &, StringRef)> DestroyLoopCB)
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
This defines the Use class.
Value * RHS
Value * LHS
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:197
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator end()
Definition: BasicBlock.h:337
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:335
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:393
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:105
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
size_t size() const
Definition: BasicBlock.h:345
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.h:263
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:353
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1357
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1362
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:711
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:833
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:840
This is an important base class in LLVM.
Definition: Constant.h:41
bool isOneValue() const
Returns true if the value is one.
Definition: Constants.cpp:110
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
iterator begin()
Definition: DenseMap.h:75
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:314
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This class represents a freeze function that returns random concrete value if an operand is either a ...
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
Definition: MustExecute.h:132
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:79
bool isRelational() const
Return true if the predicate is relational (not EQ or NE).
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2494
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1111
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1466
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1488
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2625
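A hedged sketch combining the IRBuilder calls above, in the spirit of branching on a frozen invariant condition; InsertBB, A, B, TrueDest and FalseDest are hypothetical:
  // Hypothetical snippet: freeze a combined condition, then branch on it.
  IRBuilder<> Builder(InsertBB);
  Value *Cond = Builder.CreateAnd(A, B, "unswitch.cond");
  Value *Frozen = Builder.CreateFreeze(Cond, Cond->getName() + ".fr");
  Builder.CreateCondBr(Frozen, TrueDest, FalseDest);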
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following: the copy has no parent and no name.
const BasicBlock * getParent() const
Definition: Instruction.h:90
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:302
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
bool isTerminator() const
Definition: Instruction.h:198
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1521
SymbolTableList< Instruction >::iterator insertInto(BasicBlock *ParentBB, SymbolTableList< Instruction >::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted instruction.
Definition: Instruction.cpp:99
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:83
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos lives in, right before MovePos.
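A hedged sketch of the Instruction manipulation entries above; I and Preheader are hypothetical:
  // Hypothetical snippet: duplicate an instruction into the preheader and
  // retire the original.
  Instruction *Copy = I->clone();   // unlinked copy, no parent or name
  Copy->setName(I->getName() + ".hoisted");
  Copy->insertInto(Preheader, Preheader->getTerminator()->getIterator());
  I->replaceAllUsesWith(Copy);
  I->eraseFromParent();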
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
This class provides an interface for updating the loop pass manager based on mutations to the loop nest.
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:110
void addLoop(Loop &L)
Definition: LoopPass.cpp:76
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
bool isLoopExiting(const BlockT *BB) const
True if the terminator in the block can branch to another block that is outside of the current loop.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
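A small illustrative sketch of the Loop accessors above; L is a hypothetical Loop pointer:
  // Hypothetical snippet: structural queries on a loop.
  BasicBlock *Header = L->getHeader();
  BasicBlock *Preheader = L->getLoopPreheader();  // nullptr if there is none
  Loop *Parent = L->getParentLoop();              // nullptr for top-level loops
  bool Leaf = L->isInnermost();                   // no nested natural loops
  unsigned Exiting = 0;
  for (BasicBlock *BB : L->blocks())
    if (L->isLoopExiting(BB))
      ++Exiting;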
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse post-order traversal of loop blocks.
Definition: LoopIterator.h:172
void perform(const LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:180
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
iterator end() const
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
iterator begin() const
LoopT * getLoopFor(const BlockT *BB) const
Return the innermost loop that BB lives in.
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:594
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:47
StringRef getName() const
Definition: LoopInfo.h:391
Metadata node.
Definition: Metadata.h:950
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1416
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:499
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition: MemorySSA.h:372
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:923
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks, and a 1:1 map from the original blocks to their clones.
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point)
Create a MemoryAccess in MemorySSA at a specified point in a block.
void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that replaced multiple edges between the two blocks with a single edge.
void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAccesses in a set of BasicBlocks about to be deleted.
void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
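A hedged sketch of keeping MemorySSA in step with a CFG edit via the applyUpdates entry above; Pred, NewSucc, DT and MSSAU are hypothetical references:
  // Hypothetical snippet: mirror one CFG edge insertion into MemorySSA
  // (same update list shape as used for the dominator tree).
  SmallVector<DominatorTree::UpdateType, 1> Updates;
  Updates.push_back({DominatorTree::Insert, Pred, NewSucc});
  MSSAU.applyUpdates(Updates, DT, /*UpdateDTFirst=*/true);
  if (VerifyMemorySSA)
    MSSAU.getMemorySSA()->verifyMemorySSA();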
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:975
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:700
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (i.e., definitions dominate all uses, uses appear in the right places, and so on).
Definition: MemorySSA.cpp:1857
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:717
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition: MemorySSA.h:765
An analysis over an "inner" IR unit that provides access to an analysis manager over an "outer" IR unit.
Definition: PassManager.h:1058
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will have (use 0 if you really have no idea).
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application startup.
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1743
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isColdBlock(const BBType *BB, BFIT *BFI) const
Returns true if BasicBlock BB is considered cold.
The main scalar evolution driver.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may affect ScalarEvolution's ability to compute a trip count, or if the loop is deleted.
void forgetTopmostLoop(const Loop *L)
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may affect its cached analysis.
void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:103
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
size_type size() const
Definition: SmallPtrSet.h:93
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:345
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:380
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:384
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition: SmallPtrSet.h:451
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: SmallVector.h:577
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
void reserve(size_type N)
Definition: SmallVector.h:667
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:809
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights metadata.
void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the added case.
CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
SymbolTableList< Instruction >::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object so it no longer touches the instruction.
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
ConstantIntT * getCaseValue() const
Resolves case value for current case.
Multiway switch.
BasicBlock * getDefaultDest() const
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, Instruction *InsertBefore=nullptr)
void setDefaultDest(BasicBlock *DefaultCase)
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
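A hedged sketch of building a switch with the SwitchInst API above; Cond, BB, DefaultBB and the case blocks are hypothetical:
  // Hypothetical snippet: replace BB's terminator with a two-case switch.
  LLVMContext &Ctx = Cond->getContext();
  BB->getTerminator()->eraseFromParent();
  SwitchInst *SI = SwitchInst::Create(Cond, DefaultBB, /*NumCases=*/2);
  SI->insertInto(BB, BB->end());
  SI->addCase(ConstantInt::get(Type::getInt32Ty(Ctx), 0), CaseZeroBB);
  SI->addCase(ConstantInt::get(Type::getInt32Ty(Ctx), 1), CaseOneBB);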
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool hasBranchDivergence(const Function *F=nullptr) const
Return true if branch divergence exists.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
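A hedged sketch of accumulating a size-oriented cost with the getInstructionCost entry above; TTI and L are hypothetical references:
  // Hypothetical snippet: code-size cost of a loop body.
  InstructionCost Cost = 0;
  for (BasicBlock *BB : L->blocks())
    for (Instruction &I : *BB) {
      SmallVector<const Value *, 4> Operands(I.operand_values());
      Cost += TTI.getInstructionCost(&I, Operands,
                                     TargetTransformInfo::TCK_CodeSize);
    }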
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vector.
Definition: TinyPtrVector.h:29
void push_back(EltTy NewVal)
bool empty() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition: Twine.h:81
unsigned getIntegerBitWidth() const
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition: ValueMap.h:164
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: ValueMap.h:151
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:378
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:535
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1069
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:988
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:525
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
Definition: PatternMatch.h:168
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:545
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:218
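A hedged sketch of the PatternMatch helpers above, recognizing a branch on an equality test against zero; BB, X, T and F are hypothetical:
  // Hypothetical snippet: match "br (icmp eq X, 0), T, F".
  using namespace llvm::PatternMatch;
  Value *X;
  BasicBlock *T, *F;
  ICmpInst::Predicate Pred;
  if (match(BB->getTerminator(),
            m_Br(m_ICmp(Pred, m_Value(X), m_Zero()), T, F)) &&
      Pred == ICmpInst::ICMP_EQ)
    dbgs() << "matched in " << BB->getName() << "\n";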
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:330
void stable_sort(R &&Range)
Definition: STLExtras.h:1971
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1747
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1727
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:529
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:2037
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition: STLExtras.h:666
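A hedged sketch of make_early_inc_range used together with RecursivelyDeleteTriviallyDeadInstructions from the entries here; BB and MSSAU are hypothetical:
  // Hypothetical snippet: erase dead instructions while walking a block;
  // the early-increment adaptor keeps the loop valid across deletions.
  for (Instruction &I : make_early_inc_range(*BB))
    RecursivelyDeleteTriviallyDeadInstructions(&I, /*TLI=*/nullptr, &MSSAU);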
MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
Definition: LoopInfo.cpp:1042
Op::Description Desc
Pass * createSimpleLoopUnswitchLegacyPass(bool NonTrivial=false)
Create the legacy pass object for the simple loop unswitcher.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1734
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:313
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of the llvm.experimental.guard intrinsic.
Definition: GuardUtils.cpp:18
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:429
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular function.
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&...args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
Definition: STLExtras.h:886
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1652
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:89
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruction or argument) are mapped, not global values such as functions or global metadata.
Definition: ValueMapper.h:71
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool VerifyLoopInfo
Enable verification of loop info.
Definition: LoopInfo.cpp:50
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:256
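A hedged sketch combining CloneBasicBlock, the value map, and RemapInstruction from the entries above; BB and F are hypothetical:
  // Hypothetical snippet: clone one block and remap its operands so the
  // copy refers to cloned values instead of the originals.
  ValueToValueMapTy VMap;
  BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".clone", F);
  VMap[BB] = NewBB;
  for (Instruction &I : *NewBB)
    RemapInstruction(&I, VMap,
                     RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);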
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:141
void initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry &)
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:83
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:57
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given predicate occurs in a range.
Definition: STLExtras.h:1926
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
Definition: LoopInfo.cpp:1125
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent to: C.erase(remove_if(C.begin(), C.end(), pred), C.end());
Definition: STLExtras.h:2021
auto predecessors(const MachineBasicBlock *BB)
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the old block, and everything from SplitBefore onward is moved to a newly created block.
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From and To.
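A hedged sketch of the SplitBlockAndInsertIfThen entry above, guarding newly emitted code on a condition; Cond, I, DTU and LI are hypothetical references:
  // Hypothetical snippet: materialize "if (Cond) { ... }" right before I.
  Instruction *ThenTerm =
      SplitBlockAndInsertIfThen(Cond, I->getIterator(), /*Unreachable=*/false,
                                /*BranchWeights=*/nullptr, &DTU, &LI);
  IRBuilder<> ThenBuilder(ThenTerm);
  // ...emit the guarded instructions at ThenBuilder's insertion point...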
std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loads from memory.
Definition: LoopUtils.cpp:1818
bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition: LCSSA.cpp:346
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
Description of the encoding of one expression Op.
Struct to hold information about a partially invariant condition.
Definition: LoopUtils.h:530
SmallVector< Instruction * > InstToDuplicate
Instructions that need to be duplicated and checked for the unswitching condition.
Definition: LoopUtils.h:533
Constant * KnownValue
Constant to indicate for which value the condition is invariant.
Definition: LoopUtils.h:536
The adaptor from a function pass to a loop pass computes these analyses and makes them available to the loop passes.
Direction
An enum for the direction of the loop.
Definition: LoopInfo.h:223
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:371