1///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
10#include "llvm/ADT/DenseMap.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/ADT/Sequence.h"
13#include "llvm/ADT/SetVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/Twine.h"
20#include "llvm/Analysis/CFG.h"
34#include "llvm/IR/BasicBlock.h"
35#include "llvm/IR/Constant.h"
36#include "llvm/IR/Constants.h"
37#include "llvm/IR/Dominators.h"
38#include "llvm/IR/Function.h"
39#include "llvm/IR/IRBuilder.h"
40#include "llvm/IR/InstrTypes.h"
41#include "llvm/IR/Instruction.h"
46#include "llvm/IR/Use.h"
47#include "llvm/IR/Value.h"
50#include "llvm/Support/Debug.h"
61#include <algorithm>
62#include <cassert>
63#include <iterator>
64#include <numeric>
65#include <optional>
66#include <utility>
67
68#define DEBUG_TYPE "simple-loop-unswitch"
69
70using namespace llvm;
71using namespace llvm::PatternMatch;
72
73STATISTIC(NumBranches, "Number of branches unswitched");
74STATISTIC(NumSwitches, "Number of switches unswitched");
75STATISTIC(NumSelects, "Number of selects turned into branches for unswitching");
76STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
77STATISTIC(NumTrivial, "Number of unswitches that are trivial");
78STATISTIC(
79    NumCostMultiplierSkipped,
80    "Number of unswitch candidates that had their cost multiplier skipped");
81STATISTIC(NumInvariantConditionsInjected,
82 "Number of invariant conditions injected and unswitched");
83
84static cl::opt<bool> EnableNonTrivialUnswitch(
85    "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
86 cl::desc("Forcibly enables non-trivial loop unswitching rather than "
87 "following the configuration passed into the pass."));
88
89static cl::opt<int>
90 UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
91 cl::desc("The cost threshold for unswitching a loop."));
92
93static cl::opt<bool> EnableUnswitchCostMultiplier(
94    "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
95 cl::desc("Enable unswitch cost multiplier that prohibits exponential "
96 "explosion in nontrivial unswitch."));
97static cl::opt<int> UnswitchSiblingsToplevelDiv(
98    "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
99 cl::desc("Toplevel siblings divisor for cost multiplier."));
100static cl::opt<int> UnswitchNumInitialUnscaledCandidates(
101    "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
102 cl::desc("Number of unswitch candidates that are ignored when calculating "
103 "cost multiplier."));
104static cl::opt<bool> UnswitchGuards(
105    "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
106 cl::desc("If enabled, simple loop unswitching will also consider "
107 "llvm.experimental.guard intrinsics as unswitch candidates."));
108static cl::opt<bool> DropNonTrivialImplicitNullChecks(
109    "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
110 cl::init(false), cl::Hidden,
111 cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
112 "null checks to save time analyzing if we can keep it."));
113static cl::opt<unsigned>
114    MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
115 cl::desc("Max number of memory uses to explore during "
116 "partial unswitching analysis"),
117 cl::init(100), cl::Hidden);
118static cl::opt<bool> FreezeLoopUnswitchCond(
119    "freeze-loop-unswitch-cond", cl::init(true), cl::Hidden,
120 cl::desc("If enabled, the freeze instruction will be added to condition "
121 "of loop unswitch to prevent miscompilation."));
122
123static cl::opt<bool> InjectInvariantConditions(
124    "simple-loop-unswitch-inject-invariant-conditions", cl::Hidden,
125 cl::desc("Whether we should inject new invariants and unswitch them to "
126 "eliminate some existing (non-invariant) conditions."),
127 cl::init(true));
128
129static cl::opt<unsigned> InjectInvariantConditionHotnesThreshold(
130    "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
131 cl::Hidden, cl::desc("Only try to inject loop invariant conditions and "
132 "unswitch on them to eliminate branches that are "
133 "not-taken 1/<this option> times or less."),
134 cl::init(16));
135
136namespace {
137struct CompareDesc {
138 BranchInst *Term;
139 Value *Invariant;
140 BasicBlock *InLoopSucc;
141
142 CompareDesc(BranchInst *Term, Value *Invariant, BasicBlock *InLoopSucc)
143 : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
144};
145
146struct InjectedInvariant {
147  ICmpInst::Predicate Pred;
148  Value *LHS;
149 Value *RHS;
150 BasicBlock *InLoopSucc;
151
152 InjectedInvariant(ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
153 BasicBlock *InLoopSucc)
154 : Pred(Pred), LHS(LHS), RHS(RHS), InLoopSucc(InLoopSucc) {}
155};
156
157struct NonTrivialUnswitchCandidate {
158 Instruction *TI = nullptr;
159 TinyPtrVector<Value *> Invariants;
160 std::optional<InstructionCost> Cost;
161 std::optional<InjectedInvariant> PendingInjection;
162 NonTrivialUnswitchCandidate(
163 Instruction *TI, ArrayRef<Value *> Invariants,
164 std::optional<InstructionCost> Cost = std::nullopt,
165 std::optional<InjectedInvariant> PendingInjection = std::nullopt)
166 : TI(TI), Invariants(Invariants), Cost(Cost),
167 PendingInjection(PendingInjection) {};
168
169 bool hasPendingInjection() const { return PendingInjection.has_value(); }
170};
171} // end anonymous namespace.
172
173// Helper to skip (select x, true, false), which matches both a logical AND and
174// OR and can confuse code that tries to determine if \p Cond is either a
175// logical AND or OR but not both.
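// For illustration (hypothetical values): a select of the form
//
//   %c = select i1 %x, i1 true, i1 false
//
// is just a copy of %x, yet it matches both m_LogicalAnd and m_LogicalOr;
// skipTrivialSelect walks through such selects so that later code inspects %x
// itself.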
176static Value *skipTrivialSelect(Value *Cond) {
177  Value *CondNext;
178 while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
179 Cond = CondNext;
180 return Cond;
181}
182
183/// Collect all of the loop invariant input values transitively used by the
184/// homogeneous instruction graph from a given root.
185///
186/// This essentially walks from a root recursively through loop variant operands
187/// which perform the same logical operation (AND or OR) and finds all
188/// inputs which are loop invariant. For some operations these can be
189/// re-associated and unswitched out of the loop entirely.
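///
/// As a rough example (hypothetical IR, assuming %inv1 and %inv2 are defined
/// outside the loop while %var is not):
///
///   %c0 = and i1 %inv1, %var
///   %c1 = and i1 %c0, %inv2
///
/// walking from %c1 through the `and` graph collects %inv1 and %inv2 as the
/// invariant inputs that can be unswitched.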
190static TinyPtrVector<Value *>
191collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root,
192                                         const LoopInfo &LI) {
193 assert(!L.isLoopInvariant(&Root) &&
194 "Only need to walk the graph if root itself is not invariant.");
195 TinyPtrVector<Value *> Invariants;
196
197 bool IsRootAnd = match(&Root, m_LogicalAnd());
198 bool IsRootOr = match(&Root, m_LogicalOr());
199
200 // Build a worklist and recurse through operators collecting invariants.
201  SmallVector<Instruction *, 4> Worklist;
202  SmallPtrSet<Instruction *, 8> Visited;
203  Worklist.push_back(&Root);
204 Visited.insert(&Root);
205 do {
206 Instruction &I = *Worklist.pop_back_val();
207 for (Value *OpV : I.operand_values()) {
208 // Skip constants as unswitching isn't interesting for them.
209 if (isa<Constant>(OpV))
210 continue;
211
212 // Add it to our result if loop invariant.
213 if (L.isLoopInvariant(OpV)) {
214 Invariants.push_back(OpV);
215 continue;
216 }
217
218 // If not an instruction with the same opcode, nothing we can do.
219 Instruction *OpI = dyn_cast<Instruction>(skipTrivialSelect(OpV));
220
221 if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
222 (IsRootOr && match(OpI, m_LogicalOr())))) {
223 // Visit this operand.
224 if (Visited.insert(OpI).second)
225 Worklist.push_back(OpI);
226 }
227 }
228 } while (!Worklist.empty());
229
230 return Invariants;
231}
232
233static void replaceLoopInvariantUses(const Loop &L, Value *Invariant,
234 Constant &Replacement) {
235 assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");
236
237 // Replace uses of LIC in the loop with the given constant.
238 // We use make_early_inc_range as set invalidates the iterator.
239 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
240 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
241
242 // Replace this use within the loop body.
243 if (UserI && L.contains(UserI))
244 U.set(&Replacement);
245 }
246}
247
248/// Check that all the LCSSA PHI nodes in the loop exit block have trivial
249/// incoming values along this edge.
250static bool areLoopExitPHIsLoopInvariant(const Loop &L,
251                                         const BasicBlock &ExitingBB,
252 const BasicBlock &ExitBB) {
253 for (const Instruction &I : ExitBB) {
254 auto *PN = dyn_cast<PHINode>(&I);
255 if (!PN)
256 // No more PHIs to check.
257 return true;
258
259 // If the incoming value for this edge isn't loop invariant the unswitch
260 // won't be trivial.
261 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
262 return false;
263 }
264 llvm_unreachable("Basic blocks should never be empty!");
265}
266
267/// Copy a set of loop invariant values \p ToDuplicate and insert them at the
268/// end of \p BB and conditionally branch on the copied condition. We only
269/// branch on a single value.
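///
/// A sketch of the emitted IR (hypothetical value names): with Direction set
/// to true the invariants are OR'd together and the true edge leaves the loop,
///
///   %inv1.fr = freeze i1 %inv1              ; only if a freeze is needed
///   %cond = or i1 %inv1.fr, %inv2
///   br i1 %cond, label %unswitched, label %normal
///
/// while with Direction set to false they are AND'd and the roles of the two
/// successors are swapped.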
270static void buildPartialUnswitchConditionalBranch(
271    BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
272 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
273 const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) {
274 IRBuilder<> IRB(&BB);
275
276 SmallVector<Value *> FrozenInvariants;
277 for (Value *Inv : Invariants) {
278 if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT))
279 Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr");
280 FrozenInvariants.push_back(Inv);
281 }
282
283 Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
284 : IRB.CreateAnd(FrozenInvariants);
285 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
286 Direction ? &NormalSucc : &UnswitchedSucc);
287}
288
289/// Copy a set of loop invariant values, and conditionally branch on them.
290static void buildPartialInvariantUnswitchConditionalBranch(
291    BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
292 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
293 MemorySSAUpdater *MSSAU) {
294  ValueToValueMapTy VMap;
295  for (auto *Val : reverse(ToDuplicate)) {
296 Instruction *Inst = cast<Instruction>(Val);
297 Instruction *NewInst = Inst->clone();
298 NewInst->insertInto(&BB, BB.end());
299    RemapInstruction(NewInst, VMap,
300                     RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
301    VMap[Val] = NewInst;
302
303 if (!MSSAU)
304 continue;
305
306 MemorySSA *MSSA = MSSAU->getMemorySSA();
307 if (auto *MemUse =
308 dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) {
309 auto *DefiningAccess = MemUse->getDefiningAccess();
310 // Get the first defining access before the loop.
311 while (L.contains(DefiningAccess->getBlock())) {
312 // If the defining access is a MemoryPhi, get the incoming
313 // value for the pre-header as defining access.
314 if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
315 DefiningAccess =
316 MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
317 else
318 DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
319 }
320      MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
321                                    NewInst->getParent(),
322                                    MemorySSA::BeforeTerminator);
323    }
324 }
325
326 IRBuilder<> IRB(&BB);
327 Value *Cond = VMap[ToDuplicate[0]];
328 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
329 Direction ? &NormalSucc : &UnswitchedSucc);
330}
331
332/// Rewrite the PHI nodes in an unswitched loop exit basic block.
333///
334/// Requires that the loop exit and unswitched basic block are the same, and
335/// that the exiting block was a unique predecessor of that block. Rewrites the
336/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
337/// PHI nodes from the old preheader that now contains the unswitched
338/// terminator.
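///
/// A minimal sketch (hypothetical IR): an LCSSA phi such as
///
///   %lcssa = phi i32 [ %v, %exiting ]
///
/// keeps its value operand but has its incoming block rewritten to the old
/// preheader, since that block now holds the unswitched terminator that
/// reaches this exit.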
339static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,
340                                                  BasicBlock &OldExitingBB,
341 BasicBlock &OldPH) {
342 for (PHINode &PN : UnswitchedBB.phis()) {
343 // When the loop exit is directly unswitched we just need to update the
344 // incoming basic block. We loop to handle weird cases with repeated
345 // incoming blocks, but expect to typically only have one operand here.
346 for (auto i : seq<int>(0, PN.getNumOperands())) {
347 assert(PN.getIncomingBlock(i) == &OldExitingBB &&
348 "Found incoming block different from unique predecessor!");
349 PN.setIncomingBlock(i, &OldPH);
350 }
351 }
352}
353
354/// Rewrite the PHI nodes in the loop exit basic block and the split off
355/// unswitched block.
356///
357/// Because the exit block remains an exit from the loop, this rewrites the
358/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
359/// nodes into the unswitched basic block to select between the value in the
360/// old preheader and the loop exit.
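///
/// Roughly (hypothetical IR), an LCSSA phi in the exit block such as
///
///   %p = phi i32 [ %v, %exiting ], [ %w, %other.exiting ]
///
/// loses the %exiting entry on a full unswitch, while the unswitched block
/// receives %p.split = phi i32 [ %v, %old.ph ], [ %p, %exit ], and all prior
/// uses of %p are redirected to %p.split.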
361static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
362                                                      BasicBlock &UnswitchedBB,
363 BasicBlock &OldExitingBB,
364 BasicBlock &OldPH,
365 bool FullUnswitch) {
366 assert(&ExitBB != &UnswitchedBB &&
367 "Must have different loop exit and unswitched blocks!");
368 BasicBlock::iterator InsertPt = UnswitchedBB.begin();
369 for (PHINode &PN : ExitBB.phis()) {
370 auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
371 PN.getName() + ".split");
372 NewPN->insertBefore(InsertPt);
373
374 // Walk backwards over the old PHI node's inputs to minimize the cost of
375 // removing each one. We have to do this weird loop manually so that we
376 // create the same number of new incoming edges in the new PHI as we expect
377 // each case-based edge to be included in the unswitched switch in some
378 // cases.
379 // FIXME: This is really, really gross. It would be much cleaner if LLVM
380 // allowed us to create a single entry for a predecessor block without
381 // having separate entries for each "edge" even though these edges are
382 // required to produce identical results.
383 for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
384 if (PN.getIncomingBlock(i) != &OldExitingBB)
385 continue;
386
387 Value *Incoming = PN.getIncomingValue(i);
388 if (FullUnswitch)
389 // No more edge from the old exiting block to the exit block.
390 PN.removeIncomingValue(i);
391
392 NewPN->addIncoming(Incoming, &OldPH);
393 }
394
395 // Now replace the old PHI with the new one and wire the old one in as an
396 // input to the new one.
397 PN.replaceAllUsesWith(NewPN);
398 NewPN->addIncoming(&PN, &ExitBB);
399 }
400}
401
402/// Hoist the current loop up to the innermost loop containing a remaining exit.
403///
404/// Because we've removed an exit from the loop, we may have changed the set of
405/// loops reachable and need to move the current loop up the loop nest or even
406/// to an entirely separate nest.
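///
/// For instance (hypothetical nest): if L was nested as C inside B inside A
/// and unswitching removed the only exit from C into B, C's remaining exits
/// may all land in A; C (and its preheader) must then be re-parented from B
/// to A, and B must be repaired with LCSSA phis and dedicated exits.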
407static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
408 DominatorTree &DT, LoopInfo &LI,
409 MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
410 // If the loop is already at the top level, we can't hoist it anywhere.
411 Loop *OldParentL = L.getParentLoop();
412 if (!OldParentL)
413 return;
414
415  SmallVector<BasicBlock *, 4> Exits;
416  L.getExitBlocks(Exits);
417 Loop *NewParentL = nullptr;
418 for (auto *ExitBB : Exits)
419 if (Loop *ExitL = LI.getLoopFor(ExitBB))
420 if (!NewParentL || NewParentL->contains(ExitL))
421 NewParentL = ExitL;
422
423 if (NewParentL == OldParentL)
424 return;
425
426 // The new parent loop (if different) should always contain the old one.
427 if (NewParentL)
428 assert(NewParentL->contains(OldParentL) &&
429 "Can only hoist this loop up the nest!");
430
431 // The preheader will need to move with the body of this loop. However,
432 // because it isn't in this loop we also need to update the primary loop map.
433 assert(OldParentL == LI.getLoopFor(&Preheader) &&
434 "Parent loop of this loop should contain this loop's preheader!");
435 LI.changeLoopFor(&Preheader, NewParentL);
436
437 // Remove this loop from its old parent.
438 OldParentL->removeChildLoop(&L);
439
440 // Add the loop either to the new parent or as a top-level loop.
441 if (NewParentL)
442 NewParentL->addChildLoop(&L);
443 else
444 LI.addTopLevelLoop(&L);
445
446  // Remove this loop's blocks from the old parent and every other loop up the
447 // nest until reaching the new parent. Also update all of these
448 // no-longer-containing loops to reflect the nesting change.
449 for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
450 OldContainingL = OldContainingL->getParentLoop()) {
451 llvm::erase_if(OldContainingL->getBlocksVector(),
452 [&](const BasicBlock *BB) {
453 return BB == &Preheader || L.contains(BB);
454 });
455
456 OldContainingL->getBlocksSet().erase(&Preheader);
457 for (BasicBlock *BB : L.blocks())
458 OldContainingL->getBlocksSet().erase(BB);
459
460 // Because we just hoisted a loop out of this one, we have essentially
461 // created new exit paths from it. That means we need to form LCSSA PHI
462 // nodes for values used in the no-longer-nested loop.
463 formLCSSA(*OldContainingL, DT, &LI, SE);
464
465 // We shouldn't need to form dedicated exits because the exit introduced
466 // here is the (just split by unswitching) preheader. However, after trivial
467 // unswitching it is possible to get new non-dedicated exits out of parent
468 // loop so let's conservatively form dedicated exit blocks and figure out
469 // if we can optimize later.
470 formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
471 /*PreserveLCSSA*/ true);
472 }
473}
474
475// Return the topmost loop that has ExitBB as an exiting block, or the loop
476// immediately containing ExitBB if no enclosing loop has ExitBB as an
477// exiting block.
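//
// For example (hypothetical nest): with loop A containing loop B and ExitBB
// inside B, if ExitBB has a successor outside of both B and A then A is
// returned; otherwise the innermost loop containing ExitBB (here B) is
// returned.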
478static Loop *getTopMostExitingLoop(const BasicBlock *ExitBB,
479                                   const LoopInfo &LI) {
480 Loop *TopMost = LI.getLoopFor(ExitBB);
481 Loop *Current = TopMost;
482 while (Current) {
483 if (Current->isLoopExiting(ExitBB))
484 TopMost = Current;
485 Current = Current->getParentLoop();
486 }
487 return TopMost;
488}
489
490/// Unswitch a trivial branch if the condition is loop invariant.
491///
492/// This routine should only be called when loop code leading to the branch has
493/// been validated as trivial (no side effects). This routine checks if the
494/// condition is invariant and one of the successors is a loop exit. This
495/// allows us to unswitch without duplicating the loop, making it trivial.
496///
497/// If this routine fails to unswitch the branch it returns false.
498///
499/// If the branch can be unswitched, this routine splits the preheader and
500/// hoists the branch above that split. Preserves loop simplified form
501/// (splitting the exit block as necessary). It simplifies the branch within
502/// the loop to an unconditional branch but doesn't remove it entirely. Further
503/// cleanup can be done with some simplifycfg like pass.
504///
505/// If `SE` is not null, it will be updated based on the potential loop SCEVs
506/// invalidated by this.
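///
/// As an illustrative sketch (hypothetical IR), a loop such as
///
///   preheader:
///     br label %header
///   header:
///     br i1 %inv, label %exit, label %body   ; %inv is loop invariant
///
/// becomes, after unswitching, a conditional branch on %inv in the preheader
/// that picks between %exit and the (new) preheader of the loop, with the
/// in-loop branch rewritten to an unconditional branch to %body.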
507static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
508                                  LoopInfo &LI, ScalarEvolution *SE,
509 MemorySSAUpdater *MSSAU) {
510 assert(BI.isConditional() && "Can only unswitch a conditional branch!");
511 LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
512
513 // The loop invariant values that we want to unswitch.
514 TinyPtrVector<Value *> Invariants;
515
516 // When true, we're fully unswitching the branch rather than just unswitching
517 // some input conditions to the branch.
518 bool FullUnswitch = false;
519
520  Value *Cond = skipTrivialSelect(BI.getCondition());
521  if (L.isLoopInvariant(Cond)) {
522 Invariants.push_back(Cond);
523 FullUnswitch = true;
524 } else {
525 if (auto *CondInst = dyn_cast<Instruction>(Cond))
526 Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
527 if (Invariants.empty()) {
528 LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n");
529 return false;
530 }
531 }
532
533 // Check that one of the branch's successors exits, and which one.
534 bool ExitDirection = true;
535 int LoopExitSuccIdx = 0;
536 auto *LoopExitBB = BI.getSuccessor(0);
537 if (L.contains(LoopExitBB)) {
538 ExitDirection = false;
539 LoopExitSuccIdx = 1;
540 LoopExitBB = BI.getSuccessor(1);
541 if (L.contains(LoopExitBB)) {
542 LLVM_DEBUG(dbgs() << " Branch doesn't exit the loop!\n");
543 return false;
544 }
545 }
546 auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
547 auto *ParentBB = BI.getParent();
548 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {
549 LLVM_DEBUG(dbgs() << " Loop exit PHI's aren't loop-invariant!\n");
550 return false;
551 }
552
553 // When unswitching only part of the branch's condition, we need the exit
554 // block to be reached directly from the partially unswitched input. This can
555 // be done when the exit block is along the true edge and the branch condition
556 // is a graph of `or` operations, or the exit block is along the false edge
557 // and the condition is a graph of `and` operations.
558 if (!FullUnswitch) {
559 if (ExitDirection ? !match(Cond, m_LogicalOr())
560 : !match(Cond, m_LogicalAnd())) {
561 LLVM_DEBUG(dbgs() << " Branch condition is in improper form for "
562 "non-full unswitch!\n");
563 return false;
564 }
565 }
566
567 LLVM_DEBUG({
568 dbgs() << " unswitching trivial invariant conditions for: " << BI
569 << "\n";
570 for (Value *Invariant : Invariants) {
571 dbgs() << " " << *Invariant << " == true";
572 if (Invariant != Invariants.back())
573 dbgs() << " ||";
574 dbgs() << "\n";
575 }
576 });
577
578 // If we have scalar evolutions, we need to invalidate them including this
579 // loop, the loop containing the exit block and the topmost parent loop
580 // exiting via LoopExitBB.
581 if (SE) {
582 if (const Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
583 SE->forgetLoop(ExitL);
584 else
585 // Forget the entire nest as this exits the entire nest.
586 SE->forgetTopmostLoop(&L);
587    SE->forgetBlockAndLoopDispositions();
588  }
589
590 if (MSSAU && VerifyMemorySSA)
591 MSSAU->getMemorySSA()->verifyMemorySSA();
592
593 // Split the preheader, so that we know that there is a safe place to insert
594 // the conditional branch. We will change the preheader to have a conditional
595 // branch on LoopCond.
596 BasicBlock *OldPH = L.getLoopPreheader();
597 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
598
599 // Now that we have a place to insert the conditional branch, create a place
600 // to branch to: this is the exit block out of the loop that we are
601 // unswitching. We need to split this if there are other loop predecessors.
602 // Because the loop is in simplified form, *any* other predecessor is enough.
603 BasicBlock *UnswitchedBB;
604 if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
605 assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
606 "A branch's parent isn't a predecessor!");
607 UnswitchedBB = LoopExitBB;
608 } else {
609 UnswitchedBB =
610 SplitBlock(LoopExitBB, LoopExitBB->begin(), &DT, &LI, MSSAU, "", false);
611 }
612
613 if (MSSAU && VerifyMemorySSA)
614 MSSAU->getMemorySSA()->verifyMemorySSA();
615
616 // Actually move the invariant uses into the unswitched position. If possible,
617 // we do this by moving the instructions, but when doing partial unswitching
618 // we do it by building a new merge of the values in the unswitched position.
619 OldPH->getTerminator()->eraseFromParent();
620 if (FullUnswitch) {
621 // If fully unswitching, we can use the existing branch instruction.
622 // Splice it into the old PH to gate reaching the new preheader and re-point
623 // its successors.
624 BI.moveBefore(*OldPH, OldPH->end());
625 BI.setCondition(Cond);
626 if (MSSAU) {
627 // Temporarily clone the terminator, to make MSSA update cheaper by
628 // separating "insert edge" updates from "remove edge" ones.
629 BI.clone()->insertInto(ParentBB, ParentBB->end());
630 } else {
631 // Create a new unconditional branch that will continue the loop as a new
632 // terminator.
633 BranchInst::Create(ContinueBB, ParentBB);
634 }
635 BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
636 BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
637 } else {
638 // Only unswitching a subset of inputs to the condition, so we will need to
639 // build a new branch that merges the invariant inputs.
640    if (ExitDirection)
641      assert(match(skipTrivialSelect(BI.getCondition()), m_LogicalOr()) &&
642             "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
643             "condition!");
644    else
645      assert(match(skipTrivialSelect(BI.getCondition()), m_LogicalAnd()) &&
646             "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
647             " condition!");
648    buildPartialUnswitchConditionalBranch(
649        *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
650        FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT);
651 }
652
653 // Update the dominator tree with the added edge.
654 DT.insertEdge(OldPH, UnswitchedBB);
655
656 // After the dominator tree was updated with the added edge, update MemorySSA
657 // if available.
658 if (MSSAU) {
659    SmallVector<CFGUpdate, 1> Updates;
660    Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
661 MSSAU->applyInsertUpdates(Updates, DT);
662 }
663
664 // Finish updating dominator tree and memory ssa for full unswitch.
665 if (FullUnswitch) {
666 if (MSSAU) {
667 // Remove the cloned branch instruction.
668 ParentBB->getTerminator()->eraseFromParent();
669 // Create unconditional branch now.
670 BranchInst::Create(ContinueBB, ParentBB);
671 MSSAU->removeEdge(ParentBB, LoopExitBB);
672 }
673 DT.deleteEdge(ParentBB, LoopExitBB);
674 }
675
676 if (MSSAU && VerifyMemorySSA)
677 MSSAU->getMemorySSA()->verifyMemorySSA();
678
679 // Rewrite the relevant PHI nodes.
680 if (UnswitchedBB == LoopExitBB)
681 rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
682 else
683 rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
684 *ParentBB, *OldPH, FullUnswitch);
685
686 // The constant we can replace all of our invariants with inside the loop
687 // body. If any of the invariants have a value other than this the loop won't
688 // be entered.
689  ConstantInt *Replacement = ExitDirection
690                                 ? ConstantInt::getFalse(BI.getContext())
691                                 : ConstantInt::getTrue(BI.getContext());
692
693 // Since this is an i1 condition we can also trivially replace uses of it
694 // within the loop with a constant.
695 for (Value *Invariant : Invariants)
696 replaceLoopInvariantUses(L, Invariant, *Replacement);
697
698 // If this was full unswitching, we may have changed the nesting relationship
699 // for this loop so hoist it to its correct parent if needed.
700 if (FullUnswitch)
701 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
702
703 if (MSSAU && VerifyMemorySSA)
704 MSSAU->getMemorySSA()->verifyMemorySSA();
705
706 LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
707 ++NumTrivial;
708 ++NumBranches;
709 return true;
710}
711
712/// Unswitch a trivial switch if the condition is loop invariant.
713///
714/// This routine should only be called when loop code leading to the switch has
715/// been validated as trivial (no side effects). This routine checks if the
716/// condition is invariant and that at least one of the successors is a loop
717/// exit. This allows us to unswitch without duplicating the loop, making it
718/// trivial.
719///
720/// If this routine fails to unswitch the switch it returns false.
721///
722/// If the switch can be unswitched, this routine splits the preheader and
723/// copies the switch above that split. If the default case is one of the
724/// exiting cases, it copies the non-exiting cases and points them at the new
725/// preheader. If the default case is not exiting, it copies the exiting cases
726/// and points the default at the preheader. It preserves loop simplified form
727/// (splitting the exit blocks as necessary). It simplifies the switch within
728/// the loop by removing now-dead cases. If the default case is one of those
729/// unswitched, it replaces its destination with a new basic block containing
730/// only unreachable. Such basic blocks, while technically loop exits, are not
731/// considered for unswitching so this is a stable transform and the same
732/// switch will not be revisited. If after unswitching there is only a single
733/// in-loop successor, the switch is further simplified to an unconditional
734/// branch. Still more cleanup can be done with some simplifycfg like pass.
735///
736/// If `SE` is not null, it will be updated based on the potential loop SCEVs
737/// invalidated by this.
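///
/// As a rough sketch (hypothetical IR), for
///
///   switch i32 %inv, label %in.loop [ i32 0, label %exit0
///                                     i32 1, label %exit1 ]
///
/// a copy of the switch is placed in the preheader dispatching to %exit0,
/// %exit1 or the new preheader, while the in-loop switch keeps only the
/// non-exiting destination and is simplified to an unconditional branch.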
738static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
739                                  LoopInfo &LI, ScalarEvolution *SE,
740 MemorySSAUpdater *MSSAU) {
741 LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
742 Value *LoopCond = SI.getCondition();
743
744 // If this isn't switching on an invariant condition, we can't unswitch it.
745 if (!L.isLoopInvariant(LoopCond))
746 return false;
747
748 auto *ParentBB = SI.getParent();
749
750 // The same check must be used both for the default and the exit cases. We
751 // should never leave edges from the switch instruction to a basic block that
752 // we are unswitching, hence the condition used to determine the default case
753 // needs to also be used to populate ExitCaseIndices, which is then used to
754 // remove cases from the switch.
755 auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
756 // BBToCheck is not an exit block if it is inside loop L.
757 if (L.contains(&BBToCheck))
758 return false;
759 // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
760 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
761 return false;
762 // We do not unswitch a block that only has an unreachable statement, as
763 // it's possible this is a previously unswitched block. Only unswitch if
764 // either the terminator is not unreachable, or, if it is, it's not the only
765 // instruction in the block.
766 auto *TI = BBToCheck.getTerminator();
767 bool isUnreachable = isa<UnreachableInst>(TI);
768 return !isUnreachable ||
769 (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI));
770 };
771
772 SmallVector<int, 4> ExitCaseIndices;
773 for (auto Case : SI.cases())
774 if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
775 ExitCaseIndices.push_back(Case.getCaseIndex());
776 BasicBlock *DefaultExitBB = nullptr;
777  SwitchInstProfUpdateWrapper::CaseWeightOpt DefaultCaseWeight =
778      SwitchInstProfUpdateWrapper::getSuccessorWeight(SI, 0);
779  if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
780 DefaultExitBB = SI.getDefaultDest();
781 } else if (ExitCaseIndices.empty())
782 return false;
783
784 LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
785
786 if (MSSAU && VerifyMemorySSA)
787 MSSAU->getMemorySSA()->verifyMemorySSA();
788
789 // We may need to invalidate SCEVs for the outermost loop reached by any of
790 // the exits.
791 Loop *OuterL = &L;
792
793 if (DefaultExitBB) {
794 // Check the loop containing this exit.
795 Loop *ExitL = getTopMostExitingLoop(DefaultExitBB, LI);
796 if (!ExitL || ExitL->contains(OuterL))
797 OuterL = ExitL;
798 }
799 for (unsigned Index : ExitCaseIndices) {
800 auto CaseI = SI.case_begin() + Index;
801 // Compute the outer loop from this exit.
802 Loop *ExitL = getTopMostExitingLoop(CaseI->getCaseSuccessor(), LI);
803 if (!ExitL || ExitL->contains(OuterL))
804 OuterL = ExitL;
805 }
806
807 if (SE) {
808 if (OuterL)
809 SE->forgetLoop(OuterL);
810 else
811 SE->forgetTopmostLoop(&L);
812 }
813
814 if (DefaultExitBB) {
815 // Clear out the default destination temporarily to allow accurate
816 // predecessor lists to be examined below.
817 SI.setDefaultDest(nullptr);
818 }
819
820 // Store the exit cases into a separate data structure and remove them from
821 // the switch.
822  SmallVector<std::tuple<ConstantInt *, BasicBlock *,
823                         SwitchInstProfUpdateWrapper::CaseWeightOpt>,
824              4> ExitCases;
825  ExitCases.reserve(ExitCaseIndices.size());
826  SwitchInstProfUpdateWrapper SIW(SI);
827 // We walk the case indices backwards so that we remove the last case first
828 // and don't disrupt the earlier indices.
829 for (unsigned Index : reverse(ExitCaseIndices)) {
830 auto CaseI = SI.case_begin() + Index;
831 // Save the value of this case.
832 auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
833 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
834 // Delete the unswitched cases.
835 SIW.removeCase(CaseI);
836 }
837
838 // Check if after this all of the remaining cases point at the same
839 // successor.
840 BasicBlock *CommonSuccBB = nullptr;
841 if (SI.getNumCases() > 0 &&
842 all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
843 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
844 }))
845 CommonSuccBB = SI.case_begin()->getCaseSuccessor();
846 if (!DefaultExitBB) {
847 // If we're not unswitching the default, we need it to match any cases to
848 // have a common successor or if we have no cases it is the common
849 // successor.
850 if (SI.getNumCases() == 0)
851 CommonSuccBB = SI.getDefaultDest();
852 else if (SI.getDefaultDest() != CommonSuccBB)
853 CommonSuccBB = nullptr;
854 }
855
856 // Split the preheader, so that we know that there is a safe place to insert
857 // the switch.
858 BasicBlock *OldPH = L.getLoopPreheader();
859 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
860 OldPH->getTerminator()->eraseFromParent();
861
862 // Now add the unswitched switch.
863 auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
864 SwitchInstProfUpdateWrapper NewSIW(*NewSI);
865
866 // Rewrite the IR for the unswitched basic blocks. This requires two steps.
867 // First, we split any exit blocks with remaining in-loop predecessors. Then
868 // we update the PHIs in one of two ways depending on if there was a split.
869 // We walk in reverse so that we split in the same order as the cases
870 // appeared. This is purely for convenience of reading the resulting IR, but
871 // it doesn't cost anything really.
872  SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
873  SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap;
874 // Handle the default exit if necessary.
875 // FIXME: It'd be great if we could merge this with the loop below but LLVM's
876 // ranges aren't quite powerful enough yet.
877 if (DefaultExitBB) {
878 if (pred_empty(DefaultExitBB)) {
879 UnswitchedExitBBs.insert(DefaultExitBB);
880 rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
881 } else {
882 auto *SplitBB =
883 SplitBlock(DefaultExitBB, DefaultExitBB->begin(), &DT, &LI, MSSAU);
884 rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
885 *ParentBB, *OldPH,
886 /*FullUnswitch*/ true);
887 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
888 }
889 }
890 // Note that we must use a reference in the for loop so that we update the
891 // container.
892 for (auto &ExitCase : reverse(ExitCases)) {
893 // Grab a reference to the exit block in the pair so that we can update it.
894 BasicBlock *ExitBB = std::get<1>(ExitCase);
895
896 // If this case is the last edge into the exit block, we can simply reuse it
897 // as it will no longer be a loop exit. No mapping necessary.
898 if (pred_empty(ExitBB)) {
899 // Only rewrite once.
900 if (UnswitchedExitBBs.insert(ExitBB).second)
901 rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
902 continue;
903 }
904
905 // Otherwise we need to split the exit block so that we retain an exit
906 // block from the loop and a target for the unswitched condition.
907 BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
908 if (!SplitExitBB) {
909 // If this is the first time we see this, do the split and remember it.
910 SplitExitBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
911 rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
912 *ParentBB, *OldPH,
913 /*FullUnswitch*/ true);
914 }
915 // Update the case pair to point to the split block.
916 std::get<1>(ExitCase) = SplitExitBB;
917 }
918
919 // Now add the unswitched cases. We do this in reverse order as we built them
920 // in reverse order.
921 for (auto &ExitCase : reverse(ExitCases)) {
922 ConstantInt *CaseVal = std::get<0>(ExitCase);
923 BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
924
925 NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
926 }
927
928 // If the default was unswitched, re-point it and add explicit cases for
929 // entering the loop.
930 if (DefaultExitBB) {
931 NewSIW->setDefaultDest(DefaultExitBB);
932 NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
933
934 // We removed all the exit cases, so we just copy the cases to the
935 // unswitched switch.
936 for (const auto &Case : SI.cases())
937      NewSIW.addCase(Case.getCaseValue(), NewPH,
938                     SIW.getSuccessorWeight(Case.getSuccessorIndex()));
939 } else if (DefaultCaseWeight) {
940 // We have to set branch weight of the default case.
941 uint64_t SW = *DefaultCaseWeight;
942 for (const auto &Case : SI.cases()) {
943 auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
944 assert(W &&
945 "case weight must be defined as default case weight is defined");
946 SW += *W;
947 }
948 NewSIW.setSuccessorWeight(0, SW);
949 }
950
951 // If we ended up with a common successor for every path through the switch
952 // after unswitching, rewrite it to an unconditional branch to make it easy
953 // to recognize. Otherwise we potentially have to recognize the default case
954 // pointing at unreachable and other complexity.
955 if (CommonSuccBB) {
956 BasicBlock *BB = SI.getParent();
957 // We may have had multiple edges to this common successor block, so remove
958 // them as predecessors. We skip the first one, either the default or the
959 // actual first case.
960 bool SkippedFirst = DefaultExitBB == nullptr;
961 for (auto Case : SI.cases()) {
962 assert(Case.getCaseSuccessor() == CommonSuccBB &&
963 "Non-common successor!");
964 (void)Case;
965 if (!SkippedFirst) {
966 SkippedFirst = true;
967 continue;
968 }
969 CommonSuccBB->removePredecessor(BB,
970 /*KeepOneInputPHIs*/ true);
971 }
972 // Now nuke the switch and replace it with a direct branch.
973 SIW.eraseFromParent();
974 BranchInst::Create(CommonSuccBB, BB);
975 } else if (DefaultExitBB) {
976 assert(SI.getNumCases() > 0 &&
977 "If we had no cases we'd have a common successor!");
978 // Move the last case to the default successor. This is valid as if the
979 // default got unswitched it cannot be reached. This has the advantage of
980 // being simple and keeping the number of edges from this switch to
981 // successors the same, and avoiding any PHI update complexity.
982 auto LastCaseI = std::prev(SI.case_end());
983
984 SI.setDefaultDest(LastCaseI->getCaseSuccessor());
985    SIW.setSuccessorWeight(
986        0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
987 SIW.removeCase(LastCaseI);
988 }
989
990 // Walk the unswitched exit blocks and the unswitched split blocks and update
991 // the dominator tree based on the CFG edits. While we are walking unordered
992 // containers here, the API for applyUpdates takes an unordered list of
993 // updates and requires them to not contain duplicates.
994  SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
995  for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
996 DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
997 DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
998 }
999 for (auto SplitUnswitchedPair : SplitExitBBMap) {
1000 DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
1001 DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
1002 }
1003
1004 if (MSSAU) {
1005 MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
1006 if (VerifyMemorySSA)
1007 MSSAU->getMemorySSA()->verifyMemorySSA();
1008 } else {
1009 DT.applyUpdates(DTUpdates);
1010 }
1011
1012 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1013
1014 // We may have changed the nesting relationship for this loop so hoist it to
1015 // its correct parent if needed.
1016 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
1017
1018 if (MSSAU && VerifyMemorySSA)
1019 MSSAU->getMemorySSA()->verifyMemorySSA();
1020
1021 ++NumTrivial;
1022 ++NumSwitches;
1023 LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
1024 return true;
1025}
1026
1027/// This routine scans the loop to find a branch or switch which occurs before
1028/// any side effects occur. These can potentially be unswitched without
1029/// duplicating the loop. If a branch or switch is successfully unswitched the
1030/// scanning continues to see if subsequent branches or switches have become
1031/// trivial. Once all trivial candidates have been unswitched, this routine
1032/// returns.
1033///
1034/// The return value indicates whether anything was unswitched (and therefore
1035/// changed).
1036///
1037/// If `SE` is not null, it will be updated based on the potential loop SCEVs
1038/// invalidated by this.
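///
/// In outline (a simplification of the loop below): starting at the header,
/// each block is checked for side effects, its terminator is unswitched if it
/// is a non-constant invariant branch or switch, and the walk then follows the
/// resulting unconditional branch, stopping when it leaves the loop or
/// revisits a block.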
1039static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
1040                                         LoopInfo &LI, ScalarEvolution *SE,
1041 MemorySSAUpdater *MSSAU) {
1042 bool Changed = false;
1043
1044 // If loop header has only one reachable successor we should keep looking for
1045 // trivial condition candidates in the successor as well. An alternative is
1046 // to constant fold conditions and merge successors into loop header (then we
1047 // only need to check header's terminator). The reason for not doing this in
1048 // LoopUnswitch pass is that it could potentially break LoopPassManager's
1049 // invariants. Folding dead branches could either eliminate the current loop
1050 // or make other loops unreachable. LCSSA form might also not be preserved
1051 // after deleting branches. The following code keeps traversing loop header's
1052 // successors until it finds the trivial condition candidate (condition that
1053 // is not a constant). Since unswitching generates branches with constant
1054 // conditions, this scenario could be very common in practice.
1055 BasicBlock *CurrentBB = L.getHeader();
1056  SmallPtrSet<BasicBlock *, 4> Visited;
1057  Visited.insert(CurrentBB);
1058 do {
1059 // Check if there are any side-effecting instructions (e.g. stores, calls,
1060 // volatile loads) in the part of the loop that the code *would* execute
1061 // without unswitching.
1062 if (MSSAU) // Possible early exit with MSSA
1063 if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
1064 if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
1065 return Changed;
1066 if (llvm::any_of(*CurrentBB,
1067 [](Instruction &I) { return I.mayHaveSideEffects(); }))
1068 return Changed;
1069
1070 Instruction *CurrentTerm = CurrentBB->getTerminator();
1071
1072 if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1073 // Don't bother trying to unswitch past a switch with a constant
1074 // condition. This should be removed prior to running this pass by
1075 // simplifycfg.
1076 if (isa<Constant>(SI->getCondition()))
1077 return Changed;
1078
1079 if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
1080 // Couldn't unswitch this one so we're done.
1081 return Changed;
1082
1083 // Mark that we managed to unswitch something.
1084 Changed = true;
1085
1086 // If unswitching turned the terminator into an unconditional branch then
1087 // we can continue. The unswitching logic specifically works to fold any
1088 // cases it can into an unconditional branch to make it easier to
1089 // recognize here.
1090 auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
1091 if (!BI || BI->isConditional())
1092 return Changed;
1093
1094 CurrentBB = BI->getSuccessor(0);
1095 continue;
1096 }
1097
1098 auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1099 if (!BI)
1100 // We do not understand other terminator instructions.
1101 return Changed;
1102
1103 // Don't bother trying to unswitch past an unconditional branch or a branch
1104 // with a constant value. These should be removed by simplifycfg prior to
1105 // running this pass.
1106 if (!BI->isConditional() ||
1107 isa<Constant>(skipTrivialSelect(BI->getCondition())))
1108 return Changed;
1109
1110 // Found a trivial condition candidate: non-foldable conditional branch. If
1111 // we fail to unswitch this, we can't do anything else that is trivial.
1112 if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
1113 return Changed;
1114
1115 // Mark that we managed to unswitch something.
1116 Changed = true;
1117
1118 // If we only unswitched some of the conditions feeding the branch, we won't
1119 // have collapsed it to a single successor.
1120 BI = cast<BranchInst>(CurrentBB->getTerminator());
1121 if (BI->isConditional())
1122 return Changed;
1123
1124 // Follow the newly unconditional branch into its successor.
1125 CurrentBB = BI->getSuccessor(0);
1126
1127 // When continuing, if we exit the loop or reach a previous visited block,
1128 // then we can not reach any trivial condition candidates (unfoldable
1129 // branch instructions or switch instructions) and no unswitch can happen.
1130 } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
1131
1132 return Changed;
1133}
1134
1135/// Build the cloned blocks for an unswitched copy of the given loop.
1136///
1137/// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
1138/// after the split block (`SplitBB`) that will be used to select between the
1139/// cloned and original loop.
1140///
1141/// This routine handles cloning all of the necessary loop blocks and exit
1142/// blocks including rewriting their instructions and the relevant PHI nodes.
1143/// Any loop blocks or exit blocks which are dominated by a different successor
1144/// than the one for this clone of the loop blocks can be trivially skipped. We
1145/// use the `DominatingSucc` map to determine whether a block satisfies that
1146/// property with a simple map lookup.
1147///
1148/// It also correctly creates the unconditional branch in the cloned
1149/// unswitched parent block to only point at the unswitched successor.
1150///
1151/// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1152/// block splitting is correctly reflected in `LoopInfo`, essentially all of
1153/// the cloned blocks (and their loops) are left without full `LoopInfo`
1154/// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1155/// blocks to them but doesn't create the cloned `DominatorTree` structure and
1156/// instead the caller must recompute an accurate DT. It *does* correctly
1157/// update the `AssumptionCache` provided in `AC`.
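///
/// Sketch of the result (hypothetical names): for an unswitched branch in
/// %parent with successors %left (unswitched) and %right, the clone contains
/// %parent.us ending in `br label %left.us`, only blocks still reachable on
/// that side are cloned, and each value defined in a cloned exit block is
/// merged with its original via a fresh ".us-phi" node in the split-off merge
/// block.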
1158static BasicBlock *buildClonedLoopBlocks(
1159    Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
1160    ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
1161    BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
1162    const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
1163    ValueToValueMapTy &VMap,
1164    SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates, AssumptionCache &AC,
1165    DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU,
1166    ScalarEvolution *SE) {
1167  SmallVector<BasicBlock *, 4> NewBlocks;
1168  NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
1169
1170 // We will need to clone a bunch of blocks, wrap up the clone operation in
1171 // a helper.
1172 auto CloneBlock = [&](BasicBlock *OldBB) {
1173 // Clone the basic block and insert it before the new preheader.
1174 BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
1175 NewBB->moveBefore(LoopPH);
1176
1177 // Record this block and the mapping.
1178 NewBlocks.push_back(NewBB);
1179 VMap[OldBB] = NewBB;
1180
1181 return NewBB;
1182 };
1183
1184 // We skip cloning blocks when they have a dominating succ that is not the
1185 // succ we are cloning for.
1186 auto SkipBlock = [&](BasicBlock *BB) {
1187 auto It = DominatingSucc.find(BB);
1188 return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
1189 };
1190
1191 // First, clone the preheader.
1192 auto *ClonedPH = CloneBlock(LoopPH);
1193
1194 // Then clone all the loop blocks, skipping the ones that aren't necessary.
1195 for (auto *LoopBB : L.blocks())
1196 if (!SkipBlock(LoopBB))
1197 CloneBlock(LoopBB);
1198
1199 // Split all the loop exit edges so that when we clone the exit blocks, if
1200 // any of the exit blocks are *also* a preheader for some other loop, we
1201 // don't create multiple predecessors entering the loop header.
1202 for (auto *ExitBB : ExitBlocks) {
1203 if (SkipBlock(ExitBB))
1204 continue;
1205
1206 // When we are going to clone an exit, we don't need to clone all the
1207 // instructions in the exit block and we want to ensure we have an easy
1208 // place to merge the CFG, so split the exit first. This is always safe to
1209 // do because there cannot be any non-loop predecessors of a loop exit in
1210 // loop simplified form.
1211 auto *MergeBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
1212
1213 // Rearrange the names to make it easier to write test cases by having the
1214 // exit block carry the suffix rather than the merge block carrying the
1215 // suffix.
1216 MergeBB->takeName(ExitBB);
1217 ExitBB->setName(Twine(MergeBB->getName()) + ".split");
1218
1219 // Now clone the original exit block.
1220 auto *ClonedExitBB = CloneBlock(ExitBB);
1221 assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1222 "Exit block should have been split to have one successor!");
1223 assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1224 "Cloned exit block has the wrong successor!");
1225
1226 // Remap any cloned instructions and create a merge phi node for them.
1227 for (auto ZippedInsts : llvm::zip_first(
1228 llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
1229 llvm::make_range(ClonedExitBB->begin(),
1230 std::prev(ClonedExitBB->end())))) {
1231 Instruction &I = std::get<0>(ZippedInsts);
1232 Instruction &ClonedI = std::get<1>(ZippedInsts);
1233
1234 // The only instructions in the exit block should be PHI nodes and
1235 // potentially a landing pad.
1236 assert(
1237 (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&
1238 "Bad instruction in exit block!");
1239 // We should have a value map between the instruction and its clone.
1240 assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
1241
1242 // Forget SCEVs based on exit phis in case SCEV looked through the phi.
1243 if (SE && isa<PHINode>(I))
1244 SE->forgetValue(&I);
1245
1246 auto *MergePN =
1247 PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi");
1248 MergePN->insertBefore(MergeBB->getFirstInsertionPt());
1249 I.replaceAllUsesWith(MergePN);
1250 MergePN->addIncoming(&I, ExitBB);
1251 MergePN->addIncoming(&ClonedI, ClonedExitBB);
1252 }
1253 }
1254
1255 // Rewrite the instructions in the cloned blocks to refer to the instructions
1256 // in the cloned blocks. We have to do this as a second pass so that we have
1257 // everything available. Also, we have inserted new instructions which may
1258 // include assume intrinsics, so we update the assumption cache while
1259 // processing this.
1260 Module *M = ClonedPH->getParent()->getParent();
1261 for (auto *ClonedBB : NewBlocks)
1262 for (Instruction &I : *ClonedBB) {
1263      RemapDPValueRange(M, I.getDbgValueRange(), VMap,
1264                        RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1265      RemapInstruction(&I, VMap,
1266                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1267 if (auto *II = dyn_cast<AssumeInst>(&I))
1268 AC.registerAssumption(II);
1269 }
1270
1271 // Update any PHI nodes in the cloned successors of the skipped blocks to not
1272 // have spurious incoming values.
1273 for (auto *LoopBB : L.blocks())
1274 if (SkipBlock(LoopBB))
1275 for (auto *SuccBB : successors(LoopBB))
1276 if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
1277 for (PHINode &PN : ClonedSuccBB->phis())
1278 PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
1279
1280 // Remove the cloned parent as a predecessor of any successor we ended up
1281 // cloning other than the unswitched one.
1282 auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
1283 for (auto *SuccBB : successors(ParentBB)) {
1284 if (SuccBB == UnswitchedSuccBB)
1285 continue;
1286
1287 auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
1288 if (!ClonedSuccBB)
1289 continue;
1290
1291 ClonedSuccBB->removePredecessor(ClonedParentBB,
1292 /*KeepOneInputPHIs*/ true);
1293 }
1294
1295 // Replace the cloned branch with an unconditional branch to the cloned
1296 // unswitched successor.
1297 auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
1298 Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1299 // Trivial Simplification. If Terminator is a conditional branch and
1300 // condition becomes dead - erase it.
1301 Value *ClonedConditionToErase = nullptr;
1302 if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1303 ClonedConditionToErase = BI->getCondition();
1304 else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1305 ClonedConditionToErase = SI->getCondition();
1306
1307 ClonedTerminator->eraseFromParent();
1308 BranchInst::Create(ClonedSuccBB, ClonedParentBB);
1309
1310 if (ClonedConditionToErase)
1311 RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
1312 MSSAU);
1313
1314 // If there are duplicate entries in the PHI nodes because of multiple edges
1315 // to the unswitched successor, we need to nuke all but one as we replaced it
1316 // with a direct branch.
1317 for (PHINode &PN : ClonedSuccBB->phis()) {
1318 bool Found = false;
1319 // Loop over the incoming operands backwards so we can easily delete as we
1320 // go without invalidating the index.
1321 for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
1322 if (PN.getIncomingBlock(i) != ClonedParentBB)
1323 continue;
1324 if (!Found) {
1325 Found = true;
1326 continue;
1327 }
1328 PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
1329 }
1330 }
1331
1332 // Record the domtree updates for the new blocks.
1333  SmallPtrSet<BasicBlock *, 4> SuccSet;
1334  for (auto *ClonedBB : NewBlocks) {
1335 for (auto *SuccBB : successors(ClonedBB))
1336 if (SuccSet.insert(SuccBB).second)
1337 DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1338 SuccSet.clear();
1339 }
1340
1341 return ClonedPH;
1342}
1343
1344/// Recursively clone the specified loop and all of its children.
1345///
1346/// The target parent loop for the clone should be provided, or can be null if
1347/// the clone is a top-level loop. While cloning, all the blocks are mapped
1348/// with the provided value map. The entire original loop must be present in
1349/// the value map. The cloned loop is returned.
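///
/// A small sketch (hypothetical nest): cloning a loop A that contains B and C
/// allocates A', attaches it to the given parent (or the top level), then
/// walks the child list iteratively to build B' and C' under A', mapping each
/// original block to its clone via \p VMap.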
1350static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
1351 const ValueToValueMapTy &VMap, LoopInfo &LI) {
1352 auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
1353 assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
1354 ClonedL.reserveBlocks(OrigL.getNumBlocks());
1355 for (auto *BB : OrigL.blocks()) {
1356 auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
1357 ClonedL.addBlockEntry(ClonedBB);
1358 if (LI.getLoopFor(BB) == &OrigL)
1359 LI.changeLoopFor(ClonedBB, &ClonedL);
1360 }
1361 };
1362
1363 // We specially handle the first loop because it may get cloned into
1364 // a different parent and because we most commonly are cloning leaf loops.
1365 Loop *ClonedRootL = LI.AllocateLoop();
1366 if (RootParentL)
1367 RootParentL->addChildLoop(ClonedRootL);
1368 else
1369 LI.addTopLevelLoop(ClonedRootL);
1370 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1371
1372 if (OrigRootL.isInnermost())
1373 return ClonedRootL;
1374
1375 // If we have a nest, we can quickly clone the entire loop nest using an
1376 // iterative approach because it is a tree. We keep the cloned parent in the
1377 // data structure to avoid repeatedly querying through a map to find it.
1378 SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
1379 // Build up the loops to clone in reverse order as we'll clone them from the
1380 // back.
1381 for (Loop *ChildL : llvm::reverse(OrigRootL))
1382 LoopsToClone.push_back({ClonedRootL, ChildL});
1383 do {
1384 Loop *ClonedParentL, *L;
1385 std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
1386 Loop *ClonedL = LI.AllocateLoop();
1387 ClonedParentL->addChildLoop(ClonedL);
1388 AddClonedBlocksToLoop(*L, *ClonedL);
1389 for (Loop *ChildL : llvm::reverse(*L))
1390 LoopsToClone.push_back({ClonedL, ChildL});
1391 } while (!LoopsToClone.empty());
1392
1393 return ClonedRootL;
1394}
1395
1396/// Build the cloned loops of an original loop from unswitching.
1397///
1398/// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1399/// operation. We need to re-verify that there even is a loop (as the backedge
1400/// may not have been cloned), and even if there are remaining backedges the
1401/// backedge set may be different. However, we know that each child loop is
1402/// undisturbed, we only need to find where to place each child loop within
1403/// either any parent loop or within a cloned version of the original loop.
1404///
1405/// Because child loops may end up cloned outside of any cloned version of the
1406/// original loop, multiple cloned sibling loops may be created. All of them
1407/// are returned so that the newly introduced loop nest roots can be
1408/// identified.
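///
/// For intuition (a rough sketch): if the cloned copy of the backedge was not
/// kept, there is no cloned version of the original loop at all; the child
/// loops that were cloned then land directly in some outer loop (or at the top
/// level), and these newly introduced nest roots are what get reported through
/// \p NonChildClonedLoops.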
1409static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1410 const ValueToValueMapTy &VMap, LoopInfo &LI,
1411 SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1412 Loop *ClonedL = nullptr;
1413
1414 auto *OrigPH = OrigL.getLoopPreheader();
1415 auto *OrigHeader = OrigL.getHeader();
1416
1417 auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1418 auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1419
1420 // We need to know the loops of the cloned exit blocks to even compute the
1421 // accurate parent loop. If we only clone exits to some parent of the
1422 // original parent, we want to clone into that outer loop. We also keep track
1423 // of the loops that our cloned exit blocks participate in.
1424 Loop *ParentL = nullptr;
1425 SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1426  SmallDenseMap<BasicBlock *, Loop *, 16> ExitLoopMap;
1427  ClonedExitsInLoops.reserve(ExitBlocks.size());
1428 for (auto *ExitBB : ExitBlocks)
1429 if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1430 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1431 ExitLoopMap[ClonedExitBB] = ExitL;
1432 ClonedExitsInLoops.push_back(ClonedExitBB);
1433 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1434 ParentL = ExitL;
1435 }
1436 assert((!ParentL || ParentL == OrigL.getParentLoop() ||
1437 ParentL->contains(OrigL.getParentLoop())) &&
1438 "The computed parent loop should always contain (or be) the parent of "
1439 "the original loop.");
1440
1441 // We build the set of blocks dominated by the cloned header from the set of
1442 // cloned blocks out of the original loop. While not all of these will
1443 // necessarily be in the cloned loop, it is enough to establish that they
1444 // aren't in unreachable cycles, etc.
1445 SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1446 for (auto *BB : OrigL.blocks())
1447 if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1448 ClonedLoopBlocks.insert(ClonedBB);
1449
1450 // Rebuild the set of blocks that will end up in the cloned loop. We may have
1451 // skipped cloning some region of this loop which can in turn skip some of
1452 // the backedges so we have to rebuild the blocks in the loop based on the
1453 // backedges that remain after cloning.
1454 SmallVector<BasicBlock *, 16> Worklist;
1455 SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1456 for (auto *Pred : predecessors(ClonedHeader)) {
1457 // The only possible non-loop header predecessor is the preheader because
1458 // we know we cloned the loop in simplified form.
1459 if (Pred == ClonedPH)
1460 continue;
1461
1462 // Because the loop was in simplified form, the only non-loop predecessor
1463 // should be the preheader.
1464 assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
1465 "header other than the preheader "
1466 "that is not part of the loop!");
1467
1468 // Insert this block into the loop set and on the first visit (and if it
1469 // isn't the header we're currently walking) put it into the worklist to
1470 // recurse through.
1471 if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1472 Worklist.push_back(Pred);
1473 }
1474
1475 // If we had any backedges then there *is* a cloned loop. Put the header into
1476 // the loop set and then walk the worklist backwards to find all the blocks
1477 // that remain within the loop after cloning.
1478 if (!BlocksInClonedLoop.empty()) {
1479 BlocksInClonedLoop.insert(ClonedHeader);
1480
1481 while (!Worklist.empty()) {
1482 BasicBlock *BB = Worklist.pop_back_val();
1483 assert(BlocksInClonedLoop.count(BB) &&
1484 "Didn't put block into the loop set!");
1485
1486 // Insert any predecessors that are in the possible set into the cloned
1487 // set, and if the insert is successful, add them to the worklist. Note
1488 // that we filter on the blocks that are definitely reachable via the
1489 // backedge to the loop header so we may prune out dead code within the
1490 // cloned loop.
1491 for (auto *Pred : predecessors(BB))
1492 if (ClonedLoopBlocks.count(Pred) &&
1493 BlocksInClonedLoop.insert(Pred).second)
1494 Worklist.push_back(Pred);
1495 }
1496
1497 ClonedL = LI.AllocateLoop();
1498 if (ParentL) {
1499 ParentL->addBasicBlockToLoop(ClonedPH, LI);
1500 ParentL->addChildLoop(ClonedL);
1501 } else {
1502 LI.addTopLevelLoop(ClonedL);
1503 }
1504 NonChildClonedLoops.push_back(ClonedL);
1505
1506 ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1507 // We don't want to just add the cloned loop blocks based on how we
1508 // discovered them. The original order of blocks was carefully built in
1509 // a way that doesn't rely on predecessor ordering. Rather than re-invent
1510 // that logic, we just re-walk the original blocks (and those of the child
1511 // loops) and filter them as we add them into the cloned loop.
1512 for (auto *BB : OrigL.blocks()) {
1513 auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1514 if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1515 continue;
1516
1517 // Directly add the blocks that are only in this loop.
1518 if (LI.getLoopFor(BB) == &OrigL) {
1519 ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1520 continue;
1521 }
1522
1523 // We want to manually add it to this loop and parents.
1524 // Registering it with LoopInfo will happen when we clone the top
1525 // loop for this block.
1526 for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1527 PL->addBlockEntry(ClonedBB);
1528 }
1529
1530 // Now add each child loop whose header remains within the cloned loop. All
1531 // of the blocks within the loop must satisfy the same constraints as the
1532 // header so once we pass the header checks we can just clone the entire
1533 // child loop nest.
1534 for (Loop *ChildL : OrigL) {
1535 auto *ClonedChildHeader =
1536 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1537 if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1538 continue;
1539
1540#ifndef NDEBUG
1541 // We should never have a cloned child loop header but fail to have
1542 // all of the blocks for that child loop.
1543 for (auto *ChildLoopBB : ChildL->blocks())
1544 assert(BlocksInClonedLoop.count(
1545 cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
1546 "Child cloned loop has a header within the cloned outer "
1547 "loop but not all of its blocks!");
1548#endif
1549
1550 cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1551 }
1552 }
1553
1554 // Now that we've handled all the components of the original loop that were
1555 // cloned into a new loop, we still need to handle anything from the original
1556 // loop that wasn't in a cloned loop.
1557
1558 // Figure out what blocks are left to place within any loop nest containing
1559 // the unswitched loop. If we never formed a loop, the cloned PH is one of
1560 // them.
1561 SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1562 if (BlocksInClonedLoop.empty())
1563 UnloopedBlockSet.insert(ClonedPH);
1564 for (auto *ClonedBB : ClonedLoopBlocks)
1565 if (!BlocksInClonedLoop.count(ClonedBB))
1566 UnloopedBlockSet.insert(ClonedBB);
1567
1568 // Copy the cloned exits and sort them in ascending loop depth, we'll work
1569 // backwards across these to process them inside out. The order shouldn't
1570 // matter as we're just trying to build up the map from inside-out; we use
1571 // the map in a more stably ordered way below.
1572 auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1573 llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1574 return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1575 ExitLoopMap.lookup(RHS)->getLoopDepth();
1576 });
1577
1578 // Populate the existing ExitLoopMap with everything reachable from each
1579 // exit, starting from the inner most exit.
1580 while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1581 assert(Worklist.empty() && "Didn't clear worklist!");
1582
1583 BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1584 Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1585
1586 // Walk the CFG back until we hit the cloned PH adding everything reachable
1587 // and in the unlooped set to this exit block's loop.
1588 Worklist.push_back(ExitBB);
1589 do {
1590 BasicBlock *BB = Worklist.pop_back_val();
1591 // We can stop recursing at the cloned preheader (if we get there).
1592 if (BB == ClonedPH)
1593 continue;
1594
1595 for (BasicBlock *PredBB : predecessors(BB)) {
1596 // If this pred has already been moved to our set or is part of some
1597 // (inner) loop, no update needed.
1598 if (!UnloopedBlockSet.erase(PredBB)) {
1599 assert(
1600 (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
1601 "Predecessor not mapped to a loop!");
1602 continue;
1603 }
1604
1605 // We just insert into the loop set here. We'll add these blocks to the
1606 // exit loop after we build up the set in an order that doesn't rely on
1607 // predecessor order (which in turn relies on use list order).
1608 bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1609 (void)Inserted;
1610 assert(Inserted && "Should only visit an unlooped block once!");
1611
1612 // And recurse through to its predecessors.
1613 Worklist.push_back(PredBB);
1614 }
1615 } while (!Worklist.empty());
1616 }
1617
1618 // Now that the ExitLoopMap gives us a mapping for all the non-looping cloned
1619 // blocks to their outer loops, walk the cloned blocks and the cloned exits
1620 // in their original order adding them to the correct loop.
1621
1622 // We need a stable insertion order. We use the original loop
1623 // order and map into the correct parent loop.
1624 for (auto *BB : llvm::concat<BasicBlock *const>(
1625 ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1626 if (Loop *OuterL = ExitLoopMap.lookup(BB))
1627 OuterL->addBasicBlockToLoop(BB, LI);
1628
1629#ifndef NDEBUG
1630 for (auto &BBAndL : ExitLoopMap) {
1631 auto *BB = BBAndL.first;
1632 auto *OuterL = BBAndL.second;
1633 assert(LI.getLoopFor(BB) == OuterL &&
1634 "Failed to put all blocks into outer loops!");
1635 }
1636#endif
1637
1638 // Now that all the blocks are placed into the correct containing loop in the
1639 // absence of child loops, find all the potentially cloned child loops and
1640 // clone them into whatever outer loop we placed their header into.
1641 for (Loop *ChildL : OrigL) {
1642 auto *ClonedChildHeader =
1643 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1644 if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1645 continue;
1646
1647#ifndef NDEBUG
1648 for (auto *ChildLoopBB : ChildL->blocks())
1649 assert(VMap.count(ChildLoopBB) &&
1650 "Cloned a child loop header but not all of that loop's blocks!");
1651#endif
1652
1653 NonChildClonedLoops.push_back(cloneLoopNest(
1654 *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1655 }
1656}
1657
1658static void
1659 deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
1660 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1661 DominatorTree &DT, MemorySSAUpdater *MSSAU) {
1662 // Find all the dead clones, and remove them from their successors.
1663 SmallVector<BasicBlock *, 16> DeadBlocks;
1664 for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1665 for (const auto &VMap : VMaps)
1666 if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1667 if (!DT.isReachableFromEntry(ClonedBB)) {
1668 for (BasicBlock *SuccBB : successors(ClonedBB))
1669 SuccBB->removePredecessor(ClonedBB);
1670 DeadBlocks.push_back(ClonedBB);
1671 }
1672
1673 // Remove all MemorySSA in the dead blocks
1674 if (MSSAU) {
1675 SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1676 DeadBlocks.end());
1677 MSSAU->removeBlocks(DeadBlockSet);
1678 }
1679
1680 // Drop any remaining references to break cycles.
1681 for (BasicBlock *BB : DeadBlocks)
1682 BB->dropAllReferences();
1683 // Erase them from the IR.
1684 for (BasicBlock *BB : DeadBlocks)
1685 BB->eraseFromParent();
1686}
1687
1688 static void deleteDeadBlocksFromLoop(Loop &L,
1689 SmallVectorImpl<BasicBlock *> &ExitBlocks,
1690 DominatorTree &DT, LoopInfo &LI,
1691 MemorySSAUpdater *MSSAU,
1692 ScalarEvolution *SE,
1693 LPMUpdater &LoopUpdater) {
1694 // Find all the dead blocks tied to this loop, and remove them from their
1695 // successors.
1696 SmallPtrSet<BasicBlock *, 16> DeadBlockSet;
1697
1698 // Start with loop/exit blocks and get a transitive closure of reachable dead
1699 // blocks.
1700 SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1701 ExitBlocks.end());
1702 DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1703 while (!DeathCandidates.empty()) {
1704 auto *BB = DeathCandidates.pop_back_val();
1705 if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1706 for (BasicBlock *SuccBB : successors(BB)) {
1707 SuccBB->removePredecessor(BB);
1708 DeathCandidates.push_back(SuccBB);
1709 }
1710 DeadBlockSet.insert(BB);
1711 }
1712 }
1713
1714 // Remove all MemorySSA in the dead blocks
1715 if (MSSAU)
1716 MSSAU->removeBlocks(DeadBlockSet);
1717
1718 // Filter out the dead blocks from the exit blocks list so that it can be
1719 // used in the caller.
1720 llvm::erase_if(ExitBlocks,
1721 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1722
1723 // Walk from this loop up through its parents removing all of the dead blocks.
1724 for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1725 for (auto *BB : DeadBlockSet)
1726 ParentL->getBlocksSet().erase(BB);
1727 llvm::erase_if(ParentL->getBlocksVector(),
1728 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1729 }
1730
1731 // Now delete the dead child loops. This raw delete will clear them
1732 // recursively.
1733 llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1734 if (!DeadBlockSet.count(ChildL->getHeader()))
1735 return false;
1736
1737 assert(llvm::all_of(ChildL->blocks(),
1738 [&](BasicBlock *ChildBB) {
1739 return DeadBlockSet.count(ChildBB);
1740 }) &&
1741 "If the child loop header is dead all blocks in the child loop must "
1742 "be dead as well!");
1743 LoopUpdater.markLoopAsDeleted(*ChildL, ChildL->getName());
1744 if (SE)
1745 SE->forgetBlockAndLoopDispositions();
1746 LI.destroy(ChildL);
1747 return true;
1748 });
1749
1750 // Remove the loop mappings for the dead blocks and drop all the references
1751 // from these blocks to others to handle cyclic references as we start
1752 // deleting the blocks themselves.
1753 for (auto *BB : DeadBlockSet) {
1754 // Check that the dominator tree has already been updated.
1755 assert(!DT.getNode(BB) && "Should already have cleared domtree!");
1756 LI.changeLoopFor(BB, nullptr);
1757 // Drop all uses of the instructions to make sure we won't have dangling
1758 // uses in other blocks.
1759 for (auto &I : *BB)
1760 if (!I.use_empty())
1761 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
1762 BB->dropAllReferences();
1763 }
1764
1765 // Actually delete the blocks now that they've been fully unhooked from the
1766 // IR.
1767 for (auto *BB : DeadBlockSet)
1768 BB->eraseFromParent();
1769}
1770
1771/// Recompute the set of blocks in a loop after unswitching.
1772///
1773/// This walks from the original header's predecessors to rebuild the loop. We
1774/// take advantage of the fact that new blocks can't have been added, and so we
1775/// filter by the original loop's blocks. This also handles potentially
1776/// unreachable code that we don't want to explore but might be found examining
1777/// the predecessors of the header.
1778///
1779/// If the original loop is no longer a loop, this will return an empty set. If
1780/// it remains a loop, all the blocks within it will be added to the set
1781/// (including those blocks in inner loops).
1782 static SmallPtrSet<const BasicBlock *, 16> recomputeLoopBlockSet(Loop &L,
1783 LoopInfo &LI) {
1784 SmallPtrSet<const BasicBlock *, 16> LoopBlockSet;
1785
1786 auto *PH = L.getLoopPreheader();
1787 auto *Header = L.getHeader();
1788
1789 // A worklist to use while walking backwards from the header.
1790 SmallVector<BasicBlock *, 16> Worklist;
1791
1792 // First walk the predecessors of the header to find the backedges. This will
1793 // form the basis of our walk.
1794 for (auto *Pred : predecessors(Header)) {
1795 // Skip the preheader.
1796 if (Pred == PH)
1797 continue;
1798
1799 // Because the loop was in simplified form, the only non-loop predecessor
1800 // is the preheader.
1801 assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1802 "than the preheader that is not part of the "
1803 "loop!");
1804
1805 // Insert this block into the loop set on the first visit and, if it
1806 // isn't the header we're currently walking, put it into the worklist to
1807 // recurse through.
1808 if (LoopBlockSet.insert(Pred).second && Pred != Header)
1809 Worklist.push_back(Pred);
1810 }
1811
1812 // If no backedges were found, we're done.
1813 if (LoopBlockSet.empty())
1814 return LoopBlockSet;
1815
1816 // We found backedges, recurse through them to identify the loop blocks.
1817 while (!Worklist.empty()) {
1818 BasicBlock *BB = Worklist.pop_back_val();
1819 assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1820
1821 // No need to walk past the header.
1822 if (BB == Header)
1823 continue;
1824
1825 // Because we know the inner loop structure remains valid we can use the
1826 // loop structure to jump immediately across the entire nested loop.
1827 // Further, because it is in loop simplified form, we can directly jump
1828 // to its preheader afterward.
1829 if (Loop *InnerL = LI.getLoopFor(BB))
1830 if (InnerL != &L) {
1831 assert(L.contains(InnerL) &&
1832 "Should not reach a loop *outside* this loop!");
1833 // The preheader is the only possible predecessor of the loop so
1834 // insert it into the set and check whether it was already handled.
1835 auto *InnerPH = InnerL->getLoopPreheader();
1836 assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1837 "but not contain the inner loop "
1838 "preheader!");
1839 if (!LoopBlockSet.insert(InnerPH).second)
1840 // The only way to reach the preheader is through the loop body
1841 // itself so if it has been visited the loop is already handled.
1842 continue;
1843
1844 // Insert all of the blocks (other than those already present) into
1845 // the loop set. We expect at least the block that led us to find the
1846 // inner loop to be in the block set, but we may also have other loop
1847 // blocks if they were already enqueued as predecessors of some other
1848 // outer loop block.
1849 for (auto *InnerBB : InnerL->blocks()) {
1850 if (InnerBB == BB) {
1851 assert(LoopBlockSet.count(InnerBB) &&
1852 "Block should already be in the set!");
1853 continue;
1854 }
1855
1856 LoopBlockSet.insert(InnerBB);
1857 }
1858
1859 // Add the preheader to the worklist so we will continue past the
1860 // loop body.
1861 Worklist.push_back(InnerPH);
1862 continue;
1863 }
1864
1865 // Insert any predecessors that were in the original loop into the new
1866 // set, and if the insert is successful, add them to the worklist.
1867 for (auto *Pred : predecessors(BB))
1868 if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1869 Worklist.push_back(Pred);
1870 }
1871
1872 assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
1873
1874 // We've found all the blocks participating in the loop, return our completed
1875 // set.
1876 return LoopBlockSet;
1877}
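// Illustrative usage (a minimal sketch mirroring the call made later in
// rebuildLoopAfterUnswitch):
//
//   auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
//   if (LoopBlockSet.empty()) {
//     // No backedges survived unswitching; L must be dissolved rather than
//     // updated.
//   }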
1878
1879/// Rebuild a loop after unswitching removes some subset of blocks and edges.
1880///
1881/// The removal may have removed some child loops entirely but cannot have
1882/// disturbed any remaining child loops. However, they may need to be hoisted
1883/// to the parent loop (or to be top-level loops). The original loop may be
1884/// completely removed.
1885///
1886/// The sibling loops resulting from this update are returned. If the original
1887/// loop remains a valid loop, it will be the first entry in this list with all
1888/// of the newly sibling loops following it.
1889///
1890/// Returns true if the loop remains a loop after unswitching, and false if it
1891/// is no longer a loop after unswitching (and should not continue to be
1892/// referenced).
1893 static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
1894 LoopInfo &LI,
1895 SmallVectorImpl<Loop *> &HoistedLoops,
1896 ScalarEvolution *SE) {
1897 auto *PH = L.getLoopPreheader();
1898
1899 // Compute the actual parent loop from the exit blocks. Because we may have
1900 // pruned some exits the loop may be different from the original parent.
1901 Loop *ParentL = nullptr;
1902 SmallVector<Loop *, 4> ExitLoops;
1903 SmallVector<BasicBlock *, 4> ExitsInLoops;
1904 ExitsInLoops.reserve(ExitBlocks.size());
1905 for (auto *ExitBB : ExitBlocks)
1906 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1907 ExitLoops.push_back(ExitL);
1908 ExitsInLoops.push_back(ExitBB);
1909 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1910 ParentL = ExitL;
1911 }
1912
1913 // Recompute the blocks participating in this loop. This may be empty if it
1914 // is no longer a loop.
1915 auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1916
1917 // If we still have a loop, we need to re-set the loop's parent as the exit
1918 // block set changing may have moved it within the loop nest. Note that this
1919 // can only happen when this loop has a parent as it can only hoist the loop
1920 // *up* the nest.
1921 if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1922 // Remove this loop's (original) blocks from all of the intervening loops.
1923 for (Loop *IL = L.getParentLoop(); IL != ParentL;
1924 IL = IL->getParentLoop()) {
1925 IL->getBlocksSet().erase(PH);
1926 for (auto *BB : L.blocks())
1927 IL->getBlocksSet().erase(BB);
1928 llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1929 return BB == PH || L.contains(BB);
1930 });
1931 }
1932
1933 LI.changeLoopFor(PH, ParentL);
1934 L.getParentLoop()->removeChildLoop(&L);
1935 if (ParentL)
1936 ParentL->addChildLoop(&L);
1937 else
1938 LI.addTopLevelLoop(&L);
1939 }
1940
1941 // Now we update all the blocks which are no longer within the loop.
1942 auto &Blocks = L.getBlocksVector();
1943 auto BlocksSplitI =
1944 LoopBlockSet.empty()
1945 ? Blocks.begin()
1946 : std::stable_partition(
1947 Blocks.begin(), Blocks.end(),
1948 [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
1949
1950 // Before we erase the list of unlooped blocks, build a set of them.
1951 SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
1952 if (LoopBlockSet.empty())
1953 UnloopedBlocks.insert(PH);
1954
1955 // Now erase these blocks from the loop.
1956 for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
1957 L.getBlocksSet().erase(BB);
1958 Blocks.erase(BlocksSplitI, Blocks.end());
1959
1960 // Sort the exits in ascending loop depth, we'll work backwards across these
1961 // to process them inside out.
1962 llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1963 return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
1964 });
1965
1966 // We'll build up a set for each exit loop.
1967 SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
1968 Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
1969
1970 auto RemoveUnloopedBlocksFromLoop =
1971 [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
1972 for (auto *BB : UnloopedBlocks)
1973 L.getBlocksSet().erase(BB);
1974 llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
1975 return UnloopedBlocks.count(BB);
1976 });
1977 };
1978
1979 SmallVector<BasicBlock *, 16> Worklist;
1980 while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
1981 assert(Worklist.empty() && "Didn't clear worklist!");
1982 assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
1983
1984 // Grab the next exit block, in decreasing loop depth order.
1985 BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
1986 Loop &ExitL = *LI.getLoopFor(ExitBB);
1987 assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
1988
1989 // Erase all of the unlooped blocks from the loops between the previous
1990 // exit loop and this exit loop. This works because the ExitsInLoops list is
1991 // sorted in increasing order of loop depth and thus we visit loops in
1992 // decreasing order of loop depth.
1993 for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
1994 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1995
1996 // Walk the CFG back until we hit the cloned PH adding everything reachable
1997 // and in the unlooped set to this exit block's loop.
1998 Worklist.push_back(ExitBB);
1999 do {
2000 BasicBlock *BB = Worklist.pop_back_val();
2001 // We can stop recursing at the cloned preheader (if we get there).
2002 if (BB == PH)
2003 continue;
2004
2005 for (BasicBlock *PredBB : predecessors(BB)) {
2006 // If this pred has already been moved to our set or is part of some
2007 // (inner) loop, no update needed.
2008 if (!UnloopedBlocks.erase(PredBB)) {
2009 assert((NewExitLoopBlocks.count(PredBB) ||
2010 ExitL.contains(LI.getLoopFor(PredBB))) &&
2011 "Predecessor not in a nested loop (or already visited)!");
2012 continue;
2013 }
2014
2015 // We just insert into the loop set here. We'll add these blocks to the
2016 // exit loop after we build up the set in a deterministic order rather
2017 // than the predecessor-influenced visit order.
2018 bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
2019 (void)Inserted;
2020 assert(Inserted && "Should only visit an unlooped block once!");
2021
2022 // And recurse through to its predecessors.
2023 Worklist.push_back(PredBB);
2024 }
2025 } while (!Worklist.empty());
2026
2027 // If blocks in this exit loop were directly part of the original loop (as
2028 // opposed to a child loop) update the map to point to this exit loop. This
2029 // just updates a map and so the fact that the order is unstable is fine.
2030 for (auto *BB : NewExitLoopBlocks)
2031 if (Loop *BBL = LI.getLoopFor(BB))
2032 if (BBL == &L || !L.contains(BBL))
2033 LI.changeLoopFor(BB, &ExitL);
2034
2035 // We will remove the remaining unlooped blocks from this loop in the next
2036 // iteration or below.
2037 NewExitLoopBlocks.clear();
2038 }
2039
2040 // Any remaining unlooped blocks are no longer part of any loop unless they
2041 // are part of some child loop.
2042 for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
2043 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2044 for (auto *BB : UnloopedBlocks)
2045 if (Loop *BBL = LI.getLoopFor(BB))
2046 if (BBL == &L || !L.contains(BBL))
2047 LI.changeLoopFor(BB, nullptr);
2048
2049 // Sink all the child loops whose headers are no longer in the loop set to
2050 // the parent (or to be top level loops). We reach into the loop and directly
2051 // update its subloop vector to make this batch update efficient.
2052 auto &SubLoops = L.getSubLoopsVector();
2053 auto SubLoopsSplitI =
2054 LoopBlockSet.empty()
2055 ? SubLoops.begin()
2056 : std::stable_partition(
2057 SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
2058 return LoopBlockSet.count(SubL->getHeader());
2059 });
2060 for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
2061 HoistedLoops.push_back(HoistedL);
2062 HoistedL->setParentLoop(nullptr);
2063
2064 // To compute the new parent of this hoisted loop we look at where we
2065 // placed the preheader above. We can't lookup the header itself because we
2066 // retained the mapping from the header to the hoisted loop. But the
2067 // preheader and header should have the exact same new parent computed
2068 // based on the set of exit blocks from the original loop as the preheader
2069 // is a predecessor of the header and so reached in the reverse walk. And
2070 // because the loops were all in simplified form the preheader of the
2071 // hoisted loop can't be part of some *other* loop.
2072 if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
2073 NewParentL->addChildLoop(HoistedL);
2074 else
2075 LI.addTopLevelLoop(HoistedL);
2076 }
2077 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
2078
2079 // Actually delete the loop if nothing remained within it.
2080 if (Blocks.empty()) {
2081 assert(SubLoops.empty() &&
2082 "Failed to remove all subloops from the original loop!");
2083 if (Loop *ParentL = L.getParentLoop())
2084 ParentL->removeChildLoop(llvm::find(*ParentL, &L));
2085 else
2086 LI.removeLoop(llvm::find(LI, &L));
2087 // markLoopAsDeleted for L should be triggered by the caller (it is
2088 // typically done within postUnswitch).
2089 if (SE)
2090 SE->forgetBlockAndLoopDispositions();
2091 LI.destroy(&L);
2092 return false;
2093 }
2094
2095 return true;
2096}
2097
2098/// Helper to visit a dominator subtree, invoking a callable on each node.
2099///
2100/// Returning false at any point will stop walking past that node of the tree.
2101template <typename CallableT>
2102void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
2103 SmallVector<DomTreeNode *, 4> DomWorklist;
2104 DomWorklist.push_back(DT[BB]);
2105#ifndef NDEBUG
2106 SmallPtrSet<DomTreeNode *, 4> Visited;
2107 Visited.insert(DT[BB]);
2108#endif
2109 do {
2110 DomTreeNode *N = DomWorklist.pop_back_val();
2111
2112 // Visit this node.
2113 if (!Callable(N->getBlock()))
2114 continue;
2115
2116 // Accumulate the child nodes.
2117 for (DomTreeNode *ChildN : *N) {
2118 assert(Visited.insert(ChildN).second &&
2119 "Cannot visit a node twice when walking a tree!");
2120 DomWorklist.push_back(ChildN);
2121 }
2122 } while (!DomWorklist.empty());
2123}
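// Illustrative usage (a minimal sketch mirroring how this helper is invoked
// further down in this file): the callable receives each block in the
// dominator subtree and returns true to keep descending below that block.
//
//   visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
//     DominatingSucc[BB] = SuccBB;
//     return true;
//   });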
2124
2125 void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName,
2126 bool CurrentLoopValid, bool PartiallyInvariant,
2127 bool InjectedCondition, ArrayRef<Loop *> NewLoops) {
2128 // If we did a non-trivial unswitch, we have added new (cloned) loops.
2129 if (!NewLoops.empty())
2130 U.addSiblingLoops(NewLoops);
2131
2132 // If the current loop remains valid, we should revisit it to catch any
2133 // other unswitch opportunities. Otherwise, we need to mark it as deleted.
2134 if (CurrentLoopValid) {
2135 if (PartiallyInvariant) {
2136 // Mark the new loop as partially unswitched, to avoid unswitching on
2137 // the same condition again.
2138 auto &Context = L.getHeader()->getContext();
2139 MDNode *DisableUnswitchMD = MDNode::get(
2140 Context,
2141 MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
2142 MDNode *NewLoopID = makePostTransformationMetadata(
2143 Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
2144 {DisableUnswitchMD});
2145 L.setLoopID(NewLoopID);
2146 } else if (InjectedCondition) {
2147 // Do the same for injection of invariant conditions.
2148 auto &Context = L.getHeader()->getContext();
2149 MDNode *DisableUnswitchMD = MDNode::get(
2150 Context,
2151 MDString::get(Context, "llvm.loop.unswitch.injection.disable"));
2152 MDNode *NewLoopID = makePostTransformationMetadata(
2153 Context, L.getLoopID(), {"llvm.loop.unswitch.injection"},
2154 {DisableUnswitchMD});
2155 L.setLoopID(NewLoopID);
2156 } else
2157 U.revisitCurrentLoop();
2158 } else
2159 U.markLoopAsDeleted(L, LoopName);
2160}
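// For illustration (hypothetical IR, not from this file): after a partial
// unswitch the remaining loop's metadata ends up looking roughly like
//
//   br i1 %cond, label %header, label %exit, !llvm.loop !0
//   ...
//   !0 = distinct !{!0, !1}
//   !1 = !{!"llvm.loop.unswitch.partial.disable"}
//
// which is the marker that findOptionMDForLoop() is queried for later in this
// file before another partial unswitch is attempted.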
2161
2162 static void unswitchNontrivialInvariants(
2163 Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
2164 IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI,
2165 AssumptionCache &AC, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
2166 LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition) {
2167 auto *ParentBB = TI.getParent();
2168 BranchInst *BI = dyn_cast<BranchInst>(&TI);
2169 SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2170
2171 // Save the current loop name in a variable so that we can report it even
2172 // after it has been deleted.
2173 std::string LoopName(L.getName());
2174
2175 // We can only unswitch switches, conditional branches with an invariant
2176 // condition, branches whose condition combines invariants with an instruction,
2177 // or partially invariant instructions.
2178 assert((SI || (BI && BI->isConditional())) &&
2179 "Can only unswitch switches and conditional branch!");
2180 bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
2181 bool FullUnswitch =
2182 SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] &&
2183 !PartiallyInvariant);
2184 if (FullUnswitch)
2185 assert(Invariants.size() == 1 &&
2186 "Cannot have other invariants with full unswitching!");
2187 else
2188 assert(isa<Instruction>(skipTrivialSelect(BI->getCondition())) &&
2189 "Partial unswitching requires an instruction as the condition!");
2190
2191 if (MSSAU && VerifyMemorySSA)
2192 MSSAU->getMemorySSA()->verifyMemorySSA();
2193
2194 // Constant and BBs tracking the cloned and continuing successor. When we are
2195 // unswitching the entire condition, this can just be trivially chosen to
2196 // unswitch towards `true`. However, when we are unswitching a set of
2197 // invariants combined with `and` or `or` or partially invariant instructions,
2198 // the combining operation determines the best direction to unswitch: we want
2199 // to unswitch the direction that will collapse the branch.
2200 bool Direction = true;
2201 int ClonedSucc = 0;
2202 if (!FullUnswitch) {
2203 Value *Cond = skipTrivialSelect(BI->getCondition());
2204 (void)Cond;
2205 assert((match(Cond, m_LogicalAnd()) || match(Cond, m_LogicalOr()) ||
2206 PartiallyInvariant) &&
2207 "Only `or`, `and`, `select`, or partially invariant instructions "
2208 "can combine invariants being unswitched.");
2209 if (!match(Cond, m_LogicalOr())) {
2210 if (match(Cond, m_LogicalAnd()) ||
2211 (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
2212 Direction = false;
2213 ClonedSucc = 1;
2214 }
2215 }
2216 }
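// Illustrative note (not in the original source): for `or i1 %inv, %v` the
// clone taken when the condition is true can drop the branch entirely, since
// the `or` is known true whenever %inv is true; for `and i1 %inv, %v` the
// collapsing direction is false, which is why Direction and ClonedSucc are
// flipped above for logical-and and for partially invariant conditions whose
// known value is not one.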
2217
2218 BasicBlock *RetainedSuccBB =
2219 BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2220 SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2221 if (BI)
2222 UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2223 else
2224 for (auto Case : SI->cases())
2225 if (Case.getCaseSuccessor() != RetainedSuccBB)
2226 UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2227
2228 assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
2229 "Should not unswitch the same successor we are retaining!");
2230
2231 // The branch should be in this exact loop. Any inner loop's invariant branch
2232 // should be handled by unswitching that inner loop. The caller of this
2233 // routine should filter out any candidates that remain (but were skipped for
2234 // whatever reason).
2235 assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
2236
2237 // Compute the parent loop now before we start hacking on things.
2238 Loop *ParentL = L.getParentLoop();
2239 // Get blocks in RPO order for MSSA update, before changing the CFG.
2240 LoopBlocksRPO LBRPO(&L);
2241 if (MSSAU)
2242 LBRPO.perform(&LI);
2243
2244 // Compute the outer-most loop containing one of our exit blocks. This is the
2245 // furthest up our loopnest which can be mutated, which we will use below to
2246 // update things.
2247 Loop *OuterExitL = &L;
2248 SmallVector<BasicBlock *, 4> ExitBlocks;
2249 L.getUniqueExitBlocks(ExitBlocks);
2250 for (auto *ExitBB : ExitBlocks) {
2251 // ExitBB can be an exit block for several levels in the loop nest. Make
2252 // sure we find the top most.
2253 Loop *NewOuterExitL = getTopMostExitingLoop(ExitBB, LI);
2254 if (!NewOuterExitL) {
2255 // We exited the entire nest with this block, so we're done.
2256 OuterExitL = nullptr;
2257 break;
2258 }
2259 if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2260 OuterExitL = NewOuterExitL;
2261 }
2262
2263 // At this point, we're definitely going to unswitch something so invalidate
2264 // any cached information in ScalarEvolution for the outer most loop
2265 // containing an exit block and all nested loops.
2266 if (SE) {
2267 if (OuterExitL)
2268 SE->forgetLoop(OuterExitL);
2269 else
2270 SE->forgetTopmostLoop(&L);
2271 SE->forgetBlockAndLoopDispositions();
2272 }
2273
2274 // If the edge from this terminator to a successor dominates that successor,
2275 // store a map from each block in its dominator subtree to it. This lets us
2276 // tell when cloning for a particular successor if a block is dominated by
2277 // some *other* successor with a single data structure. We use this to
2278 // significantly reduce cloning.
2279 SmallDenseMap<BasicBlock *, BasicBlock *, 16> DominatingSucc;
2280 for (auto *SuccBB : llvm::concat<BasicBlock *const>(ArrayRef(RetainedSuccBB),
2281 UnswitchedSuccBBs))
2282 if (SuccBB->getUniquePredecessor() ||
2283 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2284 return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2285 }))
2286 visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2287 DominatingSucc[BB] = SuccBB;
2288 return true;
2289 });
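// Illustrative example (not in the original source): if the unswitched
// terminator in %parent targets %a and %b, and %a.body is reachable only
// through %a, then DominatingSucc[%a.body] == %a, and the clone built for the
// %b direction can skip cloning %a.body altogether.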
2290
2291 // Split the preheader, so that we know that there is a safe place to insert
2292 // the conditional branch. We will change the preheader to have a conditional
2293 // branch on LoopCond. The original preheader will become the split point
2294 // between the unswitched versions, and we will have a new preheader for the
2295 // original loop.
2296 BasicBlock *SplitBB = L.getLoopPreheader();
2297 BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2298
2299 // Keep track of the dominator tree updates needed.
2300 SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
2301
2302 // Clone the loop for each unswitched successor.
2303 SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
2304 VMaps.reserve(UnswitchedSuccBBs.size());
2305 SmallDenseMap<BasicBlock *, BasicBlock *, 4> ClonedPHs;
2306 for (auto *SuccBB : UnswitchedSuccBBs) {
2307 VMaps.emplace_back(new ValueToValueMapTy());
2308 ClonedPHs[SuccBB] = buildClonedLoopBlocks(
2309 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2310 DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU, SE);
2311 }
2312
2313 // Drop metadata if we may break its semantics by moving this instr into the
2314 // split block.
2315 if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2316 if (DropNonTrivialImplicitNullChecks)
2317 // Do not spend time trying to understand if we can keep it, just drop it
2318 // to save compile time.
2319 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2320 else {
2321 // It is only legal to preserve make.implicit metadata if we are
2322 // guaranteed not to reach an implicit null check after following this branch.
2323 ICFLoopSafetyInfo SafetyInfo;
2324 SafetyInfo.computeLoopSafetyInfo(&L);
2325 if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2326 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2327 }
2328 }
2329
2330 // The stitching of the branched code back together depends on whether we're
2331 // doing full unswitching or not with the exception that we always want to
2332 // nuke the initial terminator placed in the split block.
2333 SplitBB->getTerminator()->eraseFromParent();
2334 if (FullUnswitch) {
2335 // Splice the terminator from the original loop and rewrite its
2336 // successors.
2337 TI.moveBefore(*SplitBB, SplitBB->end());
2338
2339 // Keep a clone of the terminator for MSSA updates.
2340 Instruction *NewTI = TI.clone();
2341 NewTI->insertInto(ParentBB, ParentBB->end());
2342
2343 // First wire up the moved terminator to the preheaders.
2344 if (BI) {
2345 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2346 BI->setSuccessor(ClonedSucc, ClonedPH);
2347 BI->setSuccessor(1 - ClonedSucc, LoopPH);
2348 Value *Cond = skipTrivialSelect(BI->getCondition());
2349 if (InsertFreeze)
2350 Cond = new FreezeInst(
2351 Cond, Cond->getName() + ".fr", BI);
2352 BI->setCondition(Cond);
2353 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2354 } else {
2355 assert(SI && "Must either be a branch or switch!");
2356
2357 // Walk the cases and directly update their successors.
2358 assert(SI->getDefaultDest() == RetainedSuccBB &&
2359 "Not retaining default successor!");
2360 SI->setDefaultDest(LoopPH);
2361 for (const auto &Case : SI->cases())
2362 if (Case.getCaseSuccessor() == RetainedSuccBB)
2363 Case.setSuccessor(LoopPH);
2364 else
2365 Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2366
2367 if (InsertFreeze)
2368 SI->setCondition(new FreezeInst(
2369 SI->getCondition(), SI->getCondition()->getName() + ".fr", SI));
2370
2371 // We need to use the set to populate domtree updates as even when there
2372 // are multiple cases pointing at the same successor we only want to
2373 // remove and insert one edge in the domtree.
2374 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2375 DTUpdates.push_back(
2376 {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2377 }
2378
2379 if (MSSAU) {
2380 DT.applyUpdates(DTUpdates);
2381 DTUpdates.clear();
2382
2383 // Remove all but one edge to the retained block and all unswitched
2384 // blocks. This is to avoid having duplicate entries in the cloned Phis,
2385 // when we know we only keep a single edge for each case.
2386 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2387 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2388 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2389
2390 for (auto &VMap : VMaps)
2391 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2392 /*IgnoreIncomingWithNoClones=*/true);
2393 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2394
2395 // Remove all edges to unswitched blocks.
2396 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2397 MSSAU->removeEdge(ParentBB, SuccBB);
2398 }
2399
2400 // Now unhook the successor relationship as we'll be replacing
2401 // the terminator with a direct branch. This is much simpler for branches
2402 // than switches so we handle those first.
2403 if (BI) {
2404 // Remove the parent as a predecessor of the unswitched successor.
2405 assert(UnswitchedSuccBBs.size() == 1 &&
2406 "Only one possible unswitched block for a branch!");
2407 BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2408 UnswitchedSuccBB->removePredecessor(ParentBB,
2409 /*KeepOneInputPHIs*/ true);
2410 DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2411 } else {
2412 // Note that we actually want to remove the parent block as a predecessor
2413 // of *every* case successor. The case successor is either unswitched,
2414 // completely eliminating an edge from the parent to that successor, or it
2415 // is a duplicate edge to the retained successor as the retained successor
2416 // is always the default successor and as we'll replace this with a direct
2417 // branch we no longer need the duplicate entries in the PHI nodes.
2418 SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2419 assert(NewSI->getDefaultDest() == RetainedSuccBB &&
2420 "Not retaining default successor!");
2421 for (const auto &Case : NewSI->cases())
2422 Case.getCaseSuccessor()->removePredecessor(
2423 ParentBB,
2424 /*KeepOneInputPHIs*/ true);
2425
2426 // We need to use the set to populate domtree updates as even when there
2427 // are multiple cases pointing at the same successor we only want to
2428 // remove and insert one edge in the domtree.
2429 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2430 DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2431 }
2432
2433 // After MSSAU update, remove the cloned terminator instruction NewTI.
2434 ParentBB->getTerminator()->eraseFromParent();
2435
2436 // Create a new unconditional branch to the continuing block (as opposed to
2437 // the one cloned).
2438 BranchInst::Create(RetainedSuccBB, ParentBB);
2439 } else {
2440 assert(BI && "Only branches have partial unswitching.");
2441 assert(UnswitchedSuccBBs.size() == 1 &&
2442 "Only one possible unswitched block for a branch!");
2443 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2444 // When doing a partial unswitch, we have to do a bit more work to build up
2445 // the branch in the split block.
2446 if (PartiallyInvariant)
2447 buildPartialInvariantUnswitchConditionalBranch(
2448 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
2449 else {
2450 buildPartialUnswitchConditionalBranch(
2451 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
2452 FreezeLoopUnswitchCond, BI, &AC, DT);
2453 }
2454 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2455
2456 if (MSSAU) {
2457 DT.applyUpdates(DTUpdates);
2458 DTUpdates.clear();
2459
2460 // Perform MSSA cloning updates.
2461 for (auto &VMap : VMaps)
2462 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2463 /*IgnoreIncomingWithNoClones=*/true);
2464 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2465 }
2466 }
2467
2468 // Apply the updates accumulated above to get an up-to-date dominator tree.
2469 DT.applyUpdates(DTUpdates);
2470
2471 // Now that we have an accurate dominator tree, first delete the dead cloned
2472 // blocks so that we can accurately build any cloned loops. It is important to
2473 // not delete the blocks from the original loop yet because we still want to
2474 // reference the original loop to understand the cloned loop's structure.
2475 deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2476
2477 // Build the cloned loop structure itself. This may be substantially
2478 // different from the original structure due to the simplified CFG. This also
2479 // handles inserting all the cloned blocks into the correct loops.
2480 SmallVector<Loop *, 4> NonChildClonedLoops;
2481 for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2482 buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2483
2484 // Now that our cloned loops have been built, we can update the original loop.
2485 // First we delete the dead blocks from it and then we rebuild the loop
2486 // structure taking these deletions into account.
2487 deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, SE, LoopUpdater);
2488
2489 if (MSSAU && VerifyMemorySSA)
2490 MSSAU->getMemorySSA()->verifyMemorySSA();
2491
2492 SmallVector<Loop *, 4> HoistedLoops;
2493 bool IsStillLoop =
2494 rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops, SE);
2495
2496 if (MSSAU && VerifyMemorySSA)
2497 MSSAU->getMemorySSA()->verifyMemorySSA();
2498
2499 // This transformation has a high risk of corrupting the dominator tree, and
2500 // the below steps to rebuild loop structures will result in hard to debug
2501 // errors in that case so verify that the dominator tree is sane first.
2502 // FIXME: Remove this when the bugs stop showing up and rely on existing
2503 // verification steps.
2504 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
2505
2506 if (BI && !PartiallyInvariant) {
2507 // If we unswitched a branch which collapses the condition to a known
2508 // constant we want to replace all the uses of the invariants within both
2509 // the original and cloned blocks. We do this here so that we can use the
2510 // now updated dominator tree to identify which side the users are on.
2511 assert(UnswitchedSuccBBs.size() == 1 &&
2512 "Only one possible unswitched block for a branch!");
2513 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2514
2515 // When considering multiple partially-unswitched invariants
2516 // we can't just go replace them with constants in both branches.
2517 //
2518 // For 'AND' we infer that true branch ("continue") means true
2519 // for each invariant operand.
2520 // For 'OR' we can infer that false branch ("continue") means false
2521 // for each invariant operand.
2522 // So it happens that for the multiple-partial case we don't replace
2523 // in the unswitched branch.
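// E.g. (illustrative): unswitching `br i1 (and i1 %p, %q)` on the invariants
// {%p, %q}: the retained loop only runs when the `and` was true, so %p and %q
// both fold to true there, while in the unswitched copy we only know
// `%p & %q` was false, not which operand, so no per-operand replacement is
// done there.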
2524 bool ReplaceUnswitched =
2525 FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
2526
2527 ConstantInt *UnswitchedReplacement =
2528 Direction ? ConstantInt::getTrue(BI->getContext())
2529 : ConstantInt::getFalse(BI->getContext());
2530 ConstantInt *ContinueReplacement =
2531 Direction ? ConstantInt::getFalse(BI->getContext())
2532 : ConstantInt::getTrue(BI->getContext());
2533 for (Value *Invariant : Invariants) {
2534 assert(!isa<Constant>(Invariant) &&
2535 "Should not be replacing constant values!");
2536 // Use make_early_inc_range here as set invalidates the iterator.
2537 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
2538 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2539 if (!UserI)
2540 continue;
2541
2542 // Replace it with the 'continue' side if in the main loop body, and the
2543 // unswitched if in the cloned blocks.
2544 if (DT.dominates(LoopPH, UserI->getParent()))
2545 U.set(ContinueReplacement);
2546 else if (ReplaceUnswitched &&
2547 DT.dominates(ClonedPH, UserI->getParent()))
2548 U.set(UnswitchedReplacement);
2549 }
2550 }
2551 }
2552
2553 // We can change which blocks are exit blocks of all the cloned sibling
2554 // loops, the current loop, and any parent loops which shared exit blocks
2555 // with the current loop. As a consequence, we need to re-form LCSSA for
2556 // them. But we shouldn't need to re-form LCSSA for any child loops.
2557 // FIXME: This could be made more efficient by tracking which exit blocks are
2558 // new, and focusing on them, but that isn't likely to be necessary.
2559 //
2560 // In order to reasonably rebuild LCSSA we need to walk inside-out across the
2561 // loop nest and update every loop that could have had its exits changed. We
2562 // also need to cover any intervening loops. We add all of these loops to
2563 // a list and sort them by loop depth to achieve this without updating
2564 // unnecessary loops.
2565 auto UpdateLoop = [&](Loop &UpdateL) {
2566#ifndef NDEBUG
2567 UpdateL.verifyLoop();
2568 for (Loop *ChildL : UpdateL) {
2569 ChildL->verifyLoop();
2570 assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2571 "Perturbed a child loop's LCSSA form!");
2572 }
2573#endif
2574 // First build LCSSA for this loop so that we can preserve it when
2575 // forming dedicated exits. We don't want to perturb some other loop's
2576 // LCSSA while doing that CFG edit.
2577 formLCSSA(UpdateL, DT, &LI, SE);
2578
2579 // For loops reached by this loop's original exit blocks we may
2580 // have introduced new, non-dedicated exits. At least try to re-form dedicated
2581 // exits for these loops. This may fail if they couldn't have dedicated
2582 // exits to start with.
2583 formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
2584 };
2585
2586 // For non-child cloned loops and hoisted loops, we just need to update LCSSA
2587 // and we can do it in any order as they don't nest relative to each other.
2588 //
2589 // Also check if any of the loops we have updated have become top-level loops
2590 // as that will necessitate widening the outer loop scope.
2591 for (Loop *UpdatedL :
2592 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2593 UpdateLoop(*UpdatedL);
2594 if (UpdatedL->isOutermost())
2595 OuterExitL = nullptr;
2596 }
2597 if (IsStillLoop) {
2598 UpdateLoop(L);
2599 if (L.isOutermost())
2600 OuterExitL = nullptr;
2601 }
2602
2603 // If the original loop had exit blocks, walk up through the outer most loop
2604 // of those exit blocks to update LCSSA and form updated dedicated exits.
2605 if (OuterExitL != &L)
2606 for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2607 OuterL = OuterL->getParentLoop())
2608 UpdateLoop(*OuterL);
2609
2610#ifndef NDEBUG
2611 // Verify the entire loop structure to catch any incorrect updates before we
2612 // progress in the pass pipeline.
2613 LI.verify(DT);
2614#endif
2615
2616 // Now that we've unswitched something, make callbacks to report the changes.
2617 // For that we need to merge together the updated loops and the cloned loops
2618 // and check whether the original loop survived.
2619 SmallVector<Loop *, 4> SibLoops;
2620 for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2621 if (UpdatedL->getParentLoop() == ParentL)
2622 SibLoops.push_back(UpdatedL);
2623 postUnswitch(L, LoopUpdater, LoopName, IsStillLoop, PartiallyInvariant,
2624 InjectedCondition, SibLoops);
2625
2626 if (MSSAU && VerifyMemorySSA)
2627 MSSAU->getMemorySSA()->verifyMemorySSA();
2628
2629 if (BI)
2630 ++NumBranches;
2631 else
2632 ++NumSwitches;
2633}
2634
2635/// Recursively compute the cost of a dominator subtree based on the per-block
2636/// cost map provided.
2637///
2638/// The recursive computation is memoized into the provided DT-indexed cost map
2639/// to allow querying it for most nodes in the domtree without it becoming
2640/// quadratic.
2641 static InstructionCost computeDomSubtreeCost(
2642 DomTreeNode &N,
2643 const SmallDenseMap<BasicBlock *, InstructionCost, 4> &BBCostMap,
2644 SmallDenseMap<DomTreeNode *, InstructionCost, 4> &DTCostMap) {
2645 // Don't accumulate cost (or recurse through) blocks not in our block cost
2646 // map and thus not part of the duplication cost being considered.
2647 auto BBCostIt = BBCostMap.find(N.getBlock());
2648 if (BBCostIt == BBCostMap.end())
2649 return 0;
2650
2651 // Lookup this node to see if we already computed its cost.
2652 auto DTCostIt = DTCostMap.find(&N);
2653 if (DTCostIt != DTCostMap.end())
2654 return DTCostIt->second;
2655
2656 // If not, we have to compute it. We can't use insert above and update
2657 // because computing the cost may insert more things into the map.
2658 InstructionCost Cost = std::accumulate(
2659 N.begin(), N.end(), BBCostIt->second,
2660 [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
2661 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2662 });
2663 bool Inserted = DTCostMap.insert({&N, Cost}).second;
2664 (void)Inserted;
2665 assert(Inserted && "Should not insert a node while visiting children!");
2666 return Cost;
2667}
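// Worked example (illustrative, not from the original source): if A dominates
// B and C and BBCostMap holds {A: 5, B: 3, C: 2}, then computeDomSubtreeCost
// for A returns 5 + 3 + 2 = 10 and caches that value for A's node in
// DTCostMap; nodes whose blocks are absent from BBCostMap contribute nothing
// and are not recursed into.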
2668
2669/// Turns a select instruction into an implicit control flow branch,
2670/// making the following replacement:
2671///
2672/// head:
2673/// --code before select--
2674/// select %cond, %trueval, %falseval
2675/// --code after select--
2676///
2677/// into
2678///
2679/// head:
2680/// --code before select--
2681/// br i1 %cond, label %then, label %tail
2682///
2683/// then:
2684/// br %tail
2685///
2686/// tail:
2687/// phi [ %trueval, %then ], [ %falseval, %head]
2688/// unreachable
2689///
2690/// It also makes all relevant DT and LI updates, so that all structures are in
2691/// valid state after this transform.
2692 static BranchInst *turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT,
2693 LoopInfo &LI, MemorySSAUpdater *MSSAU,
2694 AssumptionCache *AC) {
2695 LLVM_DEBUG(dbgs() << "Turning " << *SI << " into a branch.\n");
2696 BasicBlock *HeadBB = SI->getParent();
2697
2698 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2699 SplitBlockAndInsertIfThen(SI->getCondition(), SI, false,
2700 SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2701 auto *CondBr = cast<BranchInst>(HeadBB->getTerminator());
2702 BasicBlock *ThenBB = CondBr->getSuccessor(0),
2703 *TailBB = CondBr->getSuccessor(1);
2704 if (MSSAU)
2705 MSSAU->moveAllAfterSpliceBlocks(HeadBB, TailBB, SI);
2706
2707 PHINode *Phi = PHINode::Create(SI->getType(), 2, "unswitched.select", SI);
2708 Phi->addIncoming(SI->getTrueValue(), ThenBB);
2709 Phi->addIncoming(SI->getFalseValue(), HeadBB);
2710 SI->replaceAllUsesWith(Phi);
2711 SI->eraseFromParent();
2712
2713 if (MSSAU && VerifyMemorySSA)
2714 MSSAU->getMemorySSA()->verifyMemorySSA();
2715
2716 ++NumSelects;
2717 return CondBr;
2718}
2719
2720/// Turns an llvm.experimental.guard intrinsic into an implicit control flow branch,
2721/// making the following replacement:
2722///
2723/// --code before guard--
2724/// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2725/// --code after guard--
2726///
2727/// into
2728///
2729/// --code before guard--
2730/// br i1 %cond, label %guarded, label %deopt
2731///
2732/// guarded:
2733/// --code after guard--
2734///
2735/// deopt:
2736/// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2737/// unreachable
2738///
2739/// It also makes all relevant DT and LI updates, so that all structures are in
2740/// valid state after this transform.
2741 static BranchInst *turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
2742 DominatorTree &DT, LoopInfo &LI,
2743 MemorySSAUpdater *MSSAU) {
2745 LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
2746 BasicBlock *CheckBB = GI->getParent();
2747
2748 if (MSSAU && VerifyMemorySSA)
2749 MSSAU->getMemorySSA()->verifyMemorySSA();
2750
2751 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2752 Instruction *DeoptBlockTerm =
2753 SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true,
2754 GI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2755 BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2756 // SplitBlockAndInsertIfThen inserts control flow that branches to
2757 // DeoptBlockTerm if the condition is true. We want the opposite.
2758 CheckBI->swapSuccessors();
2759
2760 BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2761 GuardedBlock->setName("guarded");
2762 CheckBI->getSuccessor(1)->setName("deopt");
2763 BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2764
2765 if (MSSAU)
2766 MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2767
2768 GI->moveBefore(DeoptBlockTerm);
2769 GI->setArgOperand(0, ConstantInt::getFalse(GI->getContext()));
2770
2771 if (MSSAU) {
2772 MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
2773 MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2774 if (VerifyMemorySSA)
2775 MSSAU->getMemorySSA()->verifyMemorySSA();
2776 }
2777
2778 if (VerifyLoopInfo)
2779 LI.verify(DT);
2780 ++NumGuards;
2781 return CheckBI;
2782}
2783
2784/// Cost multiplier is a way to limit potentially exponential behavior
2785/// of loop-unswitch. Cost is multiplied in proportion to 2^(number of unswitch
2786/// candidates available). It also accounts for the number of "sibling" loops,
2787/// with the idea of accounting for previous unswitches that already happened on
2788/// this cluster of loops. There was an attempt to keep this formula simple,
2789/// just enough to limit the worst case behavior. Even if it is not that simple
2790/// now it is still not an attempt to provide a detailed heuristic size
2791/// prediction.
2792///
2793/// TODO: Make a proper accounting of "explosion" effect for all kinds of
2794/// unswitch candidates, making adequate predictions instead of wild guesses.
2795/// That requires knowing not just the number of "remaining" candidates but
2796/// also costs of unswitching for each of these candidates.
2797 static int CalculateUnswitchCostMultiplier(
2798 const Instruction &TI, const Loop &L, const LoopInfo &LI,
2799 const DominatorTree &DT,
2800 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates) {
2801
2802 // Guards and other exiting conditions do not contribute to exponential
2803 // explosion as long as they dominate the latch (otherwise there might be
2804 // another path to the latch remaining that does not allow eliminating the
2805 // loop copy on unswitch).
2806 const BasicBlock *Latch = L.getLoopLatch();
2807 const BasicBlock *CondBlock = TI.getParent();
2808 if (DT.dominates(CondBlock, Latch) &&
2809 (isGuard(&TI) ||
2810 (TI.isTerminator() &&
2811 llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
2812 return L.contains(SuccBB);
2813 }) <= 1))) {
2814 NumCostMultiplierSkipped++;
2815 return 1;
2816 }
2817
2818 auto *ParentL = L.getParentLoop();
2819 int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2820 : std::distance(LI.begin(), LI.end()));
2821 // Count the number of clones that all the candidates might cause during
2822 // unswitching. Branch/guard/select counts as 1, switch counts as log2 of its
2823 // cases.
2824 int UnswitchedClones = 0;
2825 for (const auto &Candidate : UnswitchCandidates) {
2826 const Instruction *CI = Candidate.TI;
2827 const BasicBlock *CondBlock = CI->getParent();
2828 bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2829 if (isa<SelectInst>(CI)) {
2830 UnswitchedClones++;
2831 continue;
2832 }
2833 if (isGuard(CI)) {
2834 if (!SkipExitingSuccessors)
2835 UnswitchedClones++;
2836 continue;
2837 }
2838 int NonExitingSuccessors =
2839 llvm::count_if(successors(CondBlock),
2840 [SkipExitingSuccessors, &L](const BasicBlock *SuccBB) {
2841 return !SkipExitingSuccessors || L.contains(SuccBB);
2842 });
2843 UnswitchedClones += Log2_32(NonExitingSuccessors);
2844 }
2845
2846 // Ignore up to the "unscaled candidates" number of unswitch candidates
2847 // when calculating the power-of-two scaling of the cost. The main idea
2848 // with this control is to allow a small number of unswitches to happen
2849 // and rely more on siblings multiplier (see below) when the number
2850 // of candidates is small.
2851 unsigned ClonesPower =
2852 std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2853
2854 // Allowing top-level loops to spread a bit more than nested ones.
2855 int SiblingsMultiplier =
2856 std::max((ParentL ? SiblingsCount
2857 : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2858 1);
2859 // Compute the cost multiplier in a way that won't overflow by saturating
2860 // at an upper bound.
2861 int CostMultiplier;
2862 if (ClonesPower > Log2_32(UnswitchThreshold) ||
2863 SiblingsMultiplier > UnswitchThreshold)
2864 CostMultiplier = UnswitchThreshold;
2865 else
2866 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2867 (int)UnswitchThreshold);
2868
2869 LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier
2870 << " (siblings " << SiblingsMultiplier << " * clones "
2871 << (1 << ClonesPower) << ")"
2872 << " for unswitch candidate: " << TI << "\n");
2873 return CostMultiplier;
2874}
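// Illustrative numbers, assuming the default option values documented on this
// page (unswitch-threshold=50, unswitch-num-initial-unscaled-candidates=8,
// unswitch-siblings-toplevel-div=2): a candidate in a top-level loop that does
// not hit the early exit above, in a function with 4 top-level loops and
// candidates totalling 11 clones, gets
//   ClonesPower        = max(11 - 8, 0)   = 3
//   SiblingsMultiplier = max(4 / 2, 1)    = 2
//   CostMultiplier     = min(2 * 2^3, 50) = 16
// so that candidate's unswitching cost is scaled by 16 before being compared
// against the threshold.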
2875
2876 static bool collectUnswitchCandidates(
2877 SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,
2878 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch,
2879 const Loop &L, const LoopInfo &LI, AAResults &AA,
2880 const MemorySSAUpdater *MSSAU) {
2881 assert(UnswitchCandidates.empty() && "Should be!");
2882
2883 auto AddUnswitchCandidatesForInst = [&](Instruction *I, Value *Cond) {
2884 Cond = skipTrivialSelect(Cond);
2885 if (isa<Constant>(Cond))
2886 return;
2887 if (L.isLoopInvariant(Cond)) {
2888 UnswitchCandidates.push_back({I, {Cond}});
2889 return;
2890 }
2891 if (match(Cond, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
2892 TinyPtrVector<Value *> Invariants =
2893 collectHomogenousInstGraphLoopInvariants(
2894 L, *static_cast<Instruction *>(Cond), LI);
2895 if (!Invariants.empty())
2896 UnswitchCandidates.push_back({I, std::move(Invariants)});
2897 }
2898 };
2899
2900 // Whether or not we should also collect guards in the loop.
2901 bool CollectGuards = false;
2902 if (UnswitchGuards) {
2903 auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
2904 Intrinsic::getName(Intrinsic::experimental_guard));
2905 if (GuardDecl && !GuardDecl->use_empty())
2906 CollectGuards = true;
2907 }
2908
2909 for (auto *BB : L.blocks()) {
2910 if (LI.getLoopFor(BB) != &L)
2911 continue;
2912
2913 for (auto &I : *BB) {
2914 if (auto *SI = dyn_cast<SelectInst>(&I)) {
2915 auto *Cond = SI->getCondition();
2916 // Do not unswitch vector selects and logical and/or selects
2917 if (Cond->getType()->isIntegerTy(1) && !SI->getType()->isIntegerTy(1))
2918 AddUnswitchCandidatesForInst(SI, Cond);
2919 } else if (CollectGuards && isGuard(&I)) {
2920 auto *Cond =
2921 skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
2922 // TODO: Support AND, OR conditions and partial unswitching.
2923 if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2924 UnswitchCandidates.push_back({&I, {Cond}});
2925 }
2926 }
2927
2928 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2929 // We can only consider fully loop-invariant switch conditions as we need
2930 // to completely eliminate the switch after unswitching.
2931 if (!isa<Constant>(SI->getCondition()) &&
2932 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
2933 UnswitchCandidates.push_back({SI, {SI->getCondition()}});
2934 continue;
2935 }
2936
2937 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2938 if (!BI || !BI->isConditional() ||
2939 BI->getSuccessor(0) == BI->getSuccessor(1))
2940 continue;
2941
2942 AddUnswitchCandidatesForInst(BI, BI->getCondition());
2943 }
2944
2945 if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
2946 !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
2947 return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
2948 })) {
2949 MemorySSA *MSSA = MSSAU->getMemorySSA();
2950 if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
2951 LLVM_DEBUG(
2952 dbgs() << "simple-loop-unswitch: Found partially invariant condition "
2953 << *Info->InstToDuplicate[0] << "\n");
2954 PartialIVInfo = *Info;
2955 PartialIVCondBranch = L.getHeader()->getTerminator();
2956 TinyPtrVector<Value *> ValsToDuplicate;
2957 llvm::append_range(ValsToDuplicate, Info->InstToDuplicate);
2958 UnswitchCandidates.push_back(
2959 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
2960 }
2961 }
2962 return !UnswitchCandidates.empty();
2963}
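// Illustrative IR shape of a collected candidate (block and value names are
// invented): a branch or switch inside the loop whose condition is defined
// outside of it, e.g.
//
//   loop.body:
//     ...
//     br i1 %inv, label %a, label %b   ; %inv computed before entering the loop
//
// For conditions built from logical and/or, only the loop-invariant operands of
// the condition graph are recorded as the candidate's invariants, enabling a
// partial unswitch.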
2964
2965/// Tries to canonicalize condition described by:
2966///
2967/// br (LHS pred RHS), label IfTrue, label IfFalse
2968///
2969/// into its equivalent where `Pred` is something that we support for injected
2970/// invariants (so far it is limited to ult), LHS in canonicalized form is
2971/// non-invariant and RHS is an invariant.
2972 static void canonicalizeForInvariantConditionInjection(
2973 ICmpInst::Predicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue,
2974 BasicBlock *&IfFalse, const Loop &L) {
2975 if (!L.contains(IfTrue)) {
2976 Pred = ICmpInst::getInversePredicate(Pred);
2977 std::swap(IfTrue, IfFalse);
2978 }
2979
2980 // Move loop-invariant argument to RHS position.
2981 if (L.isLoopInvariant(LHS)) {
2982 Pred = ICmpInst::getSwappedPredicate(Pred);
2983 std::swap(LHS, RHS);
2984 }
2985
2986 if (Pred == ICmpInst::ICMP_SGE && match(RHS, m_Zero())) {
2987 // Turn "x >=s 0" into "x <u SIGNED_MIN" (SIGNED_MIN viewed as unsigned)
2988 Pred = ICmpInst::ICMP_ULT;
2989 RHS = ConstantInt::get(
2990 RHS->getContext(),
2991 APInt::getSignedMinValue(RHS->getType()->getIntegerBitWidth()));
2992 }
2993}
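// Worked example (illustrative): for an i32 loop-variant value %x, the branch
//   br (icmp sge i32 %x, 0), label %in.loop, label %exit
// canonicalizes to
//   br (icmp ult i32 %x, -2147483648), label %in.loop, label %exit
// i.e. a comparison against the bit pattern 0x80000000, because "x >=s 0" holds
// exactly when x <u 2^31.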
2994
2995/// Returns true, if predicate described by ( \p Pred, \p LHS, \p RHS )
2996/// succeeding into blocks ( \p IfTrue, \p IfFalse) can be optimized by
2997/// injecting a loop-invariant condition.
2998 static bool shouldTryInjectInvariantCondition(
2999 const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS,
3000 const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L) {
3001 if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
3002 return false;
3003 // TODO: Support other predicates.
3004 if (Pred != ICmpInst::ICMP_ULT)
3005 return false;
3006 // TODO: Support non-loop-exiting branches?
3007 if (!L.contains(IfTrue) || L.contains(IfFalse))
3008 return false;
3009 // FIXME: For some reason this causes problems with MSSA updates, need to
3010 // investigate why. So far, just don't unswitch latch.
3011 if (L.getHeader() == IfTrue)
3012 return false;
3013 return true;
3014}
3015
3016/// Returns true, if metadata on \p BI allows us to optimize branching into \p
3017/// TakenSucc via injection of invariant conditions. The branch should be hot
3018/// enough and not previously unswitched; the information about this comes from
3019/// the metadata.
3020 bool shouldTryInjectBasingOnMetadata(const BranchInst *BI,
3021 const BasicBlock *TakenSucc) {
3022 SmallVector<uint32_t> Weights;
3023 if (!extractBranchWeights(*BI, Weights))
3024 return false;
3025 unsigned T = InjectInvariantConditionHotnesThreshold;
3026 BranchProbability LikelyTaken(T - 1, T);
3027
3028 assert(Weights.size() == 2 && "Unexpected profile data!");
3029 size_t Idx = BI->getSuccessor(0) == TakenSucc ? 0 : 1;
3030 auto Num = Weights[Idx];
3031 auto Denom = Weights[0] + Weights[1];
3032 // Degenerate or overflowed metadata.
3033 if (Denom == 0 || Num > Denom)
3034 return false;
3035 BranchProbability ActualTaken(Num, Denom);
3036 if (LikelyTaken > ActualTaken)
3037 return false;
3038 return true;
3039}
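// Worked example (illustrative): with the default value 16 of
// simple-loop-unswitch-inject-invariant-condition-hotness-threshold, LikelyTaken
// is 15/16. Branch weights of 99:1 towards TakenSucc give ActualTaken = 99/100,
// which is at least 15/16, so injection may proceed; weights of 1:1 give 1/2 and
// the branch is rejected.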
3040
3041/// Materialize pending invariant condition of the given candidate into IR. The
3042/// injected loop-invariant condition implies the original loop-variant branch
3043/// condition, so the materialization turns
3044///
3045/// loop_block:
3046/// ...
3047/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3048///
3049/// into
3050///
3051/// preheader:
3052/// %invariant_cond = LHS pred RHS
3053/// ...
3054/// loop_block:
3055/// br i1 %invariant_cond, label InLoopSucc, label OriginalCheck
3056/// OriginalCheck:
3057/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3058/// ...
3059static NonTrivialUnswitchCandidate
3060injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
3061 DominatorTree &DT, LoopInfo &LI,
3062 AssumptionCache &AC, MemorySSAUpdater *MSSAU) {
3063 assert(Candidate.hasPendingInjection() && "Nothing to inject!");
3064 BasicBlock *Preheader = L.getLoopPreheader();
3065 assert(Preheader && "Loop is not in simplified form?");
3066 assert(LI.getLoopFor(Candidate.TI->getParent()) == &L &&
3067 "Unswitching branch of inner loop!");
3068
3069 auto Pred = Candidate.PendingInjection->Pred;
3070 auto *LHS = Candidate.PendingInjection->LHS;
3071 auto *RHS = Candidate.PendingInjection->RHS;
3072 auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;
3073 auto *TI = cast<BranchInst>(Candidate.TI);
3074 auto *BB = Candidate.TI->getParent();
3075 auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1)
3076 : TI->getSuccessor(0);
3077 // FIXME: Remove this once limitation on successors is lifted.
3078 assert(L.contains(InLoopSucc) && "Not supported yet!");
3079 assert(!L.contains(OutOfLoopSucc) && "Not supported yet!");
3080 auto &Ctx = BB->getContext();
3081
3082 IRBuilder<> Builder(Preheader->getTerminator());
3083 assert(ICmpInst::isUnsigned(Pred) && "Not supported yet!");
3084 if (LHS->getType() != RHS->getType()) {
3085 if (LHS->getType()->getIntegerBitWidth() <
3086 RHS->getType()->getIntegerBitWidth())
3087 LHS = Builder.CreateZExt(LHS, RHS->getType(), LHS->getName() + ".wide");
3088 else
3089 RHS = Builder.CreateZExt(RHS, LHS->getType(), RHS->getName() + ".wide");
3090 }
3091 // Do not use builder here: CreateICmp may simplify this into a constant and
3092 // unswitching will break. Better optimize it away later.
3093 auto *InjectedCond =
3094 ICmpInst::Create(Instruction::ICmp, Pred, LHS, RHS, "injected.cond",
3095 Preheader->getTerminator());
3096
3097 BasicBlock *CheckBlock = BasicBlock::Create(Ctx, BB->getName() + ".check",
3098 BB->getParent(), InLoopSucc);
3099 Builder.SetInsertPoint(TI);
3100 auto *InvariantBr =
3101 Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
3102
3103 Builder.SetInsertPoint(CheckBlock);
3104 Builder.CreateCondBr(TI->getCondition(), TI->getSuccessor(0),
3105 TI->getSuccessor(1));
3106 TI->eraseFromParent();
3107
3108 // Fixup phis.
3109 for (auto &I : *InLoopSucc) {
3110 auto *PN = dyn_cast<PHINode>(&I);
3111 if (!PN)
3112 break;
3113 auto *Inc = PN->getIncomingValueForBlock(BB);
3114 PN->addIncoming(Inc, CheckBlock);
3115 }
3116 OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);
3117
3118 SmallVector<DominatorTree::UpdateType, 4> DTUpdates = {
3119 { DominatorTree::Insert, BB, CheckBlock },
3120 { DominatorTree::Insert, CheckBlock, InLoopSucc },
3121 { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
3122 { DominatorTree::Delete, BB, OutOfLoopSucc }
3123 };
3124
3125 DT.applyUpdates(DTUpdates);
3126 if (MSSAU)
3127 MSSAU->applyUpdates(DTUpdates, DT);
3128 L.addBasicBlockToLoop(CheckBlock, LI);
3129
3130#ifndef NDEBUG
3131 DT.verify();
3132 LI.verify(DT);
3133 if (MSSAU && VerifyMemorySSA)
3134 MSSAU->getMemorySSA()->verifyMemorySSA();
3135#endif
3136
3137 // TODO: In fact, cost of unswitching a new invariant candidate is *slightly*
3138 // higher because we have just inserted a new block. Need to think how to
3139 // adjust the cost of injected candidates when it was first computed.
3140 LLVM_DEBUG(dbgs() << "Injected a new loop-invariant branch " << *InvariantBr
3141 << " and considering it for unswitching.");
3142 ++NumInvariantConditionsInjected;
3143 return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },
3144 Candidate.Cost);
3145}
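// Concrete shape of the result (illustrative; block and value names are
// schematic, but the "injected.cond" and ".check" names match those created
// above):
//
//   preheader:
//     %injected.cond = icmp ule i32 %inv1, %inv2
//     ...
//   loop_block:
//     br i1 %injected.cond, label %InLoopSucc, label %loop_block.check
//   loop_block.check:
//     br i1 %variant_cond, label %InLoopSucc, label %OutOfLoopSucc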
3146
3147/// Given chain of loop branch conditions looking like:
3148/// br (Variant < Invariant1)
3149/// br (Variant < Invariant2)
3150/// br (Variant < Invariant3)
3151/// ...
3152/// collect set of invariant conditions on which we want to unswitch, which
3153/// look like:
3154/// Invariant1 <= Invariant2
3155/// Invariant2 <= Invariant3
3156/// ...
3157/// Though they might not immediately exist in the IR, we can still inject them.
3158 static bool insertCandidatesWithPendingInjections(
3159 SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, Loop &L,
3160 ICmpInst::Predicate Pred, ArrayRef<CompareDesc> Compares,
3161 const DominatorTree &DT) {
3162
3164 assert(ICmpInst::isStrictPredicate(Pred));
3165 if (Compares.size() < 2)
3166 return false;
3167 ICmpInst::Predicate NonStrictPred = ICmpInst::getNonStrictPredicate(Pred);
3168 for (auto Prev = Compares.begin(), Next = Compares.begin() + 1;
3169 Next != Compares.end(); ++Prev, ++Next) {
3170 Value *LHS = Next->Invariant;
3171 Value *RHS = Prev->Invariant;
3172 BasicBlock *InLoopSucc = Prev->InLoopSucc;
3173 InjectedInvariant ToInject(NonStrictPred, LHS, RHS, InLoopSucc);
3174 NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },
3175 std::nullopt, std::move(ToInject));
3176 UnswitchCandidates.push_back(std::move(Candidate));
3177 }
3178 return true;
3179}
3180
3181/// Collect unswitch candidates by invariant conditions that are not immediately
3182/// present in the loop. However, they can be injected into the code if we
3183/// decide it's profitable.
3184/// An example of such conditions is the following:
3185///
3186/// for (...) {
3187/// x = load ...
3188/// if (! x <u C1) break;
3189/// if (! x <u C2) break;
3190/// <do something>
3191/// }
3192///
3193/// We can unswitch by condition "C1 <=u C2". If that is true, then "x <u C1 <=
3194/// C2" automatically implies "x <u C2", so we can get rid of one of the
3195/// loop-variant checks in the unswitched loop version.
3196 static bool collectUnswitchCandidatesWithInjections(
3197 SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,
3198 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L,
3199 const DominatorTree &DT, const LoopInfo &LI, AAResults &AA,
3200 const MemorySSAUpdater *MSSAU) {
3201 if (!InjectInvariantConditions)
3202 return false;
3203
3204 if (!DT.isReachableFromEntry(L.getHeader()))
3205 return false;
3206 auto *Latch = L.getLoopLatch();
3207 // Need to have a single latch and a preheader.
3208 if (!Latch)
3209 return false;
3210 assert(L.getLoopPreheader() && "Must have a preheader!");
3211
3212 DenseMap<Value *, SmallVector<CompareDesc, 4>> CandidatesULT;
3211
3213 // Traverse the conditions that dominate latch (and therefore dominate each
3214 // other).
3215 for (auto *DTN = DT.getNode(Latch); L.contains(DTN->getBlock());
3216 DTN = DTN->getIDom()) {
3217 ICmpInst::Predicate Pred;
3218 Value *LHS = nullptr, *RHS = nullptr;
3219 BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
3220 auto *BB = DTN->getBlock();
3221 // Ignore inner loops.
3222 if (LI.getLoopFor(BB) != &L)
3223 continue;
3224 auto *Term = BB->getTerminator();
3225 if (!match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
3226 m_BasicBlock(IfTrue), m_BasicBlock(IfFalse))))
3227 continue;
3228 if (!LHS->getType()->isIntegerTy())
3229 continue;
3230 canonicalizeForInvariantConditionInjection(Pred, LHS, RHS, IfTrue, IfFalse,
3231 L);
3232 if (!shouldTryInjectInvariantCondition(Pred, LHS, RHS, IfTrue, IfFalse, L))
3233 continue;
3234 if (!shouldTryInjectBasingOnMetadata(cast<BranchInst>(Term), IfTrue))
3235 continue;
3236 // Strip ZEXT for unsigned predicate.
3237 // TODO: once signed predicates are supported, also strip SEXT.
3238 CompareDesc Desc(cast<BranchInst>(Term), RHS, IfTrue);
3239 while (auto *Zext = dyn_cast<ZExtInst>(LHS))
3240 LHS = Zext->getOperand(0);
3241 CandidatesULT[LHS].push_back(Desc);
3242 }
3243
3244 bool Found = false;
3245 for (auto &It : CandidatesULT)
3246 Found |= insertCandidatesWithPendingInjections(
3247 UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);
3248 return Found;
3249}
3250
3251 static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI) {
3252 if (!L.isSafeToClone())
3253 return false;
3254 for (auto *BB : L.blocks())
3255 for (auto &I : *BB) {
3256 if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
3257 return false;
3258 if (auto *CB = dyn_cast<CallBase>(&I)) {
3259 assert(!CB->cannotDuplicate() && "Checked by L.isSafeToClone().");
3260 if (CB->isConvergent())
3261 return false;
3262 }
3263 }
3264
3265 // Check if there are irreducible CFG cycles in this loop. If so, we cannot
3266 // easily unswitch non-trivial edges out of the loop. Doing so might turn the
3267 // irreducible control flow into reducible control flow and introduce new
3268 // loops "out of thin air". If we ever discover important use cases for doing
3269 // this, we can add support to loop unswitch, but it is a lot of complexity
3270 // for what seems little or no real world benefit.
3271 LoopBlocksRPO RPOT(&L);
3272 RPOT.perform(&LI);
3273 if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
3274 return false;
3275
3276 SmallVector<BasicBlock *, 4> ExitBlocks;
3277 L.getUniqueExitBlocks(ExitBlocks);
3278 // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
3279 // instruction as we don't know how to split those exit blocks.
3280 // FIXME: We should teach SplitBlock to handle this and remove this
3281 // restriction.
3282 for (auto *ExitBB : ExitBlocks) {
3283 auto *I = ExitBB->getFirstNonPHI();
3284 if (isa<CleanupPadInst>(I) || isa<CatchSwitchInst>(I)) {
3285 LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "
3286 "in exit block\n");
3287 return false;
3288 }
3289 }
3290
3291 return true;
3292}
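// Illustrative: the checks above reject, for example, a loop containing a call
// marked "convergent" (such a call must not be made control-dependent on extra
// conditions, which unswitching would do), a token value used outside its
// defining block, irreducible control flow, or an exit block that starts with a
// cleanuppad/catchswitch.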
3293
3294static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
3295 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates, const Loop &L,
3296 const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC,
3297 const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo) {
3298 // Given that unswitching these terminators will require duplicating parts of
3299 // the loop, we need to be able to model that cost. Compute the ephemeral
3300 // values and set up a data structure to hold per-BB costs. We cache each
3301 // block's cost so that we don't recompute this when considering different
3302 // subsets of the loop for duplication during unswitching.
3303 SmallPtrSet<const Value *, 4> EphValues;
3304 CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
3305 SmallDenseMap<BasicBlock *, InstructionCost, 4> BBCostMap;
3306
3307 // Compute the cost of each block, as well as the total loop cost. Also, bail
3308 // out if we see instructions which are incompatible with loop unswitching
3309 // (convergent, noduplicate, or cross-basic-block tokens).
3310 // FIXME: We might be able to safely handle some of these in non-duplicated
3311 // regions.
3312 TargetTransformInfo::TargetCostKind CostKind =
3313 L.getHeader()->getParent()->hasMinSize()
3314 ? TargetTransformInfo::TCK_CodeSize
3315 : TargetTransformInfo::TCK_SizeAndLatency;
3316 InstructionCost LoopCost = 0;
3317 for (auto *BB : L.blocks()) {
3318 InstructionCost Cost = 0;
3319 for (auto &I : *BB) {
3320 if (EphValues.count(&I))
3321 continue;
3322 Cost += TTI.getInstructionCost(&I, CostKind);
3323 }
3324 assert(Cost >= 0 && "Must not have negative costs!");
3325 LoopCost += Cost;
3326 assert(LoopCost >= 0 && "Must not have negative loop costs!");
3327 BBCostMap[BB] = Cost;
3328 }
3329 LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");
3330
3331 // Now we find the best candidate by searching for the one with the following
3332 // properties in order:
3333 //
3334 // 1) An unswitching cost below the threshold
3335 // 2) The smallest number of duplicated unswitch candidates (to avoid
3336 // creating redundant subsequent unswitching)
3337 // 3) The smallest cost after unswitching.
3338 //
3339 // We prioritize reducing fanout of unswitch candidates provided the cost
3340 // remains below the threshold because this has a multiplicative effect.
3341 //
3342 // This requires memoizing each dominator subtree to avoid redundant work.
3343 //
3344 // FIXME: Need to actually do the number of candidates part above.
3345 SmallDenseMap<DomTreeNode *, InstructionCost, 4> DTCostMap;
3346 // Given a terminator which might be unswitched, computes the non-duplicated
3347 // cost for that terminator.
3348 auto ComputeUnswitchedCost = [&](Instruction &TI,
3349 bool FullUnswitch) -> InstructionCost {
3350 // Unswitching selects unswitches the entire loop.
3351 if (isa<SelectInst>(TI))
3352 return LoopCost;
3353
3354 BasicBlock &BB = *TI.getParent();
3355 InstructionCost Cost = 0;
3356
3357 SmallPtrSet<BasicBlock *, 4> Visited;
3358 for (BasicBlock *SuccBB : successors(&BB)) {
3359 // Don't count successors more than once.
3360 if (!Visited.insert(SuccBB).second)
3361 continue;
3362
3363 // If this is a partial unswitch candidate, then it must be a conditional
3364 // branch with a condition of either `or`, `and`, their corresponding
3365 // select forms or partially invariant instructions. In that case, one of
3366 // the successors is necessarily duplicated, so don't even try to remove
3367 // its cost.
3368 if (!FullUnswitch) {
3369 auto &BI = cast<BranchInst>(TI);
3370 Value *Cond = skipTrivialSelect(BI.getCondition());
3371 if (match(Cond, m_LogicalAnd())) {
3372 if (SuccBB == BI.getSuccessor(1))
3373 continue;
3374 } else if (match(Cond, m_LogicalOr())) {
3375 if (SuccBB == BI.getSuccessor(0))
3376 continue;
3377 } else if ((PartialIVInfo.KnownValue->isOneValue() &&
3378 SuccBB == BI.getSuccessor(0)) ||
3379 (!PartialIVInfo.KnownValue->isOneValue() &&
3380 SuccBB == BI.getSuccessor(1)))
3381 continue;
3382 }
3383
3384 // This successor's domtree will not need to be duplicated after
3385 // unswitching if the edge to the successor dominates it (and thus the
3386 // entire tree). This essentially means there is no other path into this
3387 // subtree and so it will end up live in only one clone of the loop.
3388 if (SuccBB->getUniquePredecessor() ||
3389 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
3390 return PredBB == &BB || DT.dominates(SuccBB, PredBB);
3391 })) {
3392 Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
3393 assert(Cost <= LoopCost &&
3394 "Non-duplicated cost should never exceed total loop cost!");
3395 }
3396 }
3397
3398 // Now scale the cost by the number of unique successors minus one. We
3399 // subtract one because there is already at least one copy of the entire
3400 // loop. This is computing the new cost of unswitching a condition.
3401 // Note that guards always have 2 unique successors that are implicit and
3402 // will be materialized if we decide to unswitch it.
3403 int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
3404 assert(SuccessorsCount > 1 &&
3405 "Cannot unswitch a condition without multiple distinct successors!");
3406 return (LoopCost - Cost) * (SuccessorsCount - 1);
3407 };
3408
3409 std::optional<NonTrivialUnswitchCandidate> Best;
3410 for (auto &Candidate : UnswitchCandidates) {
3411 Instruction &TI = *Candidate.TI;
3412 ArrayRef<Value *> Invariants = Candidate.Invariants;
3413 BranchInst *BI = dyn_cast<BranchInst>(&TI);
3414 bool FullUnswitch =
3415 !BI || Candidate.hasPendingInjection() ||
3416 (Invariants.size() == 1 &&
3417 Invariants[0] == skipTrivialSelect(BI->getCondition()));
3418 InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
3419 // Calculate cost multiplier which is a tool to limit potentially
3420 // exponential behavior of loop-unswitch.
3422 int CostMultiplier =
3423 CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
3424 assert(
3425 (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
3426 "cost multiplier needs to be in the range of 1..UnswitchThreshold");
3427 CandidateCost *= CostMultiplier;
3428 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3429 << " (multiplier: " << CostMultiplier << ")"
3430 << " for unswitch candidate: " << TI << "\n");
3431 } else {
3432 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3433 << " for unswitch candidate: " << TI << "\n");
3434 }
3435
3436 if (!Best || CandidateCost < Best->Cost) {
3437 Best = Candidate;
3438 Best->Cost = CandidateCost;
3439 }
3440 }
3441 assert(Best && "Must be!");
3442 return *Best;
3443}
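// Worked example (illustrative): for a loop with LoopCost = 40 and a branch
// candidate with two successors, where the dominator subtree reachable only
// through one of the successors costs 10, ComputeUnswitchedCost returns
// (40 - 10) * (2 - 1) = 30. With the default unswitch-threshold of 50 and no
// cost multiplier applied, such a candidate is cheap enough to unswitch.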
3444
3445// Insert a freeze on an unswitched branch if all of the following are true:
3446// 1. The freeze-loop-unswitch-cond option is true.
3447// 2. The branch may not execute in the loop pre-transformation. If a branch that
3448// may not execute could cause UB, it would always cause UB once it is hoisted
3449// outside of the loop. Insert a freeze to prevent this case.
3450// 3. The branch condition may be poison or undef.
3451 static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT,
3452 AssumptionCache &AC) {
3453 assert(isa<BranchInst>(TI) || isa<SwitchInst>(TI));
3454 if (!FreezeLoopUnswitchCond)
3455 return false;
3456
3457 ICFLoopSafetyInfo SafetyInfo;
3458 SafetyInfo.computeLoopSafetyInfo(&L);
3459 if (SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
3460 return false;
3461
3462 Value *Cond;
3463 if (BranchInst *BI = dyn_cast<BranchInst>(&TI))
3464 Cond = skipTrivialSelect(BI->getCondition());
3465 else
3466 Cond = skipTrivialSelect(cast<SwitchInst>(&TI)->getCondition());
3467 return !isGuaranteedNotToBeUndefOrPoison(
3468 Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
3469}
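// Illustrative effect (names invented): when this returns true, the hoisted
// copy of the condition is frozen before it is branched on, e.g.
//   %cond.fr = freeze i1 %cond
//   br i1 %cond.fr, label %loop.us, label %loop
// so a poison or undef condition cannot turn into immediate UB once the branch
// executes unconditionally in the preheader.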
3470
3471 static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
3472 AssumptionCache &AC, AAResults &AA,
3473 TargetTransformInfo &TTI, ScalarEvolution *SE,
3474 MemorySSAUpdater *MSSAU,
3475 LPMUpdater &LoopUpdater) {
3476 // Collect all invariant conditions within this loop (as opposed to an inner
3477 // loop which would be handled when visiting that inner loop).
3478 SmallVector<NonTrivialUnswitchCandidate, 4> UnswitchCandidates;
3479 IVConditionInfo PartialIVInfo;
3480 Instruction *PartialIVCondBranch = nullptr;
3481 collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
3482 PartialIVCondBranch, L, LI, AA, MSSAU);
3483 if (!findOptionMDForLoop(&L, "llvm.loop.unswitch.injection.disable"))
3484 collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo,
3485 PartialIVCondBranch, L, DT, LI, AA,
3486 MSSAU);
3487 // If we didn't find any candidates, we're done.
3488 if (UnswitchCandidates.empty())
3489 return false;
3490
3491 LLVM_DEBUG(
3492 dbgs() << "Considering " << UnswitchCandidates.size()
3493 << " non-trivial loop invariant conditions for unswitching.\n");
3494
3495 NonTrivialUnswitchCandidate Best = findBestNonTrivialUnswitchCandidate(
3496 UnswitchCandidates, L, DT, LI, AC, TTI, PartialIVInfo);
3497
3498 assert(Best.TI && "Failed to find loop unswitch candidate");
3499 assert(Best.Cost && "Failed to compute cost");
3500
3501 if (*Best.Cost >= UnswitchThreshold) {
3502 LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << *Best.Cost
3503 << "\n");
3504 return false;
3505 }
3506
3507 bool InjectedCondition = false;
3508 if (Best.hasPendingInjection()) {
3509 Best = injectPendingInvariantConditions(Best, L, DT, LI, AC, MSSAU);
3510 InjectedCondition = true;
3511 }
3512 assert(!Best.hasPendingInjection() &&
3513 "All injections should have been done by now!");
3514
3515 if (Best.TI != PartialIVCondBranch)
3516 PartialIVInfo.InstToDuplicate.clear();
3517
3518 bool InsertFreeze;
3519 if (auto *SI = dyn_cast<SelectInst>(Best.TI)) {
3520 // If the best candidate is a select, turn it into a branch. Select
3521 // instructions with a poison conditional do not propagate poison, but
3522 // branching on poison causes UB. Insert a freeze on the select
3523 // conditional to prevent UB after turning the select into a branch.
3524 InsertFreeze = !isGuaranteedNotToBeUndefOrPoison(
3525 SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT);
3526 Best.TI = turnSelectIntoBranch(SI, DT, LI, MSSAU, &AC);
3527 } else {
3528 // If the best candidate is a guard, turn it into a branch.
3529 if (isGuard(Best.TI))
3530 Best.TI =
3531 turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
3532 InsertFreeze = shouldInsertFreeze(L, *Best.TI, DT, AC);
3533 }
3534
3535 LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Cost
3536 << ") terminator: " << *Best.TI << "\n");
3537 unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT,
3538 LI, AC, SE, MSSAU, LoopUpdater, InsertFreeze,
3539 InjectedCondition);
3540 return true;
3541}
3542
3543/// Unswitch control flow predicated on loop invariant conditions.
3544///
3545/// This first hoists all branches or switches which are trivial (i.e., do not
3546/// require duplicating any part of the loop) out of the loop body. It then
3547/// looks at other loop invariant control flows and tries to unswitch those as
3548/// well by cloning the loop if the result is small enough.
3549///
3550/// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
3551/// also updated based on the unswitch. The `MSSA` analysis is also updated if
3552/// valid (i.e. its use is enabled).
3553///
3554/// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
3555/// true, we will attempt to do non-trivial unswitching as well as trivial
3556/// unswitching.
3557///
3558/// The `postUnswitch` function will be run after unswitching is complete
3559/// with information on whether or not the provided loop remains a loop and
3560/// a list of new sibling loops created.
3561///
3562/// If `SE` is non-null, we will update that analysis based on the unswitching
3563/// done.
3564static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
3565 AssumptionCache &AC, AAResults &AA,
3566 TargetTransformInfo &TTI, bool Trivial,
3567 bool NonTrivial, ScalarEvolution *SE,
3568 MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI,
3569 BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater) {
3570 assert(L.isRecursivelyLCSSAForm(DT, LI) &&
3571 "Loops must be in LCSSA form before unswitching.");
3572
3573 // Must be in loop simplified form: we need a preheader and dedicated exits.
3574 if (!L.isLoopSimplifyForm())
3575 return false;
3576
3577 // Try trivial unswitch first before looping over other basic blocks in the loop.
3578 if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
3579 // If we unswitched successfully we will want to clean up the loop before
3580 // processing it further so just mark it as unswitched and return.
3581 postUnswitch(L, LoopUpdater, L.getName(),
3582 /*CurrentLoopValid*/ true, /*PartiallyInvariant*/ false,
3583 /*InjectedCondition*/ false, {});
3584 return true;
3585 }
3586
3587 const Function *F = L.getHeader()->getParent();
3588
3589 // Check whether we should continue with non-trivial conditions.
3590 // EnableNonTrivialUnswitch: Global variable that forces non-trivial
3591 // unswitching for testing and debugging.
3592 // NonTrivial: Parameter that enables non-trivial unswitching for this
3593 // invocation of the transform. But this should be allowed only
3594 // for targets without branch divergence.
3595 //
3596 // FIXME: If divergence analysis becomes available to a loop
3597 // transform, we should allow unswitching for non-trivial uniform
3598 // branches even on targets that have divergence.
3599 // https://bugs.llvm.org/show_bug.cgi?id=48819
3600 bool ContinueWithNonTrivial =
3601 EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence(F));
3602 if (!ContinueWithNonTrivial)
3603 return false;
3604
3605 // Skip non-trivial unswitching for optsize functions.
3606 if (F->hasOptSize())
3607 return false;
3608
3609 // Returns true if Loop L's loop nest is cold, i.e. if the headers of L,
3610 // of the loops L is nested in, and of the loops nested in L are all cold.
3611 auto IsLoopNestCold = [&](const Loop *L) {
3612 // Check L and all of its parent loops.
3613 auto *Parent = L;
3614 while (Parent) {
3615 if (!PSI->isColdBlock(Parent->getHeader(), BFI))
3616 return false;
3617 Parent = Parent->getParentLoop();
3618 }
3619 // Next check all loops nested within L.
3620 SmallVector<const Loop *, 4> Worklist;
3621 Worklist.insert(Worklist.end(), L->getSubLoops().begin(),
3622 L->getSubLoops().end());
3623 while (!Worklist.empty()) {
3624 auto *CurLoop = Worklist.pop_back_val();
3625 if (!PSI->isColdBlock(CurLoop->getHeader(), BFI))
3626 return false;
3627 Worklist.insert(Worklist.end(), CurLoop->getSubLoops().begin(),
3628 CurLoop->getSubLoops().end());
3629 }
3630 return true;
3631 };
3632
3633 // Skip cold loops in cold loop nests, as unswitching them brings little
3634 // benefit but increases the code size
3635 if (PSI && PSI->hasProfileSummary() && BFI && IsLoopNestCold(&L)) {
3636 LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
3637 return false;
3638 }
3639
3640 // Perform legality checks.
3641 if (!isSafeForNoNTrivialUnswitching(L, LI))
3642 return false;
3643
3644 // For non-trivial unswitching, because it often creates new loops, we rely on
3645 // the pass manager to iterate on the loops rather than trying to immediately
3646 // reach a fixed point. There is no substantial advantage to iterating
3647 // internally, and if any of the new loops are simplified enough to contain
3648 // trivial unswitching we want to prefer those.
3649
3650 // Try to unswitch the best invariant condition. We prefer this full unswitch to
3651 // a partial unswitch when possible below the threshold.
3652 if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, SE, MSSAU, LoopUpdater))
3653 return true;
3654
3655 // No other opportunities to unswitch.
3656 return false;
3657}
3658
3659 PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
3660 LoopStandardAnalysisResults &AR,
3661 LPMUpdater &U) {
3662 Function &F = *L.getHeader()->getParent();
3663 (void)F;
3664 ProfileSummaryInfo *PSI = nullptr;
3665 if (auto OuterProxy =
3666 AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR)
3667 .getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))
3668 PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
3669 LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
3670 << "\n");
3671
3672 std::optional<MemorySSAUpdater> MSSAU;
3673 if (AR.MSSA) {
3674 MSSAU = MemorySSAUpdater(AR.MSSA);
3675 if (VerifyMemorySSA)
3676 AR.MSSA->verifyMemorySSA();
3677 }
3678 if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
3679 &AR.SE, MSSAU ? &*MSSAU : nullptr, PSI, AR.BFI, U))
3680 return PreservedAnalyses::all();
3681
3682 if (AR.MSSA && VerifyMemorySSA)
3683 AR.MSSA->verifyMemorySSA();
3684
3685 // Historically this pass has had issues with the dominator tree so verify it
3686 // in asserts builds.
3687 assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
3688
3689 auto PA = getLoopPassPreservedAnalyses();
3690 if (AR.MSSA)
3691 PA.preserve<MemorySSAAnalysis>();
3692 return PA;
3693}
3694
3695 void SimpleLoopUnswitchPass::printPipeline(
3696 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
3697 static_cast<PassInfoMixin<SimpleLoopUnswitchPass> *>(this)->printPipeline(
3698 OS, MapClassName2PassName);
3699
3700 OS << '<';
3701 OS << (NonTrivial ? "" : "no-") << "nontrivial;";
3702 OS << (Trivial ? "" : "no-") << "trivial";
3703 OS << '>';
3704}
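// For reference, the printed pipeline form looks like
//   simple-loop-unswitch<nontrivial;trivial>
// and the pass can be requested explicitly with, for example,
//   opt -passes='loop-mssa(simple-loop-unswitch<nontrivial>)'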
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
This header provides classes for managing per-loop analyses.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
LLVMContext & Context
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
Provides some synthesis utilities to produce sequences of values.
This file implements a set that has insertion order iteration characteristics.
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater)
Unswitch control flow predicated on loop invariant conditions.
static void canonicalizeForInvariantConditionInjection(ICmpInst::Predicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue, BasicBlock *&IfFalse, const Loop &L)
Tries to canonicalize condition described by:
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
static int CalculateUnswitchCostMultiplier(const Instruction &TI, const Loop &L, const LoopInfo &LI, const DominatorTree &DT, ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU)
Copy a set of loop invariant values, and conditionally branch on them.
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root, const LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy > > VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
static BranchInst * turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, AssumptionCache *AC)
Turns a select instruction into implicit control flow branch, making the following replacement:
static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)
void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName, bool CurrentLoopValid, bool PartiallyInvariant, bool InjectedCondition, ArrayRef< Loop * > NewLoops)
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, const Instruction *I, AssumptionCache *AC, const DominatorTree &DT)
Copy a set of loop invariant values ToDuplicate and insert them at the end of BB and conditionally br...
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
static bool shouldTryInjectInvariantCondition(const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L)
Returns true, if predicate described by ( Pred, LHS, RHS ) succeeding into blocks ( IfTrue,...
static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates, const Loop &L, const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC, const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo)
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
static Value * skipTrivialSelect(Value *Cond)
static Loop * getTopMostExitingLoop(const BasicBlock *ExitBB, const LoopInfo &LI)
static bool collectUnswitchCandidatesWithInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, const DominatorTree &DT, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
Collect unswitch candidates by invariant conditions that are not immediately present in the loop.
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
static void replaceLoopInvariantUses(const Loop &L, Value *Invariant, Constant &Replacement)
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
static bool collectUnswitchCandidates(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, const Loop &L, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
static cl::opt< bool > InjectInvariantConditions("simple-loop-unswitch-inject-invariant-conditions", cl::Hidden, cl::desc("Whether we should inject new invariants and unswitch them to " "eliminate some existing (non-invariant) conditions."), cl::init(true))
static cl::opt< bool > FreezeLoopUnswitchCond("freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation."))
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, AssumptionCache &AC)
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
static bool areLoopExitPHIsLoopInvariant(const Loop &L, const BasicBlock &ExitingBB, const BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
static bool insertCandidatesWithPendingInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, Loop &L, ICmpInst::Predicate Pred, ArrayRef< CompareDesc > Compares, const DominatorTree &DT)
Given chain of loop branch conditions looking like: br (Variant < Invariant1) br (Variant < Invariant...
static NonTrivialUnswitchCandidate injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, MemorySSAUpdater *MSSAU)
Materialize pending invariant condition of the given candidate into IR.
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
static cl::opt< unsigned > InjectInvariantConditionHotnesThreshold("simple-loop-unswitch-inject-invariant-condition-hotness-threshold", cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " "unswitch on them to eliminate branches that are " "not-taken 1/<this option> times or less."), cl::init(16))
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition)
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops, ScalarEvolution *SE)
Rebuild a loop after unswitching removes some subset of blocks and edges.
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Build the cloned blocks for an unswitched copy of the given loop.
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, LPMUpdater &LoopUpdater)
bool shouldTryInjectBasingOnMetadata(const BranchInst *BI, const BasicBlock *TakenSucc)
Returns true, if metadata on BI allows us to optimize branching into TakenSucc via injection of invar...
static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
This defines the Use class.
Value * RHS
Value * LHS
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:197
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:348
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:500
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:442
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:429
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:498
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:198
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:164
size_t size() const
Definition: BasicBlock.h:450
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.h:357
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:220
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:489
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
void swapSuccessors()
Swap the successors of this branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, BasicBlock::iterator InsertBefore)
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1641
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1646
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:967
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:849
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:856
This is an important base class in LLVM.
Definition: Constant.h:41
bool isOneValue() const
Returns true if the value is one.
Definition: Constants.cpp:124
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
iterator begin()
Definition: DenseMap.h:75
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This class represents a freeze function that returns random concrete value if an operand is either a ...
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
Definition: MustExecute.h:132
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:79
bool isRelational() const
Return true if the predicate is relational (not EQ or NE).
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2518
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1114
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2010
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1469
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1491
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2649
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
const BasicBlock * getParent() const
Definition: Instruction.h:150
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:357
bool isTerminator() const
Definition: Instruction.h:253
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1633
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
void perform(const LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:180
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
iterator end() const
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
iterator begin() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
StringRef getName() const
Definition: LoopInfo.h:388
Metadata node.
Definition: Metadata.h:1067
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1541
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:597
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition: MemorySSA.h:372
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:923
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point)
Create a MemoryAccess in MemorySSA at a specified point in a block.
void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAccesses in a set of BasicBlocks about to be deleted.
void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
The block From was spliced into From and To; move the memory accesses from Start onward into To accordingly.
void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
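A sketch of how these updater hooks are typically driven when a CFG edge is deleted: the same update list that feeds the dominator tree feeds MemorySSA. removeDeadEdge is a made-up helper; UpdateDTFirst is the parameter documented above.

#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/IR/Dominators.h"

using namespace llvm;

static void removeDeadEdge(BasicBlock *From, BasicBlock *To, DominatorTree &DT,
                           MemorySSAUpdater &MSSAU) {
  SmallVector<DominatorTree::UpdateType, 1> Updates;
  Updates.push_back({DominatorTree::Delete, From, To});
  // Let the updater apply the DT changes itself so MemorySSA sees the
  // post-change dominance relation.
  MSSAU.applyUpdates(Updates, DT, /*UpdateDTFirst=*/true);
  if (VerifyMemorySSA)
    MSSAU.getMemorySSA()->verifyMemorySSA();
}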
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:700
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self-consistent (i.e., definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1857
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:717
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition: MemorySSA.h:765
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
An analysis over an "inner" IR unit that provides access to an analysis manager over an "outer" IR uni...
Definition: PassManager.h:783
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses all()
Construct a special preserved set that preserves all analyses.
Definition: Analysis.h:115
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isColdBlock(const BBType *BB, BFIT *BFI) const
Returns true if BasicBlock BB is considered cold.
The main scalar evolution driver.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may affect Scalar...
void forgetTopmostLoop(const Loop *L)
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may affect its v...
void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
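A sketch of the usual ScalarEvolution invalidation after a transform changes a loop's control flow or exit conditions; SE is allowed to be null when the caller does not preserve ScalarEvolution, and invalidateSCEVForLoop is a made-up helper.

#include "llvm/Analysis/ScalarEvolution.h"

using namespace llvm;

static void invalidateSCEVForLoop(ScalarEvolution *SE, const Loop &L) {
  if (!SE)
    return;
  SE->forgetTopmostLoop(&L);            // drop cached trip counts for the whole nest
  SE->forgetBlockAndLoopDispositions(); // block/loop dispositions may no longer hold
}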
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:103
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:356
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
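A sketch of how the set/vector types above are commonly combined in this kind of pass: deduplicate a candidate list while keeping a deterministic order, then hand it to MemorySSAUpdater::removeBlocks, which expects exactly this SmallSetVector shape. eraseBlocksFromMSSA is a made-up helper.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/MemorySSAUpdater.h"

using namespace llvm;

static void eraseBlocksFromMSSA(ArrayRef<BasicBlock *> Candidates,
                                MemorySSAUpdater *MSSAU) {
  SmallSetVector<BasicBlock *, 8> DeadBlocks;
  for (BasicBlock *BB : Candidates)
    DeadBlocks.insert(BB); // duplicates are ignored, insertion order is kept
  if (MSSAU && !DeadBlocks.empty())
    MSSAU->removeBlocks(DeadBlocks);
}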
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
Instruction::InstListType::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
ConstantIntT * getCaseValue() const
Resolves case value for current case.
Multiway switch.
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, BasicBlock::iterator InsertBefore)
BasicBlock * getDefaultDest() const
void setDefaultDest(BasicBlock *DefaultCase)
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
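A sketch showing how the profile-aware wrapper keeps branch_weights consistent while a case is edited; redirectCase is a hypothetical helper, not part of this pass.

#include "llvm/IR/Instructions.h"

using namespace llvm;

static void redirectCase(SwitchInst &SI, ConstantInt *OnVal,
                         BasicBlock *NewDest) {
  SwitchInstProfUpdateWrapper SIW(SI);
  auto CaseIt = SI.findCaseValue(OnVal);
  if (CaseIt == SI.case_default())
    return; // no explicit case for OnVal
  // Save the weight keyed by successor index, then re-add the case with it.
  SwitchInstProfUpdateWrapper::CaseWeightOpt W =
      SIW.getSuccessorWeight(CaseIt->getSuccessorIndex());
  SIW.removeCase(CaseIt);
  SIW.addCase(OnVal, NewDest, W);
}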
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool hasBranchDivergence(const Function *F=nullptr) const
Return true if branch divergence exists.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
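A sketch of summing a code-size style cost over one block with the entry point above, roughly the kind of accounting a size threshold gets compared against; blockCost is a made-up helper.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"

using namespace llvm;

static InstructionCost blockCost(const BasicBlock &BB,
                                 const TargetTransformInfo &TTI) {
  InstructionCost Cost = 0;
  for (const Instruction &I : BB) {
    SmallVector<const Value *, 4> Operands(I.operand_values());
    Cost += TTI.getInstructionCost(&I, Operands,
                                   TargetTransformInfo::TCK_CodeSize);
  }
  return Cost;
}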
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:29
void push_back(EltTy NewVal)
bool empty() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
unsigned getIntegerBitWidth() const
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: ValueMap.h:164
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: ValueMap.h:151
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:1013
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:525
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
Definition: PatternMatch.h:168
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:545
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:218
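A sketch of how these matchers compose: it recognizes a branch condition of the form L && R or L || R and binds the operands, the kind of shape unswitching-style analyses look for. getLogicalBranchOperands is a made-up helper.

#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace llvm::PatternMatch;

static bool getLogicalBranchOperands(BranchInst *BI, Value *&L, Value *&R) {
  if (!BI || !BI->isConditional())
    return false;
  // m_CombineOr tries the && form first, then the || form.
  return match(BI->getCondition(),
               m_CombineOr(m_LogicalAnd(m_Value(L), m_Value(R)),
                           m_LogicalOr(m_Value(L), m_Value(R))));
}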
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
void stable_sort(R &&Range)
Definition: STLExtras.h:1975
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:533
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2053
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:665
MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
Definition: LoopInfo.cpp:1043
Op::Description Desc
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1738
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:313
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
Definition: GuardUtils.cpp:18
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&...args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
Definition: STLExtras.h:885
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1656
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:94
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:76
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool VerifyLoopInfo
Enable verification of loop info.
Definition: LoopInfo.cpp:50
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:263
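A sketch of the clone-then-remap idiom these entries describe: copy one block, record the mapping, and rewrite the clone's operands through the value map while tolerating operands that were deliberately not cloned. cloneAndRemap is a made-up helper.

#include "llvm/IR/BasicBlock.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

using namespace llvm;

static BasicBlock *cloneAndRemap(BasicBlock *BB, ValueToValueMapTy &VMap,
                                 Function *F) {
  BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".copy", F);
  VMap[BB] = NewBB; // let later remaps redirect references to the clone
  for (Instruction &I : *NewBB)
    RemapInstruction(&I, VMap,
                     RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
  return NewBB;
}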
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:83
void RemapDPValueRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DPValue V using the value map VM.
Definition: ValueMapper.h:279
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:57
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
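A sketch of reading taken/not-taken weights off a conditional branch with the helper above; getTakenWeights is a made-up name, and the first weight corresponds to the branch's first (true) successor.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/ProfDataUtils.h"

using namespace llvm;

static bool getTakenWeights(const BranchInst &BI, uint32_t &TrueWeight,
                            uint32_t &FalseWeight) {
  SmallVector<uint32_t, 2> Weights;
  if (!extractBranchWeights(BI.getMetadata(LLVMContext::MD_prof), Weights) ||
      Weights.size() != 2)
    return false; // no !prof metadata, or an unexpected shape
  TrueWeight = Weights[0];
  FalseWeight = Weights[1];
  return true;
}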
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1930
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
Definition: LoopInfo.cpp:1126
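A sketch of rewriting a loop ID after a transform so follow-up passes do not re-apply directives that were already honored; the prefix string and stripHandledLoopMetadata are illustrative, not this pass's actual policy.

#include "llvm/Analysis/LoopInfo.h"

using namespace llvm;

static void stripHandledLoopMetadata(Loop &L) {
  MDNode *OrigLoopID = L.getLoopID();
  if (!OrigLoopID)
    return;
  MDNode *NewLoopID = makePostTransformationMetadata(
      L.getHeader()->getContext(), OrigLoopID,
      /*RemovePrefixes=*/{"llvm.loop.example."}, /*AddAttrs=*/{});
  L.setLoopID(NewLoopID);
}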
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2031
auto predecessors(const MachineBasicBlock *BB)
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
Definition: LoopUtils.cpp:1926
bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition: LCSSA.cpp:363
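A sketch of the canonicalization pair loop transforms typically re-establish before returning; the analysis objects are assumed to come from LoopStandardAnalysisResults, and recanonicalize is a made-up helper.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

using namespace llvm;

static bool recanonicalize(Loop &L, DominatorTree &DT, LoopInfo &LI,
                           ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
  bool Changed = false;
  // Give every exit block only in-loop predecessors (dedicated exits)...
  Changed |= formDedicatedExitBlocks(&L, &DT, &LI, MSSAU, /*PreserveLCSSA=*/true);
  // ...then rebuild LCSSA phis for values used outside the loop.
  Changed |= formLCSSA(L, DT, &LI, SE);
  return Changed;
}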
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
Description of the encoding of one expression Op.
Struct to hold information about a partially invariant condition.
Definition: LoopUtils.h:539
SmallVector< Instruction * > InstToDuplicate
Instructions that need to be duplicated and checked for the unswitching condition.
Definition: LoopUtils.h:542
Constant * KnownValue
Constant to indicate for which value the condition is invariant.
Definition: LoopUtils.h:545
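A sketch of producing and consuming this struct through hasPartialIVCondition, listed above; the threshold value and inspectPartialCondition are illustrative.

#include "llvm/Transforms/Utils/LoopUtils.h"
#include <optional>

using namespace llvm;

static void inspectPartialCondition(const Loop &L, const MemorySSA &MSSA,
                                    AAResults &AA) {
  // 100 is an arbitrary cap on how many memory uses the analysis may walk.
  if (std::optional<IVConditionInfo> Info =
          hasPartialIVCondition(L, /*MSSAThreshold=*/100, MSSA, AA)) {
    // Info->KnownValue: the constant for which the condition is invariant.
    // Info->InstToDuplicate: instructions to clone into the specialized path.
    (void)*Info;
  }
}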
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Direction
An enum for the direction of the loop.
Definition: LoopInfo.h:220
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:91