// Source listing rendered by the LLVM 22.0.0git doxygen for
// llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp.
1///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/DenseMap.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/ADT/Sequence.h"
13#include "llvm/ADT/SetVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/Twine.h"
20#include "llvm/Analysis/CFG.h"
33#include "llvm/IR/BasicBlock.h"
34#include "llvm/IR/Constant.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/Dominators.h"
37#include "llvm/IR/Function.h"
38#include "llvm/IR/IRBuilder.h"
39#include "llvm/IR/InstrTypes.h"
40#include "llvm/IR/Instruction.h"
43#include "llvm/IR/Module.h"
46#include "llvm/IR/Use.h"
47#include "llvm/IR/Value.h"
50#include "llvm/Support/Debug.h"
61#include <algorithm>
62#include <cassert>
63#include <iterator>
64#include <numeric>
65#include <optional>
66#include <utility>
67
68#define DEBUG_TYPE "simple-loop-unswitch"
69
70using namespace llvm;
71using namespace llvm::PatternMatch;
72
73STATISTIC(NumBranches, "Number of branches unswitched");
74STATISTIC(NumSwitches, "Number of switches unswitched");
75STATISTIC(NumSelects, "Number of selects turned into branches for unswitching");
76STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
77STATISTIC(NumTrivial, "Number of unswitches that are trivial");
79 NumCostMultiplierSkipped,
80 "Number of unswitch candidates that had their cost multiplier skipped");
81STATISTIC(NumInvariantConditionsInjected,
82 "Number of invariant conditions injected and unswitched");
83
84namespace llvm {
86 "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
87 cl::desc("Forcibly enables non-trivial loop unswitching rather than "
88 "following the configuration passed into the pass."));
89
90static cl::opt<int>
91 UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
92 cl::desc("The cost threshold for unswitching a loop."));
93
95 "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
96 cl::desc("Enable unswitch cost multiplier that prohibits exponential "
97 "explosion in nontrivial unswitch."));
99 "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
100 cl::desc("Toplevel siblings divisor for cost multiplier."));
102 "unswitch-parent-blocks-div", cl::init(8), cl::Hidden,
103 cl::desc("Outer loop size divisor for cost multiplier."));
105 "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
106 cl::desc("Number of unswitch candidates that are ignored when calculating "
107 "cost multiplier."));
109 "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
110 cl::desc("If enabled, simple loop unswitching will also consider "
111 "llvm.experimental.guard intrinsics as unswitch candidates."));
113 "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
114 cl::init(false), cl::Hidden,
115 cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
116 "null checks to save time analyzing if we can keep it."));
118 MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
119 cl::desc("Max number of memory uses to explore during "
120 "partial unswitching analysis"),
121 cl::init(100), cl::Hidden);
123 "freeze-loop-unswitch-cond", cl::init(true), cl::Hidden,
124 cl::desc("If enabled, the freeze instruction will be added to condition "
125 "of loop unswitch to prevent miscompilation."));
126
128 "simple-loop-unswitch-inject-invariant-conditions", cl::Hidden,
129 cl::desc("Whether we should inject new invariants and unswitch them to "
130 "eliminate some existing (non-invariant) conditions."),
131 cl::init(true));
132
134 "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
136 cl::desc("Only try to inject loop invariant conditions and "
137 "unswitch on them to eliminate branches that are "
138 "not-taken 1/<this option> times or less."),
139 cl::init(16));
140
141static cl::opt<bool> EstimateProfile("simple-loop-unswitch-estimate-profile",
142 cl::Hidden, cl::init(true));
144} // namespace llvm
145
147namespace {
148struct CompareDesc {
149 BranchInst *Term;
150 Value *Invariant;
151 BasicBlock *InLoopSucc;
152
153 CompareDesc(BranchInst *Term, Value *Invariant, BasicBlock *InLoopSucc)
154 : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
155};
156
157struct InjectedInvariant {
158 ICmpInst::Predicate Pred;
159 Value *LHS;
160 Value *RHS;
161 BasicBlock *InLoopSucc;
162
163 InjectedInvariant(ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
164 BasicBlock *InLoopSucc)
165 : Pred(Pred), LHS(LHS), RHS(RHS), InLoopSucc(InLoopSucc) {}
166};
167
168struct NonTrivialUnswitchCandidate {
169 Instruction *TI = nullptr;
170 TinyPtrVector<Value *> Invariants;
171 std::optional<InstructionCost> Cost;
172 std::optional<InjectedInvariant> PendingInjection;
173 NonTrivialUnswitchCandidate(
174 Instruction *TI, ArrayRef<Value *> Invariants,
175 std::optional<InstructionCost> Cost = std::nullopt,
176 std::optional<InjectedInvariant> PendingInjection = std::nullopt)
177 : TI(TI), Invariants(Invariants), Cost(Cost),
178 PendingInjection(PendingInjection) {};
179
180 bool hasPendingInjection() const { return PendingInjection.has_value(); }
181};
182} // end anonymous namespace.
183
184// Helper to skip (select x, true, false), which matches both a logical AND and
185// OR and can confuse code that tries to determine if \p Cond is either a
186// logical AND or OR but not both.
188 Value *CondNext;
189 while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
190 Cond = CondNext;
191 return Cond;
192}
193
194/// Collect all of the loop invariant input values transitively used by the
195/// homogeneous instruction graph from a given root.
196///
197/// This essentially walks from a root recursively through loop variant operands
198/// which have perform the same logical operation (AND or OR) and finds all
199/// inputs which are loop invariant. For some operations these can be
200/// re-associated and unswitched out of the loop entirely.
203 const LoopInfo &LI) {
204 assert(!L.isLoopInvariant(&Root) &&
205 "Only need to walk the graph if root itself is not invariant.");
206 TinyPtrVector<Value *> Invariants;
207
208 bool IsRootAnd = match(&Root, m_LogicalAnd());
209 bool IsRootOr = match(&Root, m_LogicalOr());
210
211 // Build a worklist and recurse through operators collecting invariants.
214 Worklist.push_back(&Root);
215 Visited.insert(&Root);
216 do {
217 Instruction &I = *Worklist.pop_back_val();
218 for (Value *OpV : I.operand_values()) {
219 // Skip constants as unswitching isn't interesting for them.
220 if (isa<Constant>(OpV))
221 continue;
222
223 // Add it to our result if loop invariant.
224 if (L.isLoopInvariant(OpV)) {
225 Invariants.push_back(OpV);
226 continue;
227 }
228
229 // If not an instruction with the same opcode, nothing we can do.
231
232 if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
233 (IsRootOr && match(OpI, m_LogicalOr())))) {
234 // Visit this operand.
235 if (Visited.insert(OpI).second)
236 Worklist.push_back(OpI);
237 }
238 }
239 } while (!Worklist.empty());
240
241 return Invariants;
242}
243
244static void replaceLoopInvariantUses(const Loop &L, Value *Invariant,
245 Constant &Replacement) {
246 assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");
247
248 // Replace uses of LIC in the loop with the given constant.
249 // We use make_early_inc_range as set invalidates the iterator.
250 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
251 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
252
253 // Replace this use within the loop body.
254 if (UserI && L.contains(UserI))
255 U.set(&Replacement);
256 }
257}
258
259/// Check that all the LCSSA PHI nodes in the loop exit block have trivial
260/// incoming values along this edge.
262 const BasicBlock &ExitingBB,
263 const BasicBlock &ExitBB) {
264 for (const Instruction &I : ExitBB) {
265 auto *PN = dyn_cast<PHINode>(&I);
266 if (!PN)
267 // No more PHIs to check.
268 return true;
269
270 // If the incoming value for this edge isn't loop invariant the unswitch
271 // won't be trivial.
272 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
273 return false;
274 }
275 llvm_unreachable("Basic blocks should never be empty!");
276}
277
278/// Copy a set of loop invariant values \p Invariants and insert them at the
279/// end of \p BB and conditionally branch on the copied condition. We only
280/// branch on a single value.
281/// We attempt to estimate the profile of the resulting conditional branch from
282/// \p ComputeProfFrom, which is the original conditional branch we're
283/// unswitching.
284/// When \p Direction is true, the \p Invariants form a disjunction, and the
285/// branch conditioned on it exits the loop on the "true" case. When \p
286/// Direction is false, the \p Invariants form a conjunction and the branch
287/// exits on the "false" case.
289 BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
290 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
291 const Instruction *I, AssumptionCache *AC, const DominatorTree &DT,
292 const BranchInst &ComputeProfFrom) {
293
294 SmallVector<uint32_t> BranchWeights;
295 bool HasBranchWeights = EstimateProfile && !ProfcheckDisableMetadataFixes &&
296 extractBranchWeights(ComputeProfFrom, BranchWeights);
297 // If Direction is true, that means we had a disjunction and that the "true"
298 // case exits. The probability of the disjunction of the subset of terms is at
299 // most as high as the original one. So, if the probability is higher than the
300 // one we'd assign in absence of a profile (i.e. 0.5), we will use 0.5,
301 // but if it's lower, we will use the original probability.
302 // Conversely, if Direction is false, that means we had a conjunction, and the
303 // probability of exiting is captured in the second branch weight. That
304 // probability is a disjunction (of the negation of the original terms). The
305 // same reasoning applies as above.
306 // Issue #165649: should we expect BFI to conserve, and use that to calculate
307 // the branch weights?
308 if (HasBranchWeights &&
309 static_cast<double>(BranchWeights[Direction ? 0 : 1]) /
310 static_cast<double>(sum_of(BranchWeights)) >
311 0.5)
312 HasBranchWeights = false;
313
314 IRBuilder<> IRB(&BB);
316
317 SmallVector<Value *> FrozenInvariants;
318 for (Value *Inv : Invariants) {
319 if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT))
320 Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr");
321 FrozenInvariants.push_back(Inv);
322 }
323
324 Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
325 : IRB.CreateAnd(FrozenInvariants);
326 auto *BR = IRB.CreateCondBr(
327 Cond, Direction ? &UnswitchedSucc : &NormalSucc,
328 Direction ? &NormalSucc : &UnswitchedSucc,
329 HasBranchWeights ? ComputeProfFrom.getMetadata(LLVMContext::MD_prof)
330 : nullptr);
331 if (!HasBranchWeights)
333 *BR, *BR->getParent()->getParent(), DEBUG_TYPE);
334}
335
336/// Copy a set of loop invariant values, and conditionally branch on them.
338 BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
339 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
340 MemorySSAUpdater *MSSAU) {
342 for (auto *Val : reverse(ToDuplicate)) {
343 Instruction *Inst = cast<Instruction>(Val);
344 Instruction *NewInst = Inst->clone();
345
346 if (const DebugLoc &DL = Inst->getDebugLoc())
347 mapAtomInstance(DL, VMap);
348
349 NewInst->insertInto(&BB, BB.end());
350 RemapInstruction(NewInst, VMap,
352 VMap[Val] = NewInst;
353
354 if (!MSSAU)
355 continue;
356
357 MemorySSA *MSSA = MSSAU->getMemorySSA();
358 if (auto *MemUse =
360 auto *DefiningAccess = MemUse->getDefiningAccess();
361 // Get the first defining access before the loop.
362 while (L.contains(DefiningAccess->getBlock())) {
363 // If the defining access is a MemoryPhi, get the incoming
364 // value for the pre-header as defining access.
365 if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
366 DefiningAccess =
367 MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
368 else
369 DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
370 }
371 MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
372 NewInst->getParent(),
374 }
375 }
376
377 IRBuilder<> IRB(&BB);
379 Value *Cond = VMap[ToDuplicate[0]];
380 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
381 Direction ? &NormalSucc : &UnswitchedSucc);
382}
383
384/// Rewrite the PHI nodes in an unswitched loop exit basic block.
385///
386/// Requires that the loop exit and unswitched basic block are the same, and
387/// that the exiting block was a unique predecessor of that block. Rewrites the
388/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
389/// PHI nodes from the old preheader that now contains the unswitched
390/// terminator.
392 BasicBlock &OldExitingBB,
393 BasicBlock &OldPH) {
394 for (PHINode &PN : UnswitchedBB.phis()) {
395 // When the loop exit is directly unswitched we just need to update the
396 // incoming basic block. We loop to handle weird cases with repeated
397 // incoming blocks, but expect to typically only have one operand here.
398 for (auto i : seq<int>(0, PN.getNumOperands())) {
399 assert(PN.getIncomingBlock(i) == &OldExitingBB &&
400 "Found incoming block different from unique predecessor!");
401 PN.setIncomingBlock(i, &OldPH);
402 }
403 }
404}
405
406/// Rewrite the PHI nodes in the loop exit basic block and the split off
407/// unswitched block.
408///
409/// Because the exit block remains an exit from the loop, this rewrites the
410/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
411/// nodes into the unswitched basic block to select between the value in the
412/// old preheader and the loop exit.
414 BasicBlock &UnswitchedBB,
415 BasicBlock &OldExitingBB,
416 BasicBlock &OldPH,
417 bool FullUnswitch) {
418 assert(&ExitBB != &UnswitchedBB &&
419 "Must have different loop exit and unswitched blocks!");
420 BasicBlock::iterator InsertPt = UnswitchedBB.begin();
421 for (PHINode &PN : ExitBB.phis()) {
422 auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
423 PN.getName() + ".split");
424 NewPN->insertBefore(InsertPt);
425
426 // Walk backwards over the old PHI node's inputs to minimize the cost of
427 // removing each one. We have to do this weird loop manually so that we
428 // create the same number of new incoming edges in the new PHI as we expect
429 // each case-based edge to be included in the unswitched switch in some
430 // cases.
431 // FIXME: This is really, really gross. It would be much cleaner if LLVM
432 // allowed us to create a single entry for a predecessor block without
433 // having separate entries for each "edge" even though these edges are
434 // required to produce identical results.
435 for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
436 if (PN.getIncomingBlock(i) != &OldExitingBB)
437 continue;
438
439 Value *Incoming = PN.getIncomingValue(i);
440 if (FullUnswitch)
441 // No more edge from the old exiting block to the exit block.
442 PN.removeIncomingValue(i);
443
444 NewPN->addIncoming(Incoming, &OldPH);
445 }
446
447 // Now replace the old PHI with the new one and wire the old one in as an
448 // input to the new one.
449 PN.replaceAllUsesWith(NewPN);
450 NewPN->addIncoming(&PN, &ExitBB);
451 }
452}
453
454/// Hoist the current loop up to the innermost loop containing a remaining exit.
455///
456/// Because we've removed an exit from the loop, we may have changed the set of
457/// loops reachable and need to move the current loop up the loop nest or even
458/// to an entirely separate nest.
459static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
460 DominatorTree &DT, LoopInfo &LI,
461 MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
462 // If the loop is already at the top level, we can't hoist it anywhere.
463 Loop *OldParentL = L.getParentLoop();
464 if (!OldParentL)
465 return;
466
468 L.getExitBlocks(Exits);
469 Loop *NewParentL = nullptr;
470 for (auto *ExitBB : Exits)
471 if (Loop *ExitL = LI.getLoopFor(ExitBB))
472 if (!NewParentL || NewParentL->contains(ExitL))
473 NewParentL = ExitL;
474
475 if (NewParentL == OldParentL)
476 return;
477
478 // The new parent loop (if different) should always contain the old one.
479 if (NewParentL)
480 assert(NewParentL->contains(OldParentL) &&
481 "Can only hoist this loop up the nest!");
482
483 // The preheader will need to move with the body of this loop. However,
484 // because it isn't in this loop we also need to update the primary loop map.
485 assert(OldParentL == LI.getLoopFor(&Preheader) &&
486 "Parent loop of this loop should contain this loop's preheader!");
487 LI.changeLoopFor(&Preheader, NewParentL);
488
489 // Remove this loop from its old parent.
490 OldParentL->removeChildLoop(&L);
491
492 // Add the loop either to the new parent or as a top-level loop.
493 if (NewParentL)
494 NewParentL->addChildLoop(&L);
495 else
496 LI.addTopLevelLoop(&L);
497
498 // Remove this loops blocks from the old parent and every other loop up the
499 // nest until reaching the new parent. Also update all of these
500 // no-longer-containing loops to reflect the nesting change.
501 for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
502 OldContainingL = OldContainingL->getParentLoop()) {
503 llvm::erase_if(OldContainingL->getBlocksVector(),
504 [&](const BasicBlock *BB) {
505 return BB == &Preheader || L.contains(BB);
506 });
507
508 OldContainingL->getBlocksSet().erase(&Preheader);
509 for (BasicBlock *BB : L.blocks())
510 OldContainingL->getBlocksSet().erase(BB);
511
512 // Because we just hoisted a loop out of this one, we have essentially
513 // created new exit paths from it. That means we need to form LCSSA PHI
514 // nodes for values used in the no-longer-nested loop.
515 formLCSSA(*OldContainingL, DT, &LI, SE);
516
517 // We shouldn't need to form dedicated exits because the exit introduced
518 // here is the (just split by unswitching) preheader. However, after trivial
519 // unswitching it is possible to get new non-dedicated exits out of parent
520 // loop so let's conservatively form dedicated exit blocks and figure out
521 // if we can optimize later.
522 formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
523 /*PreserveLCSSA*/ true);
524 }
525}
526
527// Return the top-most loop containing ExitBB and having ExitBB as exiting block
528// or the loop containing ExitBB, if there is no parent loop containing ExitBB
529// as exiting block.
531 const LoopInfo &LI) {
532 Loop *TopMost = LI.getLoopFor(ExitBB);
533 Loop *Current = TopMost;
534 while (Current) {
535 if (Current->isLoopExiting(ExitBB))
536 TopMost = Current;
537 Current = Current->getParentLoop();
538 }
539 return TopMost;
540}
541
542/// Unswitch a trivial branch if the condition is loop invariant.
543///
544/// This routine should only be called when loop code leading to the branch has
545/// been validated as trivial (no side effects). This routine checks if the
546/// condition is invariant and one of the successors is a loop exit. This
547/// allows us to unswitch without duplicating the loop, making it trivial.
548///
549/// If this routine fails to unswitch the branch it returns false.
550///
551/// If the branch can be unswitched, this routine splits the preheader and
552/// hoists the branch above that split. Preserves loop simplified form
553/// (splitting the exit block as necessary). It simplifies the branch within
554/// the loop to an unconditional branch but doesn't remove it entirely. Further
555/// cleanup can be done with some simplifycfg like pass.
556///
557/// If `SE` is not null, it will be updated based on the potential loop SCEVs
558/// invalidated by this.
560 LoopInfo &LI, ScalarEvolution *SE,
561 MemorySSAUpdater *MSSAU) {
562 assert(BI.isConditional() && "Can only unswitch a conditional branch!");
563 LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
564
565 // The loop invariant values that we want to unswitch.
566 TinyPtrVector<Value *> Invariants;
567
568 // When true, we're fully unswitching the branch rather than just unswitching
569 // some input conditions to the branch.
570 bool FullUnswitch = false;
571
573 if (L.isLoopInvariant(Cond)) {
574 Invariants.push_back(Cond);
575 FullUnswitch = true;
576 } else {
577 if (auto *CondInst = dyn_cast<Instruction>(Cond))
578 Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
579 if (Invariants.empty()) {
580 LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n");
581 return false;
582 }
583 }
584
585 // Check that one of the branch's successors exits, and which one.
586 bool ExitDirection = true;
587 int LoopExitSuccIdx = 0;
588 auto *LoopExitBB = BI.getSuccessor(0);
589 if (L.contains(LoopExitBB)) {
590 ExitDirection = false;
591 LoopExitSuccIdx = 1;
592 LoopExitBB = BI.getSuccessor(1);
593 if (L.contains(LoopExitBB)) {
594 LLVM_DEBUG(dbgs() << " Branch doesn't exit the loop!\n");
595 return false;
596 }
597 }
598 auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
599 auto *ParentBB = BI.getParent();
600 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {
601 LLVM_DEBUG(dbgs() << " Loop exit PHI's aren't loop-invariant!\n");
602 return false;
603 }
604
605 // When unswitching only part of the branch's condition, we need the exit
606 // block to be reached directly from the partially unswitched input. This can
607 // be done when the exit block is along the true edge and the branch condition
608 // is a graph of `or` operations, or the exit block is along the false edge
609 // and the condition is a graph of `and` operations.
610 if (!FullUnswitch) {
611 if (ExitDirection ? !match(Cond, m_LogicalOr())
612 : !match(Cond, m_LogicalAnd())) {
613 LLVM_DEBUG(dbgs() << " Branch condition is in improper form for "
614 "non-full unswitch!\n");
615 return false;
616 }
617 }
618
619 LLVM_DEBUG({
620 dbgs() << " unswitching trivial invariant conditions for: " << BI
621 << "\n";
622 for (Value *Invariant : Invariants) {
623 dbgs() << " " << *Invariant << " == true";
624 if (Invariant != Invariants.back())
625 dbgs() << " ||";
626 dbgs() << "\n";
627 }
628 });
629
630 // If we have scalar evolutions, we need to invalidate them including this
631 // loop, the loop containing the exit block and the topmost parent loop
632 // exiting via LoopExitBB.
633 if (SE) {
634 if (const Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
635 SE->forgetLoop(ExitL);
636 else
637 // Forget the entire nest as this exits the entire nest.
638 SE->forgetTopmostLoop(&L);
640 }
641
642 if (MSSAU && VerifyMemorySSA)
643 MSSAU->getMemorySSA()->verifyMemorySSA();
644
645 // Split the preheader, so that we know that there is a safe place to insert
646 // the conditional branch. We will change the preheader to have a conditional
647 // branch on LoopCond.
648 BasicBlock *OldPH = L.getLoopPreheader();
649 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
650
651 // Now that we have a place to insert the conditional branch, create a place
652 // to branch to: this is the exit block out of the loop that we are
653 // unswitching. We need to split this if there are other loop predecessors.
654 // Because the loop is in simplified form, *any* other predecessor is enough.
655 BasicBlock *UnswitchedBB;
656 if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
657 assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
658 "A branch's parent isn't a predecessor!");
659 UnswitchedBB = LoopExitBB;
660 } else {
661 UnswitchedBB =
662 SplitBlock(LoopExitBB, LoopExitBB->begin(), &DT, &LI, MSSAU, "", false);
663 }
664
665 if (MSSAU && VerifyMemorySSA)
666 MSSAU->getMemorySSA()->verifyMemorySSA();
667
668 // Actually move the invariant uses into the unswitched position. If possible,
669 // we do this by moving the instructions, but when doing partial unswitching
670 // we do it by building a new merge of the values in the unswitched position.
671 OldPH->getTerminator()->eraseFromParent();
672 if (FullUnswitch) {
673 // If fully unswitching, we can use the existing branch instruction.
674 // Splice it into the old PH to gate reaching the new preheader and re-point
675 // its successors.
676 BI.moveBefore(*OldPH, OldPH->end());
677 BI.setCondition(Cond);
678 if (MSSAU) {
679 // Temporarily clone the terminator, to make MSSA update cheaper by
680 // separating "insert edge" updates from "remove edge" ones.
681 BI.clone()->insertInto(ParentBB, ParentBB->end());
682 } else {
683 // Create a new unconditional branch that will continue the loop as a new
684 // terminator.
685 Instruction *NewBI = BranchInst::Create(ContinueBB, ParentBB);
686 NewBI->setDebugLoc(BI.getDebugLoc());
687 }
688 BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
689 BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
690 } else {
691 // Only unswitching a subset of inputs to the condition, so we will need to
692 // build a new branch that merges the invariant inputs.
693 if (ExitDirection)
695 "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
696 "condition!");
697 else
699 "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
700 " condition!");
702 *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
703 FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT, BI);
704 }
705
706 // Update the dominator tree with the added edge.
707 DT.insertEdge(OldPH, UnswitchedBB);
708
709 // After the dominator tree was updated with the added edge, update MemorySSA
710 // if available.
711 if (MSSAU) {
713 Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
714 MSSAU->applyInsertUpdates(Updates, DT);
715 }
716
717 // Finish updating dominator tree and memory ssa for full unswitch.
718 if (FullUnswitch) {
719 if (MSSAU) {
720 Instruction *Term = ParentBB->getTerminator();
721 // Remove the cloned branch instruction and create unconditional branch
722 // now.
723 Instruction *NewBI = BranchInst::Create(ContinueBB, ParentBB);
724 NewBI->setDebugLoc(Term->getDebugLoc());
725 Term->eraseFromParent();
726 MSSAU->removeEdge(ParentBB, LoopExitBB);
727 }
728 DT.deleteEdge(ParentBB, LoopExitBB);
729 }
730
731 if (MSSAU && VerifyMemorySSA)
732 MSSAU->getMemorySSA()->verifyMemorySSA();
733
734 // Rewrite the relevant PHI nodes.
735 if (UnswitchedBB == LoopExitBB)
736 rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
737 else
738 rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
739 *ParentBB, *OldPH, FullUnswitch);
740
741 // The constant we can replace all of our invariants with inside the loop
742 // body. If any of the invariants have a value other than this the loop won't
743 // be entered.
744 ConstantInt *Replacement = ExitDirection
747
748 // Since this is an i1 condition we can also trivially replace uses of it
749 // within the loop with a constant.
750 for (Value *Invariant : Invariants)
751 replaceLoopInvariantUses(L, Invariant, *Replacement);
752
753 // If this was full unswitching, we may have changed the nesting relationship
754 // for this loop so hoist it to its correct parent if needed.
755 if (FullUnswitch)
756 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
757
758 if (MSSAU && VerifyMemorySSA)
759 MSSAU->getMemorySSA()->verifyMemorySSA();
760
761 LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
762 ++NumTrivial;
763 ++NumBranches;
764 return true;
765}
766
767/// Unswitch a trivial switch if the condition is loop invariant.
768///
769/// This routine should only be called when loop code leading to the switch has
770/// been validated as trivial (no side effects). This routine checks if the
771/// condition is invariant and that at least one of the successors is a loop
772/// exit. This allows us to unswitch without duplicating the loop, making it
773/// trivial.
774///
775/// If this routine fails to unswitch the switch it returns false.
776///
777/// If the switch can be unswitched, this routine splits the preheader and
778/// copies the switch above that split. If the default case is one of the
779/// exiting cases, it copies the non-exiting cases and points them at the new
780/// preheader. If the default case is not exiting, it copies the exiting cases
781/// and points the default at the preheader. It preserves loop simplified form
782/// (splitting the exit blocks as necessary). It simplifies the switch within
783/// the loop by removing now-dead cases. If the default case is one of those
784/// unswitched, it replaces its destination with a new basic block containing
785/// only unreachable. Such basic blocks, while technically loop exits, are not
786/// considered for unswitching so this is a stable transform and the same
787/// switch will not be revisited. If after unswitching there is only a single
788/// in-loop successor, the switch is further simplified to an unconditional
789/// branch. Still more cleanup can be done with some simplifycfg like pass.
790///
791/// If `SE` is not null, it will be updated based on the potential loop SCEVs
792/// invalidated by this.
794 LoopInfo &LI, ScalarEvolution *SE,
795 MemorySSAUpdater *MSSAU) {
796 LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
797 Value *LoopCond = SI.getCondition();
798
799 // If this isn't switching on an invariant condition, we can't unswitch it.
800 if (!L.isLoopInvariant(LoopCond))
801 return false;
802
803 auto *ParentBB = SI.getParent();
804
805 // The same check must be used both for the default and the exit cases. We
806 // should never leave edges from the switch instruction to a basic block that
807 // we are unswitching, hence the condition used to determine the default case
808 // needs to also be used to populate ExitCaseIndices, which is then used to
809 // remove cases from the switch.
810 auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
811 // BBToCheck is not an exit block if it is inside loop L.
812 if (L.contains(&BBToCheck))
813 return false;
814 // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
815 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
816 return false;
817 // We do not unswitch a block that only has an unreachable statement, as
818 // it's possible this is a previously unswitched block. Only unswitch if
819 // either the terminator is not unreachable, or, if it is, it's not the only
820 // instruction in the block.
821 auto *TI = BBToCheck.getTerminator();
822 bool isUnreachable = isa<UnreachableInst>(TI);
823 return !isUnreachable || &*BBToCheck.getFirstNonPHIOrDbg() != TI;
824 };
825
826 SmallVector<int, 4> ExitCaseIndices;
827 for (auto Case : SI.cases())
828 if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
829 ExitCaseIndices.push_back(Case.getCaseIndex());
830 BasicBlock *DefaultExitBB = nullptr;
833 if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
834 DefaultExitBB = SI.getDefaultDest();
835 } else if (ExitCaseIndices.empty())
836 return false;
837
838 LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
839
840 if (MSSAU && VerifyMemorySSA)
841 MSSAU->getMemorySSA()->verifyMemorySSA();
842
843 // We may need to invalidate SCEVs for the outermost loop reached by any of
844 // the exits.
845 Loop *OuterL = &L;
846
847 if (DefaultExitBB) {
848 // Check the loop containing this exit.
849 Loop *ExitL = getTopMostExitingLoop(DefaultExitBB, LI);
850 if (!ExitL || ExitL->contains(OuterL))
851 OuterL = ExitL;
852 }
853 for (unsigned Index : ExitCaseIndices) {
854 auto CaseI = SI.case_begin() + Index;
855 // Compute the outer loop from this exit.
856 Loop *ExitL = getTopMostExitingLoop(CaseI->getCaseSuccessor(), LI);
857 if (!ExitL || ExitL->contains(OuterL))
858 OuterL = ExitL;
859 }
860
861 if (SE) {
862 if (OuterL)
863 SE->forgetLoop(OuterL);
864 else
865 SE->forgetTopmostLoop(&L);
866 }
867
868 if (DefaultExitBB) {
869 // Clear out the default destination temporarily to allow accurate
870 // predecessor lists to be examined below.
871 SI.setDefaultDest(nullptr);
872 }
873
874 // Store the exit cases into a separate data structure and remove them from
875 // the switch.
876 SmallVector<std::tuple<ConstantInt *, BasicBlock *,
878 4> ExitCases;
879 ExitCases.reserve(ExitCaseIndices.size());
881 // We walk the case indices backwards so that we remove the last case first
882 // and don't disrupt the earlier indices.
883 for (unsigned Index : reverse(ExitCaseIndices)) {
884 auto CaseI = SI.case_begin() + Index;
885 // Save the value of this case.
886 auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
887 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
888 // Delete the unswitched cases.
889 SIW.removeCase(CaseI);
890 }
891
892 // Check if after this all of the remaining cases point at the same
893 // successor.
894 BasicBlock *CommonSuccBB = nullptr;
895 if (SI.getNumCases() > 0 &&
896 all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
897 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
898 }))
899 CommonSuccBB = SI.case_begin()->getCaseSuccessor();
900 if (!DefaultExitBB) {
901 // If we're not unswitching the default, we need it to match any cases to
902 // have a common successor or if we have no cases it is the common
903 // successor.
904 if (SI.getNumCases() == 0)
905 CommonSuccBB = SI.getDefaultDest();
906 else if (SI.getDefaultDest() != CommonSuccBB)
907 CommonSuccBB = nullptr;
908 }
909
910 // Split the preheader, so that we know that there is a safe place to insert
911 // the switch.
912 BasicBlock *OldPH = L.getLoopPreheader();
913 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
914 OldPH->getTerminator()->eraseFromParent();
915
916 // Now add the unswitched switch. This new switch instruction inherits the
917 // debug location of the old switch, because it semantically replace the old
918 // one.
919 auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
920 NewSI->setDebugLoc(SIW->getDebugLoc());
921 SwitchInstProfUpdateWrapper NewSIW(*NewSI);
922
923 // Rewrite the IR for the unswitched basic blocks. This requires two steps.
924 // First, we split any exit blocks with remaining in-loop predecessors. Then
925 // we update the PHIs in one of two ways depending on if there was a split.
926 // We walk in reverse so that we split in the same order as the cases
927 // appeared. This is purely for convenience of reading the resulting IR, but
928 // it doesn't cost anything really.
929 SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
931 // Handle the default exit if necessary.
932 // FIXME: It'd be great if we could merge this with the loop below but LLVM's
933 // ranges aren't quite powerful enough yet.
934 if (DefaultExitBB) {
935 if (pred_empty(DefaultExitBB)) {
936 UnswitchedExitBBs.insert(DefaultExitBB);
937 rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
938 } else {
939 auto *SplitBB =
940 SplitBlock(DefaultExitBB, DefaultExitBB->begin(), &DT, &LI, MSSAU);
941 rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
942 *ParentBB, *OldPH,
943 /*FullUnswitch*/ true);
944 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
945 }
946 }
947 // Note that we must use a reference in the for loop so that we update the
948 // container.
949 for (auto &ExitCase : reverse(ExitCases)) {
950 // Grab a reference to the exit block in the pair so that we can update it.
951 BasicBlock *ExitBB = std::get<1>(ExitCase);
952
953 // If this case is the last edge into the exit block, we can simply reuse it
954 // as it will no longer be a loop exit. No mapping necessary.
955 if (pred_empty(ExitBB)) {
956 // Only rewrite once.
957 if (UnswitchedExitBBs.insert(ExitBB).second)
958 rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
959 continue;
960 }
961
962 // Otherwise we need to split the exit block so that we retain an exit
963 // block from the loop and a target for the unswitched condition.
964 BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
965 if (!SplitExitBB) {
966 // If this is the first time we see this, do the split and remember it.
967 SplitExitBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
968 rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
969 *ParentBB, *OldPH,
970 /*FullUnswitch*/ true);
971 }
972 // Update the case pair to point to the split block.
973 std::get<1>(ExitCase) = SplitExitBB;
974 }
975
976 // Now add the unswitched cases. We do this in reverse order as we built them
977 // in reverse order.
978 for (auto &ExitCase : reverse(ExitCases)) {
979 ConstantInt *CaseVal = std::get<0>(ExitCase);
980 BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
981
982 NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
983 }
984
985 // If the default was unswitched, re-point it and add explicit cases for
986 // entering the loop.
987 if (DefaultExitBB) {
988 NewSIW->setDefaultDest(DefaultExitBB);
989 NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
990
991 // We removed all the exit cases, so we just copy the cases to the
992 // unswitched switch.
993 for (const auto &Case : SI.cases())
994 NewSIW.addCase(Case.getCaseValue(), NewPH,
996 } else if (DefaultCaseWeight) {
997 // We have to set branch weight of the default case.
998 uint64_t SW = *DefaultCaseWeight;
999 for (const auto &Case : SI.cases()) {
1000 auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
1001 assert(W &&
1002 "case weight must be defined as default case weight is defined");
1003 SW += *W;
1004 }
1005 NewSIW.setSuccessorWeight(0, SW);
1006 }
1007
1008 // If we ended up with a common successor for every path through the switch
1009 // after unswitching, rewrite it to an unconditional branch to make it easy
1010 // to recognize. Otherwise we potentially have to recognize the default case
1011 // pointing at unreachable and other complexity.
1012 if (CommonSuccBB) {
1013 BasicBlock *BB = SI.getParent();
1014 // We may have had multiple edges to this common successor block, so remove
1015 // them as predecessors. We skip the first one, either the default or the
1016 // actual first case.
1017 bool SkippedFirst = DefaultExitBB == nullptr;
1018 for (auto Case : SI.cases()) {
1019 assert(Case.getCaseSuccessor() == CommonSuccBB &&
1020 "Non-common successor!");
1021 (void)Case;
1022 if (!SkippedFirst) {
1023 SkippedFirst = true;
1024 continue;
1025 }
1026 CommonSuccBB->removePredecessor(BB,
1027 /*KeepOneInputPHIs*/ true);
1028 }
1029 // Now nuke the switch and replace it with a direct branch.
1030 Instruction *NewBI = BranchInst::Create(CommonSuccBB, BB);
1031 NewBI->setDebugLoc(SIW->getDebugLoc());
1032 SIW.eraseFromParent();
1033 } else if (DefaultExitBB) {
1034 assert(SI.getNumCases() > 0 &&
1035 "If we had no cases we'd have a common successor!");
1036 // Move the last case to the default successor. This is valid as if the
1037 // default got unswitched it cannot be reached. This has the advantage of
1038 // being simple and keeping the number of edges from this switch to
1039 // successors the same, and avoiding any PHI update complexity.
1040 auto LastCaseI = std::prev(SI.case_end());
1041
1042 SI.setDefaultDest(LastCaseI->getCaseSuccessor());
1044 0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
1045 SIW.removeCase(LastCaseI);
1046 }
1047
1048 // Walk the unswitched exit blocks and the unswitched split blocks and update
1049 // the dominator tree based on the CFG edits. While we are walking unordered
1050 // containers here, the API for applyUpdates takes an unordered list of
1051 // updates and requires them to not contain duplicates.
1053 for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
1054 DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
1055 DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
1056 }
1057 for (auto SplitUnswitchedPair : SplitExitBBMap) {
1058 DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
1059 DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
1060 }
1061
1062 if (MSSAU) {
1063 MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
1064 if (VerifyMemorySSA)
1065 MSSAU->getMemorySSA()->verifyMemorySSA();
1066 } else {
1067 DT.applyUpdates(DTUpdates);
1068 }
1069
1070 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1071
1072 // We may have changed the nesting relationship for this loop so hoist it to
1073 // its correct parent if needed.
1074 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
1075
1076 if (MSSAU && VerifyMemorySSA)
1077 MSSAU->getMemorySSA()->verifyMemorySSA();
1078
1079 ++NumTrivial;
1080 ++NumSwitches;
1081 LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
1082 return true;
1083}
1084
1085/// This routine scans the loop to find a branch or switch which occurs before
1086/// any side effects occur. These can potentially be unswitched without
1087/// duplicating the loop. If a branch or switch is successfully unswitched the
1088/// scanning continues to see if subsequent branches or switches have become
1089/// trivial. Once all trivial candidates have been unswitched, this routine
1090/// returns.
1091///
1092/// The return value indicates whether anything was unswitched (and therefore
1093/// changed).
1094///
1095/// If `SE` is not null, it will be updated based on the potential loop SCEVs
1096/// invalidated by this.
// NOTE(review): the opening line of this definition (the function name and
// leading parameters, original line 1097) appears to have been lost in
// extraction. From the body and the doc comment above, this is the driver
// that repeatedly unswitches trivial branch/switch conditions starting at
// the loop header — TODO confirm the exact signature against upstream.
// Returns true iff anything was unswitched.
1098 LoopInfo &LI, ScalarEvolution *SE,
1099 MemorySSAUpdater *MSSAU) {
// Tracks whether any unswitch succeeded; every early exit returns this.
1100 bool Changed = false;
1101
1102 // If loop header has only one reachable successor we should keep looking for
1103 // trivial condition candidates in the successor as well. An alternative is
1104 // to constant fold conditions and merge successors into loop header (then we
1105 // only need to check header's terminator). The reason for not doing this in
1106 // LoopUnswitch pass is that it could potentially break LoopPassManager's
1107 // invariants. Folding dead branches could either eliminate the current loop
1108 // or make other loops unreachable. LCSSA form might also not be preserved
1109 // after deleting branches. The following code keeps traversing loop header's
1110 // successors until it finds the trivial condition candidate (condition that
1111 // is not a constant). Since unswitching generates branches with constant
1112 // conditions, this scenario could be very common in practice.
1113 BasicBlock *CurrentBB = L.getHeader();
// NOTE(review): the declaration of `Visited` (original line 1114, presumably
// a SmallPtrSet of visited blocks used to break CFG cycles) was dropped by
// extraction — TODO confirm against upstream.
1115 Visited.insert(CurrentBB);
1116 do {
1117 // Check if there are any side-effecting instructions (e.g. stores, calls,
1118 // volatile loads) in the part of the loop that the code *would* execute
1119 // without unswitching.
// With MemorySSA available we can bail cheaply: any block def list that is
// more than a single MemoryPhi implies a memory-writing instruction.
1120 if (MSSAU) // Possible early exit with MSSA
1121 if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
1122 if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
1123 return Changed;
// Fallback/general check: scan every instruction for side effects.
1124 if (llvm::any_of(*CurrentBB,
1125 [](Instruction &I) { return I.mayHaveSideEffects(); }))
1126 return Changed;
1127
1128 Instruction *CurrentTerm = CurrentBB->getTerminator();
1129
1130 if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1131 // Don't bother trying to unswitch past a switch with a constant
1132 // condition. This should be removed prior to running this pass by
1133 // simplifycfg.
1134 if (isa<Constant>(SI->getCondition()))
1135 return Changed;
1136
1137 if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
1138 // Couldn't unswitch this one so we're done.
1139 return Changed;
1140
1141 // Mark that we managed to unswitch something.
1142 Changed = true;
1143
1144 // If unswitching turned the terminator into an unconditional branch then
1145 // we can continue. The unswitching logic specifically works to fold any
1146 // cases it can into an unconditional branch to make it easier to
1147 // recognize here.
1148 auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
1149 if (!BI || BI->isConditional())
1150 return Changed;
1151
1152 CurrentBB = BI->getSuccessor(0);
1153 continue;
1154 }
1155
1156 auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1157 if (!BI)
1158 // We do not understand other terminator instructions.
1159 return Changed;
1160
1161 // Don't bother trying to unswitch past an unconditional branch or a branch
1162 // with a constant value. These should be removed by simplifycfg prior to
1163 // running this pass.
1164 if (!BI->isConditional() ||
1165 isa<Constant>(skipTrivialSelect(BI->getCondition())))
1166 return Changed;
1167
1168 // Found a trivial condition candidate: non-foldable conditional branch. If
1169 // we fail to unswitch this, we can't do anything else that is trivial.
1170 if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
1171 return Changed;
1172
1173 // Mark that we managed to unswitch something.
1174 Changed = true;
1175
1176 // If we only unswitched some of the conditions feeding the branch, we won't
1177 // have collapsed it to a single successor.
1178 BI = cast<BranchInst>(CurrentBB->getTerminator());
1179 if (BI->isConditional())
1180 return Changed;
1181
1182 // Follow the newly unconditional branch into its successor.
1183 CurrentBB = BI->getSuccessor(0);
1184
1185 // When continuing, if we exit the loop or reach a previous visited block,
1186 // then we can not reach any trivial condition candidates (unfoldable
1187 // branch instructions or switch instructions) and no unswitch can happen.
1188 } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
1189
1190 return Changed;
1191}
1192
1193/// Build the cloned blocks for an unswitched copy of the given loop.
1194///
1195/// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
1196/// after the split block (`SplitBB`) that will be used to select between the
1197/// cloned and original loop.
1198///
1199/// This routine handles cloning all of the necessary loop blocks and exit
1200/// blocks including rewriting their instructions and the relevant PHI nodes.
1201/// Any loop blocks or exit blocks which are dominated by a different successor
1202/// than the one for this clone of the loop blocks can be trivially skipped. We
1203/// use the `DominatingSucc` map to determine whether a block satisfies that
1204/// property with a simple map lookup.
1205///
1206/// It also correctly creates the unconditional branch in the cloned
1207/// unswitched parent block to only point at the unswitched successor.
1208///
1209/// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1210/// block splitting is correctly reflected in `LoopInfo`, essentially all of
1211/// the cloned blocks (and their loops) are left without full `LoopInfo`
1212/// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1213/// blocks to them but doesn't create the cloned `DominatorTree` structure and
1214/// instead the caller must recompute an accurate DT. It *does* correctly
1215/// update the `AssumptionCache` provided in `AC`.
// NOTE(review): the opening line of this definition (original line 1216 —
// presumably `static BasicBlock *buildClonedLoopBlocks(` per the doc comment
// above) plus the parameter lines at original 1220/1222 (the `DominatingSucc`
// map and the DT-updates vector, both referenced below) were dropped during
// extraction — TODO restore from upstream.
1217 Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
1218 ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
1219 BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
1221 ValueToValueMapTy &VMap,
1223 DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU,
1224 ScalarEvolution *SE) {
// NOTE(review): the declaration of `NewBlocks` (original line 1225,
// presumably a SmallVector<BasicBlock *>) was dropped by extraction.
1226 NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
1227
1228 // We will need to clone a bunch of blocks, wrap up the clone operation in
1229 // a helper.
1230 auto CloneBlock = [&](BasicBlock *OldBB) {
1231 // Clone the basic block and insert it before the new preheader.
1232 BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
1233 NewBB->moveBefore(LoopPH);
1234
1235 // Record this block and the mapping.
1236 NewBlocks.push_back(NewBB);
1237 VMap[OldBB] = NewBB;
1238
1239 return NewBB;
1240 };
1241
1242 // We skip cloning blocks when they have a dominating succ that is not the
1243 // succ we are cloning for.
1244 auto SkipBlock = [&](BasicBlock *BB) {
1245 auto It = DominatingSucc.find(BB);
1246 return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
1247 };
1248
1249 // First, clone the preheader.
1250 auto *ClonedPH = CloneBlock(LoopPH);
1251
1252 // Then clone all the loop blocks, skipping the ones that aren't necessary.
1253 for (auto *LoopBB : L.blocks())
1254 if (!SkipBlock(LoopBB))
1255 CloneBlock(LoopBB);
1256
1257 // Split all the loop exit edges so that when we clone the exit blocks, if
1258 // any of the exit blocks are *also* a preheader for some other loop, we
1259 // don't create multiple predecessors entering the loop header.
1260 for (auto *ExitBB : ExitBlocks) {
1261 if (SkipBlock(ExitBB))
1262 continue;
1263
1264 // When we are going to clone an exit, we don't need to clone all the
1265 // instructions in the exit block and we want to ensure we have an easy
1266 // place to merge the CFG, so split the exit first. This is always safe to
1267 // do because there cannot be any non-loop predecessors of a loop exit in
1268 // loop simplified form.
1269 auto *MergeBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
1270
1271 // Rearrange the names to make it easier to write test cases by having the
1272 // exit block carry the suffix rather than the merge block carrying the
1273 // suffix.
1274 MergeBB->takeName(ExitBB);
1275 ExitBB->setName(Twine(MergeBB->getName()) + ".split");
1276
1277 // Now clone the original exit block.
1278 auto *ClonedExitBB = CloneBlock(ExitBB);
1279 assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1280 "Exit block should have been split to have one successor!");
1281 assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1282 "Cloned exit block has the wrong successor!");
1283
1284 // Remap any cloned instructions and create a merge phi node for them.
// Walk the original and cloned exit blocks in lock-step, excluding the
// terminator of each (the std::prev of end()).
1285 for (auto ZippedInsts : llvm::zip_first(
1286 llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
1287 llvm::make_range(ClonedExitBB->begin(),
1288 std::prev(ClonedExitBB->end())))) {
1289 Instruction &I = std::get<0>(ZippedInsts);
1290 Instruction &ClonedI = std::get<1>(ZippedInsts);
1291
1292 // The only instructions in the exit block should be PHI nodes and
1293 // potentially a landing pad.
// NOTE(review): the assert's condition expression (original line 1295)
// was dropped by extraction; only the message string survives.
1294 assert(
1296 "Bad instruction in exit block!");
1297 // We should have a value map between the instruction and its clone.
1298 assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
1299
1300 // Forget SCEVs based on exit phis in case SCEV looked through the phi.
// NOTE(review): the statement invalidating SCEV for PN (original line
// 1303) was dropped by extraction — this `if` currently has no body here.
1301 if (SE)
1302 if (auto *PN = dyn_cast<PHINode>(&I))
1304
// Build a merge PHI in the split-off merge block that selects between the
// original and cloned exit values, and redirect all users to it.
1305 BasicBlock::iterator InsertPt = MergeBB->getFirstInsertionPt();
1306
1307 auto *MergePN =
1308 PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi");
1309 MergePN->insertBefore(InsertPt);
1310 MergePN->setDebugLoc(InsertPt->getDebugLoc());
1311 I.replaceAllUsesWith(MergePN);
1312 MergePN->addIncoming(&I, ExitBB);
1313 MergePN->addIncoming(&ClonedI, ClonedExitBB);
1314 }
1315 }
1316
1317 // Rewrite the instructions in the cloned blocks to refer to the instructions
1318 // in the cloned blocks. We have to do this as a second pass so that we have
1319 // everything available. Also, we have inserted new instructions which may
1320 // include assume intrinsics, so we update the assumption cache while
1321 // processing this.
// NOTE(review): the remapping-flag argument lines (original 1326/1328) and
// the assumption-cache registration statement (original 1330) were dropped
// by extraction, leaving these calls visibly truncated.
1322 Module *M = ClonedPH->getParent()->getParent();
1323 for (auto *ClonedBB : NewBlocks)
1324 for (Instruction &I : *ClonedBB) {
1325 RemapDbgRecordRange(M, I.getDbgRecordRange(), VMap,
1327 RemapInstruction(&I, VMap,
1329 if (auto *II = dyn_cast<AssumeInst>(&I))
1331 }
1332
1333 // Update any PHI nodes in the cloned successors of the skipped blocks to not
1334 // have spurious incoming values.
1335 for (auto *LoopBB : L.blocks())
1336 if (SkipBlock(LoopBB))
1337 for (auto *SuccBB : successors(LoopBB))
1338 if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
1339 for (PHINode &PN : ClonedSuccBB->phis())
1340 PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
1341
1342 // Remove the cloned parent as a predecessor of any successor we ended up
1343 // cloning other than the unswitched one.
1344 auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
1345 for (auto *SuccBB : successors(ParentBB)) {
1346 if (SuccBB == UnswitchedSuccBB)
1347 continue;
1348
1349 auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
1350 if (!ClonedSuccBB)
1351 continue;
1352
1353 ClonedSuccBB->removePredecessor(ClonedParentBB,
1354 /*KeepOneInputPHIs*/ true);
1355 }
1356
1357 // Replace the cloned branch with an unconditional branch to the cloned
1358 // unswitched successor.
1359 auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
1360 Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1361 // Trivial Simplification. If Terminator is a conditional branch and
1362 // condition becomes dead - erase it.
1363 Value *ClonedConditionToErase = nullptr;
1364 if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1365 ClonedConditionToErase = BI->getCondition();
1366 else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1367 ClonedConditionToErase = SI->getCondition();
1368
// The new unconditional branch inherits the old terminator's debug location
// before the old terminator is erased.
1369 Instruction *BI = BranchInst::Create(ClonedSuccBB, ClonedParentBB);
1370 BI->setDebugLoc(ClonedTerminator->getDebugLoc());
1371 ClonedTerminator->eraseFromParent();
1372
1373 if (ClonedConditionToErase)
1374 RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
1375 MSSAU);
1376
1377 // If there are duplicate entries in the PHI nodes because of multiple edges
1378 // to the unswitched successor, we need to nuke all but one as we replaced it
1379 // with a direct branch.
1380 for (PHINode &PN : ClonedSuccBB->phis()) {
1381 bool Found = false;
1382 // Loop over the incoming operands backwards so we can easily delete as we
1383 // go without invalidating the index.
1384 for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
1385 if (PN.getIncomingBlock(i) != ClonedParentBB)
1386 continue;
1387 if (!Found) {
1388 Found = true;
1389 continue;
1390 }
1391 PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
1392 }
1393 }
1394
1395 // Record the domtree updates for the new blocks.
// NOTE(review): the declaration of `SuccSet` (original line 1396, used to
// de-duplicate successor edges per cloned block) was dropped by extraction.
1397 for (auto *ClonedBB : NewBlocks) {
1398 for (auto *SuccBB : successors(ClonedBB))
1399 if (SuccSet.insert(SuccBB).second)
1400 DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1401 SuccSet.clear();
1402 }
1403
// Return the cloned preheader: the entry point into the unswitched copy.
1404 return ClonedPH;
1405}
1406
1407/// Recursively clone the specified loop and all of its children.
1408///
1409/// The target parent loop for the clone should be provided, or can be null if
1410/// the clone is a top-level loop. While cloning, all the blocks are mapped
1411/// with the provided value map. The entire original loop must be present in
1412/// the value map. The cloned loop is returned.
1413static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
1414 const ValueToValueMapTy &VMap, LoopInfo &LI) {
1415 auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
1416 assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
1417 ClonedL.reserveBlocks(OrigL.getNumBlocks());
1418 for (auto *BB : OrigL.blocks()) {
1419 auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
1420 ClonedL.addBlockEntry(ClonedBB);
1421 if (LI.getLoopFor(BB) == &OrigL)
1422 LI.changeLoopFor(ClonedBB, &ClonedL);
1423 }
1424 };
1425
1426 // We specially handle the first loop because it may get cloned into
1427 // a different parent and because we most commonly are cloning leaf loops.
1428 Loop *ClonedRootL = LI.AllocateLoop();
1429 if (RootParentL)
1430 RootParentL->addChildLoop(ClonedRootL);
1431 else
1432 LI.addTopLevelLoop(ClonedRootL);
1433 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1434
1435 if (OrigRootL.isInnermost())
1436 return ClonedRootL;
1437
1438 // If we have a nest, we can quickly clone the entire loop nest using an
1439 // iterative approach because it is a tree. We keep the cloned parent in the
1440 // data structure to avoid repeatedly querying through a map to find it.
1441 SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
1442 // Build up the loops to clone in reverse order as we'll clone them from the
1443 // back.
1444 for (Loop *ChildL : llvm::reverse(OrigRootL))
1445 LoopsToClone.push_back({ClonedRootL, ChildL});
1446 do {
1447 Loop *ClonedParentL, *L;
1448 std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
1449 Loop *ClonedL = LI.AllocateLoop();
1450 ClonedParentL->addChildLoop(ClonedL);
1451 AddClonedBlocksToLoop(*L, *ClonedL);
1452 for (Loop *ChildL : llvm::reverse(*L))
1453 LoopsToClone.push_back({ClonedL, ChildL});
1454 } while (!LoopsToClone.empty());
1455
1456 return ClonedRootL;
1457}
1458
1459/// Build the cloned loops of an original loop from unswitching.
1460///
1461/// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1462/// operation. We need to re-verify that there even is a loop (as the backedge
1463/// may not have been cloned), and even if there are remaining backedges the
1464/// backedge set may be different. However, we know that each child loop is
1465/// undisturbed, we only need to find where to place each child loop within
1466/// either any parent loop or within a cloned version of the original loop.
1467///
1468/// Because child loops may end up cloned outside of any cloned version of the
1469/// original loop, multiple cloned sibling loops may be created. All of them
1470/// are returned so that the newly introduced loop nest roots can be
1471/// identified.
1472static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1473 const ValueToValueMapTy &VMap, LoopInfo &LI,
1474 SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1475 Loop *ClonedL = nullptr;
1476
1477 auto *OrigPH = OrigL.getLoopPreheader();
1478 auto *OrigHeader = OrigL.getHeader();
1479
1480 auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1481 auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1482
1483 // We need to know the loops of the cloned exit blocks to even compute the
1484 // accurate parent loop. If we only clone exits to some parent of the
1485 // original parent, we want to clone into that outer loop. We also keep track
1486 // of the loops that our cloned exit blocks participate in.
1487 Loop *ParentL = nullptr;
1488 SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1490 ClonedExitsInLoops.reserve(ExitBlocks.size());
1491 for (auto *ExitBB : ExitBlocks)
1492 if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1493 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1494 ExitLoopMap[ClonedExitBB] = ExitL;
1495 ClonedExitsInLoops.push_back(ClonedExitBB);
1496 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1497 ParentL = ExitL;
1498 }
1499 assert((!ParentL || ParentL == OrigL.getParentLoop() ||
1500 ParentL->contains(OrigL.getParentLoop())) &&
1501 "The computed parent loop should always contain (or be) the parent of "
1502 "the original loop.");
1503
1504 // We build the set of blocks dominated by the cloned header from the set of
1505 // cloned blocks out of the original loop. While not all of these will
1506 // necessarily be in the cloned loop, it is enough to establish that they
1507 // aren't in unreachable cycles, etc.
1508 SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1509 for (auto *BB : OrigL.blocks())
1510 if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1511 ClonedLoopBlocks.insert(ClonedBB);
1512
1513 // Rebuild the set of blocks that will end up in the cloned loop. We may have
1514 // skipped cloning some region of this loop which can in turn skip some of
1515 // the backedges so we have to rebuild the blocks in the loop based on the
1516 // backedges that remain after cloning.
1518 SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1519 for (auto *Pred : predecessors(ClonedHeader)) {
1520 // The only possible non-loop header predecessor is the preheader because
1521 // we know we cloned the loop in simplified form.
1522 if (Pred == ClonedPH)
1523 continue;
1524
1525 // Because the loop was in simplified form, the only non-loop predecessor
1526 // should be the preheader.
1527 assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
1528 "header other than the preheader "
1529 "that is not part of the loop!");
1530
1531 // Insert this block into the loop set and on the first visit (and if it
1532 // isn't the header we're currently walking) put it into the worklist to
1533 // recurse through.
1534 if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1535 Worklist.push_back(Pred);
1536 }
1537
1538 // If we had any backedges then there *is* a cloned loop. Put the header into
1539 // the loop set and then walk the worklist backwards to find all the blocks
1540 // that remain within the loop after cloning.
1541 if (!BlocksInClonedLoop.empty()) {
1542 BlocksInClonedLoop.insert(ClonedHeader);
1543
1544 while (!Worklist.empty()) {
1545 BasicBlock *BB = Worklist.pop_back_val();
1546 assert(BlocksInClonedLoop.count(BB) &&
1547 "Didn't put block into the loop set!");
1548
1549 // Insert any predecessors that are in the possible set into the cloned
1550 // set, and if the insert is successful, add them to the worklist. Note
1551 // that we filter on the blocks that are definitely reachable via the
1552 // backedge to the loop header so we may prune out dead code within the
1553 // cloned loop.
1554 for (auto *Pred : predecessors(BB))
1555 if (ClonedLoopBlocks.count(Pred) &&
1556 BlocksInClonedLoop.insert(Pred).second)
1557 Worklist.push_back(Pred);
1558 }
1559
1560 ClonedL = LI.AllocateLoop();
1561 if (ParentL) {
1562 ParentL->addBasicBlockToLoop(ClonedPH, LI);
1563 ParentL->addChildLoop(ClonedL);
1564 } else {
1565 LI.addTopLevelLoop(ClonedL);
1566 }
1567 NonChildClonedLoops.push_back(ClonedL);
1568
1569 ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1570 // We don't want to just add the cloned loop blocks based on how we
1571 // discovered them. The original order of blocks was carefully built in
1572 // a way that doesn't rely on predecessor ordering. Rather than re-invent
1573 // that logic, we just re-walk the original blocks (and those of the child
1574 // loops) and filter them as we add them into the cloned loop.
1575 for (auto *BB : OrigL.blocks()) {
1576 auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1577 if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1578 continue;
1579
1580 // Directly add the blocks that are only in this loop.
1581 if (LI.getLoopFor(BB) == &OrigL) {
1582 ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1583 continue;
1584 }
1585
1586 // We want to manually add it to this loop and parents.
1587 // Registering it with LoopInfo will happen when we clone the top
1588 // loop for this block.
1589 for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1590 PL->addBlockEntry(ClonedBB);
1591 }
1592
1593 // Now add each child loop whose header remains within the cloned loop. All
1594 // of the blocks within the loop must satisfy the same constraints as the
1595 // header so once we pass the header checks we can just clone the entire
1596 // child loop nest.
1597 for (Loop *ChildL : OrigL) {
1598 auto *ClonedChildHeader =
1599 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1600 if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1601 continue;
1602
1603#ifndef NDEBUG
1604 // We should never have a cloned child loop header but fail to have
1605 // all of the blocks for that child loop.
1606 for (auto *ChildLoopBB : ChildL->blocks())
1607 assert(BlocksInClonedLoop.count(
1608 cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
1609 "Child cloned loop has a header within the cloned outer "
1610 "loop but not all of its blocks!");
1611#endif
1612
1613 cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1614 }
1615 }
1616
1617 // Now that we've handled all the components of the original loop that were
1618 // cloned into a new loop, we still need to handle anything from the original
1619 // loop that wasn't in a cloned loop.
1620
1621 // Figure out what blocks are left to place within any loop nest containing
1622 // the unswitched loop. If we never formed a loop, the cloned PH is one of
1623 // them.
1624 SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1625 if (BlocksInClonedLoop.empty())
1626 UnloopedBlockSet.insert(ClonedPH);
1627 for (auto *ClonedBB : ClonedLoopBlocks)
1628 if (!BlocksInClonedLoop.count(ClonedBB))
1629 UnloopedBlockSet.insert(ClonedBB);
1630
1631 // Copy the cloned exits and sort them in ascending loop depth, we'll work
1632 // backwards across these to process them inside out. The order shouldn't
1633 // matter as we're just trying to build up the map from inside-out; we use
1634 // the map in a more stably ordered way below.
1635 auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1636 llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1637 return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1638 ExitLoopMap.lookup(RHS)->getLoopDepth();
1639 });
1640
1641 // Populate the existing ExitLoopMap with everything reachable from each
1642 // exit, starting from the inner most exit.
1643 while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1644 assert(Worklist.empty() && "Didn't clear worklist!");
1645
1646 BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1647 Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1648
1649 // Walk the CFG back until we hit the cloned PH adding everything reachable
1650 // and in the unlooped set to this exit block's loop.
1651 Worklist.push_back(ExitBB);
1652 do {
1653 BasicBlock *BB = Worklist.pop_back_val();
1654 // We can stop recursing at the cloned preheader (if we get there).
1655 if (BB == ClonedPH)
1656 continue;
1657
1658 for (BasicBlock *PredBB : predecessors(BB)) {
1659 // If this pred has already been moved to our set or is part of some
1660 // (inner) loop, no update needed.
1661 if (!UnloopedBlockSet.erase(PredBB)) {
1662 assert(
1663 (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
1664 "Predecessor not mapped to a loop!");
1665 continue;
1666 }
1667
1668 // We just insert into the loop set here. We'll add these blocks to the
1669 // exit loop after we build up the set in an order that doesn't rely on
1670 // predecessor order (which in turn relies on use list order).
1671 bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1672 (void)Inserted;
1673 assert(Inserted && "Should only visit an unlooped block once!");
1674
1675 // And recurse through to its predecessors.
1676 Worklist.push_back(PredBB);
1677 }
1678 } while (!Worklist.empty());
1679 }
1680
1681 // Now that the ExitLoopMap gives as mapping for all the non-looping cloned
1682 // blocks to their outer loops, walk the cloned blocks and the cloned exits
1683 // in their original order adding them to the correct loop.
1684
1685 // We need a stable insertion order. We use the order of the original loop
1686 // order and map into the correct parent loop.
1687 for (auto *BB : llvm::concat<BasicBlock *const>(
1688 ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1689 if (Loop *OuterL = ExitLoopMap.lookup(BB))
1690 OuterL->addBasicBlockToLoop(BB, LI);
1691
1692#ifndef NDEBUG
1693 for (auto &BBAndL : ExitLoopMap) {
1694 auto *BB = BBAndL.first;
1695 auto *OuterL = BBAndL.second;
1696 assert(LI.getLoopFor(BB) == OuterL &&
1697 "Failed to put all blocks into outer loops!");
1698 }
1699#endif
1700
1701 // Now that all the blocks are placed into the correct containing loop in the
1702 // absence of child loops, find all the potentially cloned child loops and
1703 // clone them into whatever outer loop we placed their header into.
1704 for (Loop *ChildL : OrigL) {
1705 auto *ClonedChildHeader =
1706 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1707 if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1708 continue;
1709
1710#ifndef NDEBUG
1711 for (auto *ChildLoopBB : ChildL->blocks())
1712 assert(VMap.count(ChildLoopBB) &&
1713 "Cloned a child loop header but not all of that loops blocks!");
1714#endif
1715
1716 NonChildClonedLoops.push_back(cloneLoopNest(
1717 *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1718 }
1719}
1720
1721static void
1723 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1724 DominatorTree &DT, MemorySSAUpdater *MSSAU) {
1725 // Find all the dead clones, and remove them from their successors.
1727 for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1728 for (const auto &VMap : VMaps)
1729 if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1730 if (!DT.isReachableFromEntry(ClonedBB)) {
1731 for (BasicBlock *SuccBB : successors(ClonedBB))
1732 SuccBB->removePredecessor(ClonedBB);
1733 DeadBlocks.push_back(ClonedBB);
1734 }
1735
1736 // Remove all MemorySSA in the dead blocks
1737 if (MSSAU) {
1738 SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1739 DeadBlocks.end());
1740 MSSAU->removeBlocks(DeadBlockSet);
1741 }
1742
1743 // Drop any remaining references to break cycles.
1744 for (BasicBlock *BB : DeadBlocks)
1745 BB->dropAllReferences();
1746 // Erase them from the IR.
1747 for (BasicBlock *BB : DeadBlocks)
1748 BB->eraseFromParent();
1749}
1750
1753 DominatorTree &DT, LoopInfo &LI,
1754 MemorySSAUpdater *MSSAU,
1755 ScalarEvolution *SE,
1756 LPMUpdater &LoopUpdater) {
1757 // Find all the dead blocks tied to this loop, and remove them from their
1758 // successors.
1760
1761 // Start with loop/exit blocks and get a transitive closure of reachable dead
1762 // blocks.
1763 SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1764 ExitBlocks.end());
1765 DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1766 while (!DeathCandidates.empty()) {
1767 auto *BB = DeathCandidates.pop_back_val();
1768 if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1769 for (BasicBlock *SuccBB : successors(BB)) {
1770 SuccBB->removePredecessor(BB);
1771 DeathCandidates.push_back(SuccBB);
1772 }
1773 DeadBlockSet.insert(BB);
1774 }
1775 }
1776
1777 // Remove all MemorySSA in the dead blocks
1778 if (MSSAU)
1779 MSSAU->removeBlocks(DeadBlockSet);
1780
1781 // Filter out the dead blocks from the exit blocks list so that it can be
1782 // used in the caller.
1783 llvm::erase_if(ExitBlocks,
1784 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1785
1786 // Walk from this loop up through its parents removing all of the dead blocks.
1787 for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1788 for (auto *BB : DeadBlockSet)
1789 ParentL->getBlocksSet().erase(BB);
1790 llvm::erase_if(ParentL->getBlocksVector(),
1791 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1792 }
1793
1794 // Now delete the dead child loops. This raw delete will clear them
1795 // recursively.
1796 llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1797 if (!DeadBlockSet.count(ChildL->getHeader()))
1798 return false;
1799
1800 assert(llvm::all_of(ChildL->blocks(),
1801 [&](BasicBlock *ChildBB) {
1802 return DeadBlockSet.count(ChildBB);
1803 }) &&
1804 "If the child loop header is dead all blocks in the child loop must "
1805 "be dead as well!");
1806 LoopUpdater.markLoopAsDeleted(*ChildL, ChildL->getName());
1807 if (SE)
1809 LI.destroy(ChildL);
1810 return true;
1811 });
1812
1813 // Remove the loop mappings for the dead blocks and drop all the references
1814 // from these blocks to others to handle cyclic references as we start
1815 // deleting the blocks themselves.
1816 for (auto *BB : DeadBlockSet) {
1817 // Check that the dominator tree has already been updated.
1818 assert(!DT.getNode(BB) && "Should already have cleared domtree!");
1819 LI.changeLoopFor(BB, nullptr);
1820 // Drop all uses of the instructions to make sure we won't have dangling
1821 // uses in other blocks.
1822 for (auto &I : *BB)
1823 if (!I.use_empty())
1824 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
1825 BB->dropAllReferences();
1826 }
1827
1828 // Actually delete the blocks now that they've been fully unhooked from the
1829 // IR.
1830 for (auto *BB : DeadBlockSet)
1831 BB->eraseFromParent();
1832}
1833
1834/// Recompute the set of blocks in a loop after unswitching.
1835///
1836/// This walks from the original headers predecessors to rebuild the loop. We
1837/// take advantage of the fact that new blocks can't have been added, and so we
1838/// filter by the original loop's blocks. This also handles potentially
1839/// unreachable code that we don't want to explore but might be found examining
1840/// the predecessors of the header.
1841///
1842/// If the original loop is no longer a loop, this will return an empty set. If
1843/// it remains a loop, all the blocks within it will be added to the set
1844/// (including those blocks in inner loops).
1846 LoopInfo &LI) {
1848
1849 auto *PH = L.getLoopPreheader();
1850 auto *Header = L.getHeader();
1851
1852 // A worklist to use while walking backwards from the header.
1854
1855 // First walk the predecessors of the header to find the backedges. This will
1856 // form the basis of our walk.
1857 for (auto *Pred : predecessors(Header)) {
1858 // Skip the preheader.
1859 if (Pred == PH)
1860 continue;
1861
1862 // Because the loop was in simplified form, the only non-loop predecessor
1863 // is the preheader.
1864 assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1865 "than the preheader that is not part of the "
1866 "loop!");
1867
1868 // Insert this block into the loop set and on the first visit and, if it
1869 // isn't the header we're currently walking, put it into the worklist to
1870 // recurse through.
1871 if (LoopBlockSet.insert(Pred).second && Pred != Header)
1872 Worklist.push_back(Pred);
1873 }
1874
1875 // If no backedges were found, we're done.
1876 if (LoopBlockSet.empty())
1877 return LoopBlockSet;
1878
1879 // We found backedges, recurse through them to identify the loop blocks.
1880 while (!Worklist.empty()) {
1881 BasicBlock *BB = Worklist.pop_back_val();
1882 assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1883
1884 // No need to walk past the header.
1885 if (BB == Header)
1886 continue;
1887
1888 // Because we know the inner loop structure remains valid we can use the
1889 // loop structure to jump immediately across the entire nested loop.
1890 // Further, because it is in loop simplified form, we can directly jump
1891 // to its preheader afterward.
1892 if (Loop *InnerL = LI.getLoopFor(BB))
1893 if (InnerL != &L) {
1894 assert(L.contains(InnerL) &&
1895 "Should not reach a loop *outside* this loop!");
1896 // The preheader is the only possible predecessor of the loop so
1897 // insert it into the set and check whether it was already handled.
1898 auto *InnerPH = InnerL->getLoopPreheader();
1899 assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1900 "but not contain the inner loop "
1901 "preheader!");
1902 if (!LoopBlockSet.insert(InnerPH).second)
1903 // The only way to reach the preheader is through the loop body
1904 // itself so if it has been visited the loop is already handled.
1905 continue;
1906
1907 // Insert all of the blocks (other than those already present) into
1908 // the loop set. We expect at least the block that led us to find the
1909 // inner loop to be in the block set, but we may also have other loop
1910 // blocks if they were already enqueued as predecessors of some other
1911 // outer loop block.
1912 for (auto *InnerBB : InnerL->blocks()) {
1913 if (InnerBB == BB) {
1914 assert(LoopBlockSet.count(InnerBB) &&
1915 "Block should already be in the set!");
1916 continue;
1917 }
1918
1919 LoopBlockSet.insert(InnerBB);
1920 }
1921
1922 // Add the preheader to the worklist so we will continue past the
1923 // loop body.
1924 Worklist.push_back(InnerPH);
1925 continue;
1926 }
1927
1928 // Insert any predecessors that were in the original loop into the new
1929 // set, and if the insert is successful, add them to the worklist.
1930 for (auto *Pred : predecessors(BB))
1931 if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1932 Worklist.push_back(Pred);
1933 }
1934
1935 assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
1936
1937 // We've found all the blocks participating in the loop, return our completed
1938 // set.
1939 return LoopBlockSet;
1940}
1941
1942/// Rebuild a loop after unswitching removes some subset of blocks and edges.
1943///
1944/// The removal may have removed some child loops entirely but cannot have
1945/// disturbed any remaining child loops. However, they may need to be hoisted
1946/// to the parent loop (or to be top-level loops). The original loop may be
1947/// completely removed.
1948///
1949/// The sibling loops resulting from this update are returned. If the original
1950/// loop remains a valid loop, it will be the first entry in this list with all
1951/// of the newly sibling loops following it.
1952///
1953/// Returns true if the loop remains a loop after unswitching, and false if it
1954/// is no longer a loop after unswitching (and should not continue to be
1955/// referenced).
1957 LoopInfo &LI,
1958 SmallVectorImpl<Loop *> &HoistedLoops,
1959 ScalarEvolution *SE) {
1960 auto *PH = L.getLoopPreheader();
1961
1962 // Compute the actual parent loop from the exit blocks. Because we may have
1963 // pruned some exits the loop may be different from the original parent.
1964 Loop *ParentL = nullptr;
1965 SmallVector<Loop *, 4> ExitLoops;
1966 SmallVector<BasicBlock *, 4> ExitsInLoops;
1967 ExitsInLoops.reserve(ExitBlocks.size());
1968 for (auto *ExitBB : ExitBlocks)
1969 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1970 ExitLoops.push_back(ExitL);
1971 ExitsInLoops.push_back(ExitBB);
1972 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1973 ParentL = ExitL;
1974 }
1975
1976 // Recompute the blocks participating in this loop. This may be empty if it
1977 // is no longer a loop.
1978 auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1979
1980 // If we still have a loop, we need to re-set the loop's parent as the exit
1981 // block set changing may have moved it within the loop nest. Note that this
1982 // can only happen when this loop has a parent as it can only hoist the loop
1983 // *up* the nest.
1984 if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1985 // Remove this loop's (original) blocks from all of the intervening loops.
1986 for (Loop *IL = L.getParentLoop(); IL != ParentL;
1987 IL = IL->getParentLoop()) {
1988 IL->getBlocksSet().erase(PH);
1989 for (auto *BB : L.blocks())
1990 IL->getBlocksSet().erase(BB);
1991 llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1992 return BB == PH || L.contains(BB);
1993 });
1994 }
1995
1996 LI.changeLoopFor(PH, ParentL);
1997 L.getParentLoop()->removeChildLoop(&L);
1998 if (ParentL)
1999 ParentL->addChildLoop(&L);
2000 else
2001 LI.addTopLevelLoop(&L);
2002 }
2003
2004 // Now we update all the blocks which are no longer within the loop.
2005 auto &Blocks = L.getBlocksVector();
2006 auto BlocksSplitI =
2007 LoopBlockSet.empty()
2008 ? Blocks.begin()
2009 : std::stable_partition(
2010 Blocks.begin(), Blocks.end(),
2011 [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
2012
2013 // Before we erase the list of unlooped blocks, build a set of them.
2014 SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
2015 if (LoopBlockSet.empty())
2016 UnloopedBlocks.insert(PH);
2017
2018 // Now erase these blocks from the loop.
2019 for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
2020 L.getBlocksSet().erase(BB);
2021 Blocks.erase(BlocksSplitI, Blocks.end());
2022
2023 // Sort the exits in ascending loop depth, we'll work backwards across these
2024 // to process them inside out.
2025 llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
2026 return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
2027 });
2028
2029 // We'll build up a set for each exit loop.
2030 SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
2031 Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
2032
2033 auto RemoveUnloopedBlocksFromLoop =
2034 [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
2035 for (auto *BB : UnloopedBlocks)
2036 L.getBlocksSet().erase(BB);
2037 llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
2038 return UnloopedBlocks.count(BB);
2039 });
2040 };
2041
2043 while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
2044 assert(Worklist.empty() && "Didn't clear worklist!");
2045 assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
2046
2047 // Grab the next exit block, in decreasing loop depth order.
2048 BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
2049 Loop &ExitL = *LI.getLoopFor(ExitBB);
2050 assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
2051
2052 // Erase all of the unlooped blocks from the loops between the previous
2053 // exit loop and this exit loop. This works because the ExitInLoops list is
2054 // sorted in increasing order of loop depth and thus we visit loops in
2055 // decreasing order of loop depth.
2056 for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
2057 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2058
2059 // Walk the CFG back until we hit the cloned PH adding everything reachable
2060 // and in the unlooped set to this exit block's loop.
2061 Worklist.push_back(ExitBB);
2062 do {
2063 BasicBlock *BB = Worklist.pop_back_val();
2064 // We can stop recursing at the cloned preheader (if we get there).
2065 if (BB == PH)
2066 continue;
2067
2068 for (BasicBlock *PredBB : predecessors(BB)) {
2069 // If this pred has already been moved to our set or is part of some
2070 // (inner) loop, no update needed.
2071 if (!UnloopedBlocks.erase(PredBB)) {
2072 assert((NewExitLoopBlocks.count(PredBB) ||
2073 ExitL.contains(LI.getLoopFor(PredBB))) &&
2074 "Predecessor not in a nested loop (or already visited)!");
2075 continue;
2076 }
2077
2078 // We just insert into the loop set here. We'll add these blocks to the
2079 // exit loop after we build up the set in a deterministic order rather
2080 // than the predecessor-influenced visit order.
2081 bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
2082 (void)Inserted;
2083 assert(Inserted && "Should only visit an unlooped block once!");
2084
2085 // And recurse through to its predecessors.
2086 Worklist.push_back(PredBB);
2087 }
2088 } while (!Worklist.empty());
2089
2090 // If blocks in this exit loop were directly part of the original loop (as
2091 // opposed to a child loop) update the map to point to this exit loop. This
2092 // just updates a map and so the fact that the order is unstable is fine.
2093 for (auto *BB : NewExitLoopBlocks)
2094 if (Loop *BBL = LI.getLoopFor(BB))
2095 if (BBL == &L || !L.contains(BBL))
2096 LI.changeLoopFor(BB, &ExitL);
2097
2098 // We will remove the remaining unlooped blocks from this loop in the next
2099 // iteration or below.
2100 NewExitLoopBlocks.clear();
2101 }
2102
2103 // Any remaining unlooped blocks are no longer part of any loop unless they
2104 // are part of some child loop.
2105 for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
2106 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2107 for (auto *BB : UnloopedBlocks)
2108 if (Loop *BBL = LI.getLoopFor(BB))
2109 if (BBL == &L || !L.contains(BBL))
2110 LI.changeLoopFor(BB, nullptr);
2111
2112 // Sink all the child loops whose headers are no longer in the loop set to
2113 // the parent (or to be top level loops). We reach into the loop and directly
2114 // update its subloop vector to make this batch update efficient.
2115 auto &SubLoops = L.getSubLoopsVector();
2116 auto SubLoopsSplitI =
2117 LoopBlockSet.empty()
2118 ? SubLoops.begin()
2119 : std::stable_partition(
2120 SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
2121 return LoopBlockSet.count(SubL->getHeader());
2122 });
2123 for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
2124 HoistedLoops.push_back(HoistedL);
2125 HoistedL->setParentLoop(nullptr);
2126
2127 // To compute the new parent of this hoisted loop we look at where we
2128 // placed the preheader above. We can't lookup the header itself because we
2129 // retained the mapping from the header to the hoisted loop. But the
2130 // preheader and header should have the exact same new parent computed
2131 // based on the set of exit blocks from the original loop as the preheader
2132 // is a predecessor of the header and so reached in the reverse walk. And
2133 // because the loops were all in simplified form the preheader of the
2134 // hoisted loop can't be part of some *other* loop.
2135 if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
2136 NewParentL->addChildLoop(HoistedL);
2137 else
2138 LI.addTopLevelLoop(HoistedL);
2139 }
2140 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
2141
2142 // Actually delete the loop if nothing remained within it.
2143 if (Blocks.empty()) {
2144 assert(SubLoops.empty() &&
2145 "Failed to remove all subloops from the original loop!");
2146 if (Loop *ParentL = L.getParentLoop())
2147 ParentL->removeChildLoop(llvm::find(*ParentL, &L));
2148 else
2149 LI.removeLoop(llvm::find(LI, &L));
2150 // markLoopAsDeleted for L should be triggered by the caller (it is
2151 // typically done within postUnswitch).
2152 if (SE)
2154 LI.destroy(&L);
2155 return false;
2156 }
2157
2158 return true;
2159}
2160
2161/// Helper to visit a dominator subtree, invoking a callable on each node.
2162///
2163/// Returning false at any point will stop walking past that node of the tree.
2164template <typename CallableT>
2165void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
2167 DomWorklist.push_back(DT[BB]);
2168#ifndef NDEBUG
2170 Visited.insert(DT[BB]);
2171#endif
2172 do {
2173 DomTreeNode *N = DomWorklist.pop_back_val();
2174
2175 // Visit this node.
2176 if (!Callable(N->getBlock()))
2177 continue;
2178
2179 // Accumulate the child nodes.
2180 for (DomTreeNode *ChildN : *N) {
2181 assert(Visited.insert(ChildN).second &&
2182 "Cannot visit a node twice when walking a tree!");
2183 DomWorklist.push_back(ChildN);
2184 }
2185 } while (!DomWorklist.empty());
2186}
2187
2189 bool CurrentLoopValid, bool PartiallyInvariant,
2190 bool InjectedCondition, ArrayRef<Loop *> NewLoops) {
2191 // If we did a non-trivial unswitch, we have added new (cloned) loops.
2192 if (!NewLoops.empty())
2193 U.addSiblingLoops(NewLoops);
2194
2195 // If the current loop remains valid, we should revisit it to catch any
2196 // other unswitch opportunities. Otherwise, we need to mark it as deleted.
2197 if (CurrentLoopValid) {
2198 if (PartiallyInvariant) {
2199 // Mark the new loop as partially unswitched, to avoid unswitching on
2200 // the same condition again.
2201 auto &Context = L.getHeader()->getContext();
2202 MDNode *DisableUnswitchMD = MDNode::get(
2203 Context,
2204 MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
2206 Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
2207 {DisableUnswitchMD});
2208 L.setLoopID(NewLoopID);
2209 } else if (InjectedCondition) {
2210 // Do the same for injection of invariant conditions.
2211 auto &Context = L.getHeader()->getContext();
2212 MDNode *DisableUnswitchMD = MDNode::get(
2213 Context,
2214 MDString::get(Context, "llvm.loop.unswitch.injection.disable"));
2216 Context, L.getLoopID(), {"llvm.loop.unswitch.injection"},
2217 {DisableUnswitchMD});
2218 L.setLoopID(NewLoopID);
2219 } else
2220 U.revisitCurrentLoop();
2221 } else
2222 U.markLoopAsDeleted(L, LoopName);
2223}
2224
2226 Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
2227 IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI,
2229 LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition) {
2230 auto *ParentBB = TI.getParent();
2232 SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2233
2234 // Save the current loop name in a variable so that we can report it even
2235 // after it has been deleted.
2236 std::string LoopName(L.getName());
2237
2238 // We can only unswitch switches, conditional branches with an invariant
2239 // condition, or combining invariant conditions with an instruction or
2240 // partially invariant instructions.
2241 assert((SI || (BI && BI->isConditional())) &&
2242 "Can only unswitch switches and conditional branch!");
2243 bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
2244 bool FullUnswitch =
2245 SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] &&
2246 !PartiallyInvariant);
2247 if (FullUnswitch)
2248 assert(Invariants.size() == 1 &&
2249 "Cannot have other invariants with full unswitching!");
2250 else
2252 "Partial unswitching requires an instruction as the condition!");
2253
2254 if (MSSAU && VerifyMemorySSA)
2255 MSSAU->getMemorySSA()->verifyMemorySSA();
2256
2257 // Constant and BBs tracking the cloned and continuing successor. When we are
2258 // unswitching the entire condition, this can just be trivially chosen to
2259 // unswitch towards `true`. However, when we are unswitching a set of
2260 // invariants combined with `and` or `or` or partially invariant instructions,
2261 // the combining operation determines the best direction to unswitch: we want
2262 // to unswitch the direction that will collapse the branch.
2263 bool Direction = true;
2264 int ClonedSucc = 0;
2265 if (!FullUnswitch) {
2267 (void)Cond;
2269 PartiallyInvariant) &&
2270 "Only `or`, `and`, an `select`, partially invariant instructions "
2271 "can combine invariants being unswitched.");
2272 if (!match(Cond, m_LogicalOr())) {
2273 if (match(Cond, m_LogicalAnd()) ||
2274 (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
2275 Direction = false;
2276 ClonedSucc = 1;
2277 }
2278 }
2279 }
2280
2281 BasicBlock *RetainedSuccBB =
2282 BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2283 SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2284 if (BI)
2285 UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2286 else
2287 for (auto Case : SI->cases())
2288 if (Case.getCaseSuccessor() != RetainedSuccBB)
2289 UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2290
2291 assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
2292 "Should not unswitch the same successor we are retaining!");
2293
2294 // The branch should be in this exact loop. Any inner loop's invariant branch
2295 // should be handled by unswitching that inner loop. The caller of this
2296 // routine should filter out any candidates that remain (but were skipped for
2297 // whatever reason).
2298 assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
2299
2300 // Compute the parent loop now before we start hacking on things.
2301 Loop *ParentL = L.getParentLoop();
2302 // Get blocks in RPO order for MSSA update, before changing the CFG.
2303 LoopBlocksRPO LBRPO(&L);
2304 if (MSSAU)
2305 LBRPO.perform(&LI);
2306
2307 // Compute the outer-most loop containing one of our exit blocks. This is the
2308 // furthest up our loopnest which can be mutated, which we will use below to
2309 // update things.
2310 Loop *OuterExitL = &L;
2312 L.getUniqueExitBlocks(ExitBlocks);
2313 for (auto *ExitBB : ExitBlocks) {
2314 // ExitBB can be an exit block for several levels in the loop nest. Make
2315 // sure we find the top most.
2316 Loop *NewOuterExitL = getTopMostExitingLoop(ExitBB, LI);
2317 if (!NewOuterExitL) {
2318 // We exited the entire nest with this block, so we're done.
2319 OuterExitL = nullptr;
2320 break;
2321 }
2322 if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2323 OuterExitL = NewOuterExitL;
2324 }
2325
2326 // At this point, we're definitely going to unswitch something so invalidate
2327 // any cached information in ScalarEvolution for the outer most loop
2328 // containing an exit block and all nested loops.
2329 if (SE) {
2330 if (OuterExitL)
2331 SE->forgetLoop(OuterExitL);
2332 else
2333 SE->forgetTopmostLoop(&L);
2335 }
2336
2337 // If the edge from this terminator to a successor dominates that successor,
2338 // store a map from each block in its dominator subtree to it. This lets us
2339 // tell when cloning for a particular successor if a block is dominated by
2340 // some *other* successor with a single data structure. We use this to
2341 // significantly reduce cloning.
2343 for (auto *SuccBB : llvm::concat<BasicBlock *const>(ArrayRef(RetainedSuccBB),
2344 UnswitchedSuccBBs))
2345 if (SuccBB->getUniquePredecessor() ||
2346 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2347 return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2348 }))
2349 visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2350 DominatingSucc[BB] = SuccBB;
2351 return true;
2352 });
2353
2354 // Split the preheader, so that we know that there is a safe place to insert
2355 // the conditional branch. We will change the preheader to have a conditional
2356 // branch on LoopCond. The original preheader will become the split point
2357 // between the unswitched versions, and we will have a new preheader for the
2358 // original loop.
2359 BasicBlock *SplitBB = L.getLoopPreheader();
2360 BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2361
2362 // Keep track of the dominator tree updates needed.
2364
2365 // Clone the loop for each unswitched successor.
2367 VMaps.reserve(UnswitchedSuccBBs.size());
2369 for (auto *SuccBB : UnswitchedSuccBBs) {
2370 VMaps.emplace_back(new ValueToValueMapTy());
2371 ClonedPHs[SuccBB] = buildClonedLoopBlocks(
2372 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2373 DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU, SE);
2374 }
2375
2376 // Drop metadata if we may break its semantics by moving this instr into the
2377 // split block.
2378 if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2380 // Do not spend time trying to understand if we can keep it, just drop it
2381 // to save compile time.
2382 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2383 else {
2384 // It is only legal to preserve make.implicit metadata if we are
2385 // guaranteed no reach implicit null check after following this branch.
2386 ICFLoopSafetyInfo SafetyInfo;
2387 SafetyInfo.computeLoopSafetyInfo(&L);
2388 if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2389 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2390 }
2391 }
2392
2393 // The stitching of the branched code back together depends on whether we're
2394 // doing full unswitching or not with the exception that we always want to
2395 // nuke the initial terminator placed in the split block.
2396 SplitBB->getTerminator()->eraseFromParent();
2397 if (FullUnswitch) {
2398 // Keep a clone of the terminator for MSSA updates.
2399 Instruction *NewTI = TI.clone();
2400 NewTI->insertInto(ParentBB, ParentBB->end());
2401
2402 // Splice the terminator from the original loop and rewrite its
2403 // successors.
2404 TI.moveBefore(*SplitBB, SplitBB->end());
2405 TI.dropLocation();
2406
2407 // First wire up the moved terminator to the preheaders.
2408 if (BI) {
2409 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2410 BI->setSuccessor(ClonedSucc, ClonedPH);
2411 BI->setSuccessor(1 - ClonedSucc, LoopPH);
2413 if (InsertFreeze) {
2414 // We don't give any debug location to the new freeze, because the
2415 // BI (`dyn_cast<BranchInst>(TI)`) is an in-loop instruction hoisted
2416 // out of the loop.
2417 Cond = new FreezeInst(Cond, Cond->getName() + ".fr", BI->getIterator());
2419 }
2420 BI->setCondition(Cond);
2421 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2422 } else {
2423 assert(SI && "Must either be a branch or switch!");
2424
2425 // Walk the cases and directly update their successors.
2426 assert(SI->getDefaultDest() == RetainedSuccBB &&
2427 "Not retaining default successor!");
2428 SI->setDefaultDest(LoopPH);
2429 for (const auto &Case : SI->cases())
2430 if (Case.getCaseSuccessor() == RetainedSuccBB)
2431 Case.setSuccessor(LoopPH);
2432 else
2433 Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2434
2435 if (InsertFreeze)
2436 SI->setCondition(new FreezeInst(SI->getCondition(),
2437 SI->getCondition()->getName() + ".fr",
2438 SI->getIterator()));
2439
2440 // We need to use the set to populate domtree updates as even when there
2441 // are multiple cases pointing at the same successor we only want to
2442 // remove and insert one edge in the domtree.
2443 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2444 DTUpdates.push_back(
2445 {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2446 }
2447
2448 if (MSSAU) {
2449 DT.applyUpdates(DTUpdates);
2450 DTUpdates.clear();
2451
2452 // Remove all but one edge to the retained block and all unswitched
2453 // blocks. This is to avoid having duplicate entries in the cloned Phis,
2454 // when we know we only keep a single edge for each case.
2455 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2456 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2457 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2458
2459 for (auto &VMap : VMaps)
2460 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2461 /*IgnoreIncomingWithNoClones=*/true);
2462 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2463
2464 // Remove all edges to unswitched blocks.
2465 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2466 MSSAU->removeEdge(ParentBB, SuccBB);
2467 }
2468
2469 // Now unhook the successor relationship as we'll be replacing
2470 // the terminator with a direct branch. This is much simpler for branches
2471 // than switches so we handle those first.
2472 if (BI) {
2473 // Remove the parent as a predecessor of the unswitched successor.
2474 assert(UnswitchedSuccBBs.size() == 1 &&
2475 "Only one possible unswitched block for a branch!");
2476 BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2477 UnswitchedSuccBB->removePredecessor(ParentBB,
2478 /*KeepOneInputPHIs*/ true);
2479 DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2480 } else {
2481 // Note that we actually want to remove the parent block as a predecessor
2482 // of *every* case successor. The case successor is either unswitched,
2483 // completely eliminating an edge from the parent to that successor, or it
2484 // is a duplicate edge to the retained successor as the retained successor
2485 // is always the default successor and as we'll replace this with a direct
2486 // branch we no longer need the duplicate entries in the PHI nodes.
2487 SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2488 assert(NewSI->getDefaultDest() == RetainedSuccBB &&
2489 "Not retaining default successor!");
2490 for (const auto &Case : NewSI->cases())
2491 Case.getCaseSuccessor()->removePredecessor(
2492 ParentBB,
2493 /*KeepOneInputPHIs*/ true);
2494
2495 // We need to use the set to populate domtree updates as even when there
2496 // are multiple cases pointing at the same successor we only want to
2497 // remove and insert one edge in the domtree.
2498 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2499 DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2500 }
2501
2502 // Create a new unconditional branch to the continuing block (as opposed to
2503 // the one cloned).
2504 Instruction *NewBI = BranchInst::Create(RetainedSuccBB, ParentBB);
2505 NewBI->setDebugLoc(NewTI->getDebugLoc());
2506
2507 // After MSSAU update, remove the cloned terminator instruction NewTI.
2508 NewTI->eraseFromParent();
2509 } else {
2510 assert(BI && "Only branches have partial unswitching.");
2511 assert(UnswitchedSuccBBs.size() == 1 &&
2512 "Only one possible unswitched block for a branch!");
2513 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2514 // When doing a partial unswitch, we have to do a bit more work to build up
2515 // the branch in the split block.
2516 if (PartiallyInvariant)
2518 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
2519 else {
2521 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
2522 FreezeLoopUnswitchCond, BI, &AC, DT, *BI);
2523 }
2524 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2525
2526 if (MSSAU) {
2527 DT.applyUpdates(DTUpdates);
2528 DTUpdates.clear();
2529
2530 // Perform MSSA cloning updates.
2531 for (auto &VMap : VMaps)
2532 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2533 /*IgnoreIncomingWithNoClones=*/true);
2534 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2535 }
2536 }
2537
2538 // Apply the updates accumulated above to get an up-to-date dominator tree.
2539 DT.applyUpdates(DTUpdates);
2540
2541 // Now that we have an accurate dominator tree, first delete the dead cloned
2542 // blocks so that we can accurately build any cloned loops. It is important to
2543 // not delete the blocks from the original loop yet because we still want to
2544 // reference the original loop to understand the cloned loop's structure.
2545 deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2546
2547 // Build the cloned loop structure itself. This may be substantially
2548 // different from the original structure due to the simplified CFG. This also
2549 // handles inserting all the cloned blocks into the correct loops.
2550 SmallVector<Loop *, 4> NonChildClonedLoops;
2551 for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2552 buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2553
2554 // Now that our cloned loops have been built, we can update the original loop.
2555 // First we delete the dead blocks from it and then we rebuild the loop
2556 // structure taking these deletions into account.
2557 deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, SE, LoopUpdater);
2558
2559 if (MSSAU && VerifyMemorySSA)
2560 MSSAU->getMemorySSA()->verifyMemorySSA();
2561
2562 SmallVector<Loop *, 4> HoistedLoops;
2563 bool IsStillLoop =
2564 rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops, SE);
2565
2566 if (MSSAU && VerifyMemorySSA)
2567 MSSAU->getMemorySSA()->verifyMemorySSA();
2568
2569 // This transformation has a high risk of corrupting the dominator tree, and
2570 // the below steps to rebuild loop structures will result in hard to debug
2571 // errors in that case so verify that the dominator tree is sane first.
2572 // FIXME: Remove this when the bugs stop showing up and rely on existing
2573 // verification steps.
2574 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
2575
2576 if (BI && !PartiallyInvariant) {
2577 // If we unswitched a branch which collapses the condition to a known
2578 // constant we want to replace all the uses of the invariants within both
2579 // the original and cloned blocks. We do this here so that we can use the
2580 // now updated dominator tree to identify which side the users are on.
2581 assert(UnswitchedSuccBBs.size() == 1 &&
2582 "Only one possible unswitched block for a branch!");
2583 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2584
2585 // When considering multiple partially-unswitched invariants
2586 // we cant just go replace them with constants in both branches.
2587 //
2588 // For 'AND' we infer that true branch ("continue") means true
2589 // for each invariant operand.
2590 // For 'OR' we can infer that false branch ("continue") means false
2591 // for each invariant operand.
2592 // So it happens that for multiple-partial case we dont replace
2593 // in the unswitched branch.
2594 bool ReplaceUnswitched =
2595 FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
2596
2597 ConstantInt *UnswitchedReplacement =
2600 ConstantInt *ContinueReplacement =
2603 for (Value *Invariant : Invariants) {
2604 assert(!isa<Constant>(Invariant) &&
2605 "Should not be replacing constant values!");
2606 // Use make_early_inc_range here as set invalidates the iterator.
2607 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
2608 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2609 if (!UserI)
2610 continue;
2611
2612 // Replace it with the 'continue' side if in the main loop body, and the
2613 // unswitched if in the cloned blocks.
2614 if (DT.dominates(LoopPH, UserI->getParent()))
2615 U.set(ContinueReplacement);
2616 else if (ReplaceUnswitched &&
2617 DT.dominates(ClonedPH, UserI->getParent()))
2618 U.set(UnswitchedReplacement);
2619 }
2620 }
2621 }
2622
2623 // We can change which blocks are exit blocks of all the cloned sibling
2624 // loops, the current loop, and any parent loops which shared exit blocks
2625 // with the current loop. As a consequence, we need to re-form LCSSA for
2626 // them. But we shouldn't need to re-form LCSSA for any child loops.
2627 // FIXME: This could be made more efficient by tracking which exit blocks are
2628 // new, and focusing on them, but that isn't likely to be necessary.
2629 //
2630 // In order to reasonably rebuild LCSSA we need to walk inside-out across the
2631 // loop nest and update every loop that could have had its exits changed. We
2632 // also need to cover any intervening loops. We add all of these loops to
2633 // a list and sort them by loop depth to achieve this without updating
2634 // unnecessary loops.
2635 auto UpdateLoop = [&](Loop &UpdateL) {
2636#ifndef NDEBUG
2637 UpdateL.verifyLoop();
2638 for (Loop *ChildL : UpdateL) {
2639 ChildL->verifyLoop();
2640 assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2641 "Perturbed a child loop's LCSSA form!");
2642 }
2643#endif
2644 // First build LCSSA for this loop so that we can preserve it when
2645 // forming dedicated exits. We don't want to perturb some other loop's
2646 // LCSSA while doing that CFG edit.
2647 formLCSSA(UpdateL, DT, &LI, SE);
2648
2649 // For loops reached by this loop's original exit blocks we may
2650 // introduced new, non-dedicated exits. At least try to re-form dedicated
2651 // exits for these loops. This may fail if they couldn't have dedicated
2652 // exits to start with.
2653 formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
2654 };
2655
2656 // For non-child cloned loops and hoisted loops, we just need to update LCSSA
2657 // and we can do it in any order as they don't nest relative to each other.
2658 //
2659 // Also check if any of the loops we have updated have become top-level loops
2660 // as that will necessitate widening the outer loop scope.
2661 for (Loop *UpdatedL :
2662 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2663 UpdateLoop(*UpdatedL);
2664 if (UpdatedL->isOutermost())
2665 OuterExitL = nullptr;
2666 }
2667 if (IsStillLoop) {
2668 UpdateLoop(L);
2669 if (L.isOutermost())
2670 OuterExitL = nullptr;
2671 }
2672
2673 // If the original loop had exit blocks, walk up through the outer most loop
2674 // of those exit blocks to update LCSSA and form updated dedicated exits.
2675 if (OuterExitL != &L)
2676 for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2677 OuterL = OuterL->getParentLoop())
2678 UpdateLoop(*OuterL);
2679
2680#ifndef NDEBUG
2681 // Verify the entire loop structure to catch any incorrect updates before we
2682 // progress in the pass pipeline.
2683 LI.verify(DT);
2684#endif
2685
2686 // Now that we've unswitched something, make callbacks to report the changes.
2687 // For that we need to merge together the updated loops and the cloned loops
2688 // and check whether the original loop survived.
2689 SmallVector<Loop *, 4> SibLoops;
2690 for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2691 if (UpdatedL->getParentLoop() == ParentL)
2692 SibLoops.push_back(UpdatedL);
2693 postUnswitch(L, LoopUpdater, LoopName, IsStillLoop, PartiallyInvariant,
2694 InjectedCondition, SibLoops);
2695
2696 if (MSSAU && VerifyMemorySSA)
2697 MSSAU->getMemorySSA()->verifyMemorySSA();
2698
2699 if (BI)
2700 ++NumBranches;
2701 else
2702 ++NumSwitches;
2703}
2704
2705/// Recursively compute the cost of a dominator subtree based on the per-block
2706/// cost map provided.
2707///
2708/// The recursive computation is memozied into the provided DT-indexed cost map
2709/// to allow querying it for most nodes in the domtree without it becoming
2710/// quadratic.
2712 DomTreeNode &N,
2715 // Don't accumulate cost (or recurse through) blocks not in our block cost
2716 // map and thus not part of the duplication cost being considered.
2717 auto BBCostIt = BBCostMap.find(N.getBlock());
2718 if (BBCostIt == BBCostMap.end())
2719 return 0;
2720
2721 // Lookup this node to see if we already computed its cost.
2722 auto DTCostIt = DTCostMap.find(&N);
2723 if (DTCostIt != DTCostMap.end())
2724 return DTCostIt->second;
2725
2726 // If not, we have to compute it. We can't use insert above and update
2727 // because computing the cost may insert more things into the map.
2728 InstructionCost Cost = std::accumulate(
2729 N.begin(), N.end(), BBCostIt->second,
2730 [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
2731 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2732 });
2733 bool Inserted = DTCostMap.insert({&N, Cost}).second;
2734 (void)Inserted;
2735 assert(Inserted && "Should not insert a node while visiting children!");
2736 return Cost;
2737}
2738
2739/// Turns a select instruction into implicit control flow branch,
2740/// making the following replacement:
2741///
2742/// head:
2743/// --code before select--
2744/// select %cond, %trueval, %falseval
2745/// --code after select--
2746///
2747/// into
2748///
2749/// head:
2750/// --code before select--
2751/// br i1 %cond, label %then, label %tail
2752///
2753/// then:
2754/// br %tail
2755///
2756/// tail:
2757/// phi [ %trueval, %then ], [ %falseval, %head]
2758/// unreachable
2759///
2760/// It also makes all relevant DT and LI updates, so that all structures are in
2761/// valid state after this transform.
2763 LoopInfo &LI, MemorySSAUpdater *MSSAU,
2764 AssumptionCache *AC) {
2765 LLVM_DEBUG(dbgs() << "Turning " << *SI << " into a branch.\n");
2766 BasicBlock *HeadBB = SI->getParent();
2767
2768 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2769 SplitBlockAndInsertIfThen(SI->getCondition(), SI, false,
2770 SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2771 auto *CondBr = cast<BranchInst>(HeadBB->getTerminator());
2772 BasicBlock *ThenBB = CondBr->getSuccessor(0),
2773 *TailBB = CondBr->getSuccessor(1);
2774 if (MSSAU)
2775 MSSAU->moveAllAfterSpliceBlocks(HeadBB, TailBB, SI);
2776
2777 PHINode *Phi =
2778 PHINode::Create(SI->getType(), 2, "unswitched.select", SI->getIterator());
2779 Phi->addIncoming(SI->getTrueValue(), ThenBB);
2780 Phi->addIncoming(SI->getFalseValue(), HeadBB);
2781 Phi->setDebugLoc(SI->getDebugLoc());
2782 SI->replaceAllUsesWith(Phi);
2783 SI->eraseFromParent();
2784
2785 if (MSSAU && VerifyMemorySSA)
2786 MSSAU->getMemorySSA()->verifyMemorySSA();
2787
2788 ++NumSelects;
2789 return CondBr;
2790}
2791
2792/// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
2793/// making the following replacement:
2794///
2795/// --code before guard--
2796/// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2797/// --code after guard--
2798///
2799/// into
2800///
2801/// --code before guard--
2802/// br i1 %cond, label %guarded, label %deopt
2803///
2804/// guarded:
2805/// --code after guard--
2806///
2807/// deopt:
2808/// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2809/// unreachable
2810///
2811/// It also makes all relevant DT and LI updates, so that all structures are in
2812/// valid state after this transform.
2814 DominatorTree &DT, LoopInfo &LI,
2815 MemorySSAUpdater *MSSAU) {
2816 LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
2817 BasicBlock *CheckBB = GI->getParent();
2818
2819 if (MSSAU && VerifyMemorySSA)
2820 MSSAU->getMemorySSA()->verifyMemorySSA();
2821
2822 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2823 Instruction *DeoptBlockTerm =
2825 GI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2826 BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2827 // SplitBlockAndInsertIfThen inserts control flow that branches to
2828 // DeoptBlockTerm if the condition is true. We want the opposite.
2829 CheckBI->swapSuccessors();
2830
2831 BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2832 GuardedBlock->setName("guarded");
2833 CheckBI->getSuccessor(1)->setName("deopt");
2834 BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2835
2836 if (MSSAU)
2837 MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2838
2839 GI->moveBefore(DeoptBlockTerm->getIterator());
2841
2842 if (MSSAU) {
2844 MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2845 if (VerifyMemorySSA)
2846 MSSAU->getMemorySSA()->verifyMemorySSA();
2847 }
2848
2849 if (VerifyLoopInfo)
2850 LI.verify(DT);
2851 ++NumGuards;
2852 return CheckBI;
2853}
2854
2855/// Cost multiplier is a way to limit potentially exponential behavior
2856/// of loop-unswitch. Cost is multiplied in proportion of 2^number of unswitch
2857/// candidates available. Also consider the number of "sibling" loops with
2858/// the idea of accounting for previous unswitches that already happened on this
2859/// cluster of loops. There was an attempt to keep this formula simple,
2860/// just enough to limit the worst case behavior. Even if it is not that simple
2861/// now it is still not an attempt to provide a detailed heuristic size
2862/// prediction.
2863///
2864/// TODO: Make a proper accounting of "explosion" effect for all kinds of
2865/// unswitch candidates, making adequate predictions instead of wild guesses.
2866/// That requires knowing not just the number of "remaining" candidates but
2867/// also costs of unswitching for each of these candidates.
2869 const Instruction &TI, const Loop &L, const LoopInfo &LI,
2870 const DominatorTree &DT,
2871 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates) {
2872
2873 // Guards and other exiting conditions do not contribute to exponential
2874 // explosion as soon as they dominate the latch (otherwise there might be
2875 // another path to the latch remaining that does not allow to eliminate the
2876 // loop copy on unswitch).
2877 const BasicBlock *Latch = L.getLoopLatch();
2878 const BasicBlock *CondBlock = TI.getParent();
2879 if (DT.dominates(CondBlock, Latch) &&
2880 (isGuard(&TI) ||
2881 (TI.isTerminator() &&
2882 llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
2883 return L.contains(SuccBB);
2884 }) <= 1))) {
2885 NumCostMultiplierSkipped++;
2886 return 1;
2887 }
2888
2889 // Each invariant non-trivial condition, after being unswitched, is supposed
2890 // to have its own specialized sibling loop (the invariant condition has been
2891 // hoisted out of the child loop into a newly-cloned loop). When unswitching
2892 // conditions in nested loops, the basic block size of the outer loop should
2893 // not be altered. If such a size significantly increases across unswitching
2894 // invocations, something may be wrong; so adjust the final cost taking this
2895 // into account.
2896 auto *ParentL = L.getParentLoop();
2897 int ParentLoopSizeMultiplier = 1;
2898 if (ParentL)
2899 ParentLoopSizeMultiplier =
2900 std::max<int>(ParentL->getNumBlocks() / UnswitchParentBlocksDiv, 1);
2901
2902 int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2903 : std::distance(LI.begin(), LI.end()));
2904 // Count amount of clones that all the candidates might cause during
2905 // unswitching. Branch/guard/select counts as 1, switch counts as log2 of its
2906 // cases.
2907 int UnswitchedClones = 0;
2908 for (const auto &Candidate : UnswitchCandidates) {
2909 const Instruction *CI = Candidate.TI;
2910 const BasicBlock *CondBlock = CI->getParent();
2911 bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2912 if (isa<SelectInst>(CI)) {
2913 UnswitchedClones++;
2914 continue;
2915 }
2916 if (isGuard(CI)) {
2917 if (!SkipExitingSuccessors)
2918 UnswitchedClones++;
2919 continue;
2920 }
2921 int NonExitingSuccessors =
2922 llvm::count_if(successors(CondBlock),
2923 [SkipExitingSuccessors, &L](const BasicBlock *SuccBB) {
2924 return !SkipExitingSuccessors || L.contains(SuccBB);
2925 });
2926 UnswitchedClones += Log2_32(NonExitingSuccessors);
2927 }
2928
2929 // Ignore up to the "unscaled candidates" number of unswitch candidates
2930 // when calculating the power-of-two scaling of the cost. The main idea
2931 // with this control is to allow a small number of unswitches to happen
2932 // and rely more on siblings multiplier (see below) when the number
2933 // of candidates is small.
2934 unsigned ClonesPower =
2935 std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2936
2937 // Allowing top-level loops to spread a bit more than nested ones.
2938 int SiblingsMultiplier =
2939 std::max((ParentL ? SiblingsCount
2940 : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2941 1);
2942 // Compute the cost multiplier in a way that won't overflow by saturating
2943 // at an upper bound.
2944 int CostMultiplier;
2945 if (ClonesPower > Log2_32(UnswitchThreshold) ||
2946 SiblingsMultiplier > UnswitchThreshold ||
2947 ParentLoopSizeMultiplier > UnswitchThreshold)
2948 CostMultiplier = UnswitchThreshold;
2949 else
2950 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2951 (int)UnswitchThreshold);
2952
2953 LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier
2954 << " (siblings " << SiblingsMultiplier << " * parent size "
2955 << ParentLoopSizeMultiplier << " * clones "
2956 << (1 << ClonesPower) << ")"
2957 << " for unswitch candidate: " << TI << "\n");
2958 return CostMultiplier;
2959}
2960
2963 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch,
2964 const Loop &L, const LoopInfo &LI, AAResults &AA,
2965 const MemorySSAUpdater *MSSAU) {
2966 assert(UnswitchCandidates.empty() && "Should be!");
2967
2968 auto AddUnswitchCandidatesForInst = [&](Instruction *I, Value *Cond) {
2970 if (isa<Constant>(Cond))
2971 return;
2972 if (L.isLoopInvariant(Cond)) {
2973 UnswitchCandidates.push_back({I, {Cond}});
2974 return;
2975 }
2977 TinyPtrVector<Value *> Invariants =
2979 L, *static_cast<Instruction *>(Cond), LI);
2980 if (!Invariants.empty())
2981 UnswitchCandidates.push_back({I, std::move(Invariants)});
2982 }
2983 };
2984
2985 // Whether or not we should also collect guards in the loop.
2986 bool CollectGuards = false;
2987 if (UnswitchGuards) {
2988 auto *GuardDecl = Intrinsic::getDeclarationIfExists(
2989 L.getHeader()->getParent()->getParent(), Intrinsic::experimental_guard);
2990 if (GuardDecl && !GuardDecl->use_empty())
2991 CollectGuards = true;
2992 }
2993
2994 for (auto *BB : L.blocks()) {
2995 if (LI.getLoopFor(BB) != &L)
2996 continue;
2997
2998 for (auto &I : *BB) {
2999 if (auto *SI = dyn_cast<SelectInst>(&I)) {
3000 auto *Cond = SI->getCondition();
3001 // Do not unswitch vector selects and logical and/or selects
3002 if (Cond->getType()->isIntegerTy(1) && !SI->getType()->isIntegerTy(1))
3003 AddUnswitchCandidatesForInst(SI, Cond);
3004 } else if (CollectGuards && isGuard(&I)) {
3005 auto *Cond =
3006 skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
3007 // TODO: Support AND, OR conditions and partial unswitching.
3008 if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
3009 UnswitchCandidates.push_back({&I, {Cond}});
3010 }
3011 }
3012
3013 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
3014 // We can only consider fully loop-invariant switch conditions as we need
3015 // to completely eliminate the switch after unswitching.
3016 if (!isa<Constant>(SI->getCondition()) &&
3017 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
3018 UnswitchCandidates.push_back({SI, {SI->getCondition()}});
3019 continue;
3020 }
3021
3022 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
3023 if (!BI || !BI->isConditional() ||
3024 BI->getSuccessor(0) == BI->getSuccessor(1))
3025 continue;
3026
3027 AddUnswitchCandidatesForInst(BI, BI->getCondition());
3028 }
3029
3030 if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
3031 !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
3032 return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
3033 })) {
3034 MemorySSA *MSSA = MSSAU->getMemorySSA();
3035 if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
3036 LLVM_DEBUG(
3037 dbgs() << "simple-loop-unswitch: Found partially invariant condition "
3038 << *Info->InstToDuplicate[0] << "\n");
3039 PartialIVInfo = *Info;
3040 PartialIVCondBranch = L.getHeader()->getTerminator();
3041 TinyPtrVector<Value *> ValsToDuplicate;
3042 llvm::append_range(ValsToDuplicate, Info->InstToDuplicate);
3043 UnswitchCandidates.push_back(
3044 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
3045 }
3046 }
3047 return !UnswitchCandidates.empty();
3048}
3049
3050/// Tries to canonicalize condition described by:
3051///
3052/// br (LHS pred RHS), label IfTrue, label IfFalse
3053///
3054/// into its equivalent where `Pred` is something that we support for injected
3055/// invariants (so far it is limited to ult), LHS in canonicalized form is
3056/// non-invariant and RHS is an invariant.
3058 Value *&LHS, Value *&RHS,
3059 BasicBlock *&IfTrue,
3060 BasicBlock *&IfFalse,
3061 const Loop &L) {
3062 if (!L.contains(IfTrue)) {
3063 Pred = ICmpInst::getInversePredicate(Pred);
3064 std::swap(IfTrue, IfFalse);
3065 }
3066
3067 // Move loop-invariant argument to RHS position.
3068 if (L.isLoopInvariant(LHS)) {
3069 Pred = ICmpInst::getSwappedPredicate(Pred);
3070 std::swap(LHS, RHS);
3071 }
3072
3073 if (Pred == ICmpInst::ICMP_SGE && match(RHS, m_Zero())) {
3074 // Turn "x >=s 0" into "x <u UMIN_INT"
3075 Pred = ICmpInst::ICMP_ULT;
3076 RHS = ConstantInt::get(
3077 RHS->getContext(),
3078 APInt::getSignedMinValue(RHS->getType()->getIntegerBitWidth()));
3079 }
3080}
3081
3082/// Returns true, if predicate described by ( \p Pred, \p LHS, \p RHS )
3083/// succeeding into blocks ( \p IfTrue, \p IfFalse) can be optimized by
3084/// injecting a loop-invariant condition.
3086 const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS,
3087 const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L) {
3088 if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
3089 return false;
3090 // TODO: Support other predicates.
3091 if (Pred != ICmpInst::ICMP_ULT)
3092 return false;
3093 // TODO: Support non-loop-exiting branches?
3094 if (!L.contains(IfTrue) || L.contains(IfFalse))
3095 return false;
3096 // FIXME: For some reason this causes problems with MSSA updates, need to
3097 // investigate why. So far, just don't unswitch latch.
3098 if (L.getHeader() == IfTrue)
3099 return false;
3100 return true;
3101}
3102
3103/// Returns true, if metadata on \p BI allows us to optimize branching into \p
3104/// TakenSucc via injection of invariant conditions. The branch should be not
3105/// enough and not previously unswitched, the information about this comes from
3106/// the metadata.
3108 const BasicBlock *TakenSucc) {
3109 SmallVector<uint32_t> Weights;
3110 if (!extractBranchWeights(*BI, Weights))
3111 return false;
3113 BranchProbability LikelyTaken(T - 1, T);
3114
3115 assert(Weights.size() == 2 && "Unexpected profile data!");
3116 size_t Idx = BI->getSuccessor(0) == TakenSucc ? 0 : 1;
3117 auto Num = Weights[Idx];
3118 auto Denom = Weights[0] + Weights[1];
3119 // Degenerate or overflowed metadata.
3120 if (Denom == 0 || Num > Denom)
3121 return false;
3122 BranchProbability ActualTaken(Num, Denom);
3123 if (LikelyTaken > ActualTaken)
3124 return false;
3125 return true;
3126}
3127
3128/// Materialize pending invariant condition of the given candidate into IR. The
3129/// injected loop-invariant condition implies the original loop-variant branch
3130/// condition, so the materialization turns
3131///
3132/// loop_block:
3133/// ...
3134/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3135///
3136/// into
3137///
3138/// preheader:
3139/// %invariant_cond = LHS pred RHS
3140/// ...
3141/// loop_block:
3142/// br i1 %invariant_cond, label InLoopSucc, label OriginalCheck
3143/// OriginalCheck:
3144/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3145/// ...
3146static NonTrivialUnswitchCandidate
3147injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
3148 DominatorTree &DT, LoopInfo &LI,
3149 AssumptionCache &AC, MemorySSAUpdater *MSSAU) {
3150 assert(Candidate.hasPendingInjection() && "Nothing to inject!");
3151 BasicBlock *Preheader = L.getLoopPreheader();
3152 assert(Preheader && "Loop is not in simplified form?");
3153 assert(LI.getLoopFor(Candidate.TI->getParent()) == &L &&
3154 "Unswitching branch of inner loop!");
3155
3156 auto Pred = Candidate.PendingInjection->Pred;
3157 auto *LHS = Candidate.PendingInjection->LHS;
3158 auto *RHS = Candidate.PendingInjection->RHS;
3159 auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;
3160 auto *TI = cast<BranchInst>(Candidate.TI);
3161 auto *BB = Candidate.TI->getParent();
3162 auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1)
3163 : TI->getSuccessor(0);
3164 // FIXME: Remove this once limitation on successors is lifted.
3165 assert(L.contains(InLoopSucc) && "Not supported yet!");
3166 assert(!L.contains(OutOfLoopSucc) && "Not supported yet!");
3167 auto &Ctx = BB->getContext();
3168
3169 IRBuilder<> Builder(Preheader->getTerminator());
3170 assert(ICmpInst::isUnsigned(Pred) && "Not supported yet!");
3171 if (LHS->getType() != RHS->getType()) {
3172 if (LHS->getType()->getIntegerBitWidth() <
3173 RHS->getType()->getIntegerBitWidth())
3174 LHS = Builder.CreateZExt(LHS, RHS->getType(), LHS->getName() + ".wide");
3175 else
3176 RHS = Builder.CreateZExt(RHS, LHS->getType(), RHS->getName() + ".wide");
3177 }
3178 // Do not use builder here: CreateICmp may simplify this into a constant and
3179 // unswitching will break. Better optimize it away later.
3180 auto *InjectedCond =
3181 ICmpInst::Create(Instruction::ICmp, Pred, LHS, RHS, "injected.cond",
3182 Preheader->getTerminator()->getIterator());
3183
3184 BasicBlock *CheckBlock = BasicBlock::Create(Ctx, BB->getName() + ".check",
3185 BB->getParent(), InLoopSucc);
3186 Builder.SetInsertPoint(TI);
3187 auto *InvariantBr =
3188 Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
3189
3190 Builder.SetInsertPoint(CheckBlock);
3191 Builder.CreateCondBr(TI->getCondition(), TI->getSuccessor(0),
3192 TI->getSuccessor(1));
3193 TI->eraseFromParent();
3194
3195 // Fixup phis.
3196 for (auto &I : *InLoopSucc) {
3197 auto *PN = dyn_cast<PHINode>(&I);
3198 if (!PN)
3199 break;
3200 auto *Inc = PN->getIncomingValueForBlock(BB);
3201 PN->addIncoming(Inc, CheckBlock);
3202 }
3203 OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);
3204
3206 { DominatorTree::Insert, BB, CheckBlock },
3207 { DominatorTree::Insert, CheckBlock, InLoopSucc },
3208 { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
3209 { DominatorTree::Delete, BB, OutOfLoopSucc }
3210 };
3211
3212 DT.applyUpdates(DTUpdates);
3213 if (MSSAU)
3214 MSSAU->applyUpdates(DTUpdates, DT);
3215 L.addBasicBlockToLoop(CheckBlock, LI);
3216
3217#ifndef NDEBUG
3218 DT.verify();
3219 LI.verify(DT);
3220 if (MSSAU && VerifyMemorySSA)
3221 MSSAU->getMemorySSA()->verifyMemorySSA();
3222#endif
3223
3224 // TODO: In fact, cost of unswitching a new invariant candidate is *slightly*
3225 // higher because we have just inserted a new block. Need to think how to
3226 // adjust the cost of injected candidates when it was first computed.
3227 LLVM_DEBUG(dbgs() << "Injected a new loop-invariant branch " << *InvariantBr
3228 << " and considering it for unswitching.");
3229 ++NumInvariantConditionsInjected;
3230 return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },
3231 Candidate.Cost);
3232}
3233
// NOTE(review): this is a doxygen-derived listing; hyperlinked source lines
// were elided by the extractor (the embedded numbers are the original file's
// line numbers). Missing here: line 3245 (the function head, per the
// cross-reference index: `static bool insertCandidatesWithPendingInjections(
// SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, Loop &L,
// ICmpInst::Predicate Pred, ArrayRef<CompareDesc> Compares, const
// DominatorTree &DT)`), line 3247 (the `Pred`/`Compares` parameter line),
// and lines 3250-3251/3254 — presumably the derivation of `NonStrictPred`
// from `Pred`. Confirm against the original source before editing.
3234/// Given chain of loop branch conditions looking like:
3235/// br (Variant < Invariant1)
3236/// br (Variant < Invariant2)
3237/// br (Variant < Invariant3)
3238/// ...
3239/// collect set of invariant conditions on which we want to unswitch, which
3240/// look like:
3241/// Invariant1 <= Invariant2
3242/// Invariant2 <= Invariant3
3243/// ...
3244/// Though they might not immediately exist in the IR, we can still inject them.
3246 SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, Loop &L,
3248 const DominatorTree &DT) {
3249
// A single compare yields no adjacent pair, so there is nothing to inject.
3252 if (Compares.size() < 2)
3253 return false;
// Walk adjacent (Prev, Next) pairs of the compare chain and record one
// *pending* injection per pair: a non-strict comparison of the next pair
// member's invariant (LHS) against the previous one's (RHS). The IR for the
// condition is only materialized later (by injectPendingInvariantConditions)
// if this candidate is actually selected for unswitching.
3255 for (auto Prev = Compares.begin(), Next = Compares.begin() + 1;
3256 Next != Compares.end(); ++Prev, ++Next) {
3257 Value *LHS = Next->Invariant;
3258 Value *RHS = Prev->Invariant;
3259 BasicBlock *InLoopSucc = Prev->InLoopSucc;
3260 InjectedInvariant ToInject(NonStrictPred, LHS, RHS, InLoopSucc);
// The candidate cost is left as std::nullopt here; it is presumably computed
// later when candidates are ranked (see findBestNonTrivialUnswitchCandidate).
3261 NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },
3262 std::nullopt, std::move(ToInject));
3263 UnswitchCandidates.push_back(std::move(Candidate));
3264 }
3265 return true;
3266}
3267
// NOTE(review): doxygen-derived listing; elided lines around this function:
// 3283-3284 (function head, per the index: `static bool
// collectUnswitchCandidatesWithInjections(SmallVectorImpl<
// NonTrivialUnswitchCandidate> &UnswitchCandidates, IVConditionInfo
// &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, const
// DominatorTree &DT, const LoopInfo &LI, AAResults &AA, const
// MemorySSAUpdater *MSSAU)`), 3288 (the guard for the first `return false`
// — presumably an enabling-option check), 3299 (the declaration of the
// `CandidatesULT` map used below), 3321 (the condition guarding the
// `continue` at 3322 — possibly the shouldTryInjectBasingOnMetadata check),
// and 3333 (the statement updating `Found`, presumably `Found |=
// insertCandidatesWithPendingInjections(...)`). Confirm against the original.
3268/// Collect unswitch candidates by invariant conditions that are not immediately
3269/// present in the loop. However, they can be injected into the code if we
3270/// decide it's profitable.
3271/// An example of such conditions is following:
3272///
3273/// for (...) {
3274/// x = load ...
3275/// if (! x <u C1) break;
3276/// if (! x <u C2) break;
3277/// <do something>
3278/// }
3279///
3280/// We can unswitch by condition "C1 <=u C2". If that is true, then "x <u C1 <=
3281/// C2" automatically implies "x <u C2", so we can get rid of one of
3282/// loop-variant checks in unswitched loop version.
3285 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L,
3286 const DominatorTree &DT, const LoopInfo &LI, AAResults &AA,
3287 const MemorySSAUpdater *MSSAU) {
3289 return false;
3290
3291 if (!DT.isReachableFromEntry(L.getHeader()))
3292 return false;
3293 auto *Latch = L.getLoopLatch();
3294 // Need to have a single latch and a preheader.
3295 if (!Latch)
3296 return false;
3297 assert(L.getLoopPreheader() && "Must have a preheader!");
3298
3300 // Traverse the conditions that dominate latch (and therefore dominate each
3301 // other).
// Walks the dominator chain upward from the latch, collecting conditional
// branches of this loop (not inner loops) whose condition is an integer icmp.
3302 for (auto *DTN = DT.getNode(Latch); L.contains(DTN->getBlock());
3303 DTN = DTN->getIDom()) {
3304 CmpPredicate Pred;
3305 Value *LHS = nullptr, *RHS = nullptr;
3306 BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
3307 auto *BB = DTN->getBlock();
3308 // Ignore inner loops.
3309 if (LI.getLoopFor(BB) != &L)
3310 continue;
3311 auto *Term = BB->getTerminator();
3312 if (!match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
3313 m_BasicBlock(IfTrue), m_BasicBlock(IfFalse))))
3314 continue;
3315 if (!LHS->getType()->isIntegerTy())
3316 continue;
// Canonicalization may swap operands/successors so that the profitable form
// (variant < invariant, staying in the loop on true) is matched uniformly.
3317 canonicalizeForInvariantConditionInjection(Pred, LHS, RHS, IfTrue, IfFalse,
3318 L);
3319 if (!shouldTryInjectInvariantCondition(Pred, LHS, RHS, IfTrue, IfFalse, L))
3320 continue;
3322 continue;
3323 // Strip ZEXT for unsigned predicate.
3324 // TODO: once signed predicates are supported, also strip SEXT.
3325 CompareDesc Desc(cast<BranchInst>(Term), RHS, IfTrue);
3326 while (auto *Zext = dyn_cast<ZExtInst>(LHS))
3327 LHS = Zext->getOperand(0);
// Group compares by their (zext-stripped) variant operand: each group forms
// a chain "LHS <u Inv1", "LHS <u Inv2", ... for pairwise injection below.
3328 CandidatesULT[LHS].push_back(Desc);
3329 }
3330
3331 bool Found = false;
3332 for (auto &It : CandidatesULT)
3334 UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);
3335 return Found;
3336}
3337
// Legality check for non-trivial (cloning-based) unswitching of loop L.
// NOTE(review): doxygen-derived listing; elided lines: 3338 (function head,
// per the index: `static bool isSafeForNoNTrivialUnswitching(Loop &L,
// LoopInfo &LI)`), 3360 (the irreducible-CFG test guarding the `return
// false` after RPOT.perform — TODO confirm), 3363 (the declaration of
// `ExitBlocks`), and 3371 (the cleanuppad/catchswitch `isa<>` condition
// guarding the debug message below). Confirm against the original source.
3339 if (!L.isSafeToClone())
3340 return false;
// Tokens used across basic blocks and convergent calls cannot be duplicated
// when the loop is cloned, so reject loops containing either.
3341 for (auto *BB : L.blocks())
3342 for (auto &I : *BB) {
3343 if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
3344 return false;
3345 if (auto *CB = dyn_cast<CallBase>(&I)) {
3346 assert(!CB->cannotDuplicate() && "Checked by L.isSafeToClone().");
3347 if (CB->isConvergent())
3348 return false;
3349 }
3350 }
3351
3352 // Check if there are irreducible CFG cycles in this loop. If so, we cannot
3353 // easily unswitch non-trivial edges out of the loop. Doing so might turn the
3354 // irreducible control flow into reducible control flow and introduce new
3355 // loops "out of thin air". If we ever discover important use cases for doing
3356 // this, we can add support to loop unswitch, but it is a lot of complexity
3357 // for what seems little or no real world benefit.
3358 LoopBlocksRPO RPOT(&L);
3359 RPOT.perform(&LI);
3361 return false;
3362
3364 L.getUniqueExitBlocks(ExitBlocks);
3365 // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
3366 // instruction as we don't know how to split those exit blocks.
3367 // FIXME: We should teach SplitBlock to handle this and remove this
3368 // restriction.
3369 for (auto *ExitBB : ExitBlocks) {
3370 auto It = ExitBB->getFirstNonPHIIt();
3372 LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "
3373 "in exit block\n");
3374 return false;
3375 }
3376 }
3377
3378 return true;
3379}
3380
// Ranks all collected candidates by estimated post-unswitch code-size cost
// and returns the cheapest one (with its Cost field filled in).
// NOTE(review): doxygen-derived listing; several hyperlinked lines are
// elided: 3390 (declaration of `EphValues`, used at 3391), 3392
// (declaration of `BBCostMap`, used at 3414), 3399/3401-3402 (selection of
// `CostKind` — the visible `hasMinSize()` fragment suggests code-size costs
// for minsize functions; TODO confirm), 3432 (declaration of `DTCostMap`,
// used at 3479), 3442 (declaration of the `Visited` set used at 3447),
// 3499-3500 (definition of `BI`, presumably `dyn_cast<BranchInst>` of the
// candidate terminator, used at 3502), and 3508 (the `if` head of the
// cost-multiplier branch whose matching `} else` is at 3518).
3381 static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
3382 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates, const Loop &L,
3383 const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC,
3384 const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo) {
3385 // Given that unswitching these terminators will require duplicating parts of
3386 // the loop, so we need to be able to model that cost. Compute the ephemeral
3387 // values and set up a data structure to hold per-BB costs. We cache each
3388 // block's cost so that we don't recompute this when considering different
3389 // subsets of the loop for duplication during unswitching.
3391 CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
3393
3394 // Compute the cost of each block, as well as the total loop cost. Also, bail
3395 // out if we see instructions which are incompatible with loop unswitching
3396 // (convergent, noduplicate, or cross-basic-block tokens).
3397 // FIXME: We might be able to safely handle some of these in non-duplicated
3398 // regions.
3400 L.getHeader()->getParent()->hasMinSize()
3403 InstructionCost LoopCost = 0;
3404 for (auto *BB : L.blocks()) {
3405 InstructionCost Cost = 0;
3406 for (auto &I : *BB) {
// Ephemeral values (only feeding assumes) vanish after unswitching, so they
// do not contribute to the modeled size.
3407 if (EphValues.count(&I))
3408 continue;
3409 Cost += TTI.getInstructionCost(&I, CostKind);
3410 }
3411 assert(Cost >= 0 && "Must not have negative costs!");
3412 LoopCost += Cost;
3413 assert(LoopCost >= 0 && "Must not have negative loop costs!");
3414 BBCostMap[BB] = Cost;
3415 }
3416 LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");
3417
3418 // Now we find the best candidate by searching for the one with the following
3419 // properties in order:
3420 //
3421 // 1) An unswitching cost below the threshold
3422 // 2) The smallest number of duplicated unswitch candidates (to avoid
3423 // creating redundant subsequent unswitching)
3424 // 3) The smallest cost after unswitching.
3425 //
3426 // We prioritize reducing fanout of unswitch candidates provided the cost
3427 // remains below the threshold because this has a multiplicative effect.
3428 //
3429 // This requires memoizing each dominator subtree to avoid redundant work.
3430 //
3431 // FIXME: Need to actually do the number of candidates part above.
3433 // Given a terminator which might be unswitched, computes the non-duplicated
3434 // cost for that terminator.
3435 auto ComputeUnswitchedCost = [&](Instruction &TI,
3436 bool FullUnswitch) -> InstructionCost {
3437 // Unswitching selects unswitches the entire loop.
3438 if (isa<SelectInst>(TI))
3439 return LoopCost;
3440
3441 BasicBlock &BB = *TI.getParent();
3443
3444 InstructionCost Cost = 0;
3445 for (BasicBlock *SuccBB : successors(&BB)) {
3446 // Don't count successors more than once.
3447 if (!Visited.insert(SuccBB).second)
3448 continue;
3449
3450 // If this is a partial unswitch candidate, then it must be a conditional
3451 // branch with a condition of either `or`, `and`, their corresponding
3452 // select forms or partially invariant instructions. In that case, one of
3453 // the successors is necessarily duplicated, so don't even try to remove
3454 // its cost.
3455 if (!FullUnswitch) {
3456 auto &BI = cast<BranchInst>(TI);
3457 Value *Cond = skipTrivialSelect(BI.getCondition());
3458 if (match(Cond, m_LogicalAnd())) {
3459 if (SuccBB == BI.getSuccessor(1))
3460 continue;
3461 } else if (match(Cond, m_LogicalOr())) {
3462 if (SuccBB == BI.getSuccessor(0))
3463 continue;
3464 } else if ((PartialIVInfo.KnownValue->isOneValue() &&
3465 SuccBB == BI.getSuccessor(0)) ||
3466 (!PartialIVInfo.KnownValue->isOneValue() &&
3467 SuccBB == BI.getSuccessor(1)))
3468 continue;
3469 }
3470
3471 // This successor's domtree will not need to be duplicated after
3472 // unswitching if the edge to the successor dominates it (and thus the
3473 // entire tree). This essentially means there is no other path into this
3474 // subtree and so it will end up live in only one clone of the loop.
3475 if (SuccBB->getUniquePredecessor() ||
3476 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
3477 return PredBB == &BB || DT.dominates(SuccBB, PredBB);
3478 })) {
3479 Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
3480 assert(Cost <= LoopCost &&
3481 "Non-duplicated cost should never exceed total loop cost!");
3482 }
3483 }
3484
3485 // Now scale the cost by the number of unique successors minus one. We
3486 // subtract one because there is already at least one copy of the entire
3487 // loop. This is computing the new cost of unswitching a condition.
3488 // Note that guards always have 2 unique successors that are implicit and
3489 // will be materialized if we decide to unswitch it.
3490 int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
3491 assert(SuccessorsCount > 1 &&
3492 "Cannot unswitch a condition without multiple distinct successors!");
3493 return (LoopCost - Cost) * (SuccessorsCount - 1);
3494 };
3495
3496 std::optional<NonTrivialUnswitchCandidate> Best;
3497 for (auto &Candidate : UnswitchCandidates) {
3498 Instruction &TI = *Candidate.TI;
3499 ArrayRef<Value *> Invariants = Candidate.Invariants;
// Full unswitch: non-branch terminators, pending injections, or a branch
// whose entire (select-stripped) condition is the single invariant.
3501 bool FullUnswitch =
3502 !BI || Candidate.hasPendingInjection() ||
3503 (Invariants.size() == 1 &&
3504 Invariants[0] == skipTrivialSelect(BI->getCondition()));
3505 InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
3506 // Calculate cost multiplier which is a tool to limit potentially
3507 // exponential behavior of loop-unswitch.
3509 int CostMultiplier =
3510 CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
3511 assert(
3512 (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
3513 "cost multiplier needs to be in the range of 1..UnswitchThreshold");
3514 CandidateCost *= CostMultiplier;
3515 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3516 << " (multiplier: " << CostMultiplier << ")"
3517 << " for unswitch candidate: " << TI << "\n");
3518 } else {
3519 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3520 << " for unswitch candidate: " << TI << "\n");
3521 }
3522
// Plain linear minimum over candidates; ties keep the earlier candidate.
3523 if (!Best || CandidateCost < Best->Cost) {
3524 Best = Candidate;
3525 Best->Cost = CandidateCost;
3526 }
3527 }
3528 assert(Best && "Must be!");
3529 return *Best;
3530}
3531
// NOTE(review): doxygen-derived listing; elided lines: 3538 (function head,
// per the index: `static bool shouldInsertFreeze(Loop &L, Instruction &TI,
// DominatorTree &DT, AssumptionCache &AC)`), 3540-3541 (the guard for the
// first `return false` — presumably the freeze-loop-unswitch-cond option
// check described in the header comment), and 3553-3554 (the `else` branch
// value for `Cond` plus the final `return !isGuaranteedNotToBeUndefOrPoison(`
// call head completed at 3555). Confirm against the original source.
3532// Insert a freeze on an unswitched branch if all is true:
3533// 1. freeze-loop-unswitch-cond option is true
3534// 2. The branch may not execute in the loop pre-transformation. If a branch may
3535// not execute and could cause UB, it would always cause UB if it is hoisted outside
3536// of the loop. Insert a freeze to prevent this case.
3537// 3. The branch condition may be poison or undef
3539 AssumptionCache &AC) {
3542 return false;
3543
// If the terminator is guaranteed to execute on every iteration, hoisting it
// cannot introduce new UB, so no freeze is needed.
3544 ICFLoopSafetyInfo SafetyInfo;
3545 SafetyInfo.computeLoopSafetyInfo(&L);
3546 if (SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
3547 return false;
3548
3549 Value *Cond;
3550 if (BranchInst *BI = dyn_cast<BranchInst>(&TI))
3551 Cond = skipTrivialSelect(BI->getCondition());
3552 else
3555 Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
3556}
3557
// Collects all candidates, picks the cheapest, materializes any pending
// injected condition, normalizes selects/guards into branches, and performs
// the non-trivial unswitch. Returns true if a transform was made.
// NOTE(review): doxygen-derived listing; elided lines: 3558-3560 (function
// head, per the index: `static bool unswitchBestCondition(Loop &L,
// DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA,
// TargetTransformInfo &TTI, ScalarEvolution *SE, ...`) and 3565 (the
// declaration of `UnswitchCandidates`). Confirm against the original source.
3561 MemorySSAUpdater *MSSAU,
3562 LPMUpdater &LoopUpdater) {
3563 // Collect all invariant conditions within this loop (as opposed to an inner
3564 // loop which would be handled when visiting that inner loop).
3566 IVConditionInfo PartialIVInfo;
3567 Instruction *PartialIVCondBranch = nullptr;
3568 collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
3569 PartialIVCondBranch, L, LI, AA, MSSAU);
// Injection-based candidates are gathered only if the loop is not annotated
// with the injection-disable metadata.
3570 if (!findOptionMDForLoop(&L, "llvm.loop.unswitch.injection.disable"))
3571 collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo,
3572 PartialIVCondBranch, L, DT, LI, AA,
3573 MSSAU);
3574 // If we didn't find any candidates, we're done.
3575 if (UnswitchCandidates.empty())
3576 return false;
3577
3578 LLVM_DEBUG(
3579 dbgs() << "Considering " << UnswitchCandidates.size()
3580 << " non-trivial loop invariant conditions for unswitching.\n");
3581
3582 NonTrivialUnswitchCandidate Best = findBestNonTrivialUnswitchCandidate(
3583 UnswitchCandidates, L, DT, LI, AC, TTI, PartialIVInfo);
3584
3585 assert(Best.TI && "Failed to find loop unswitch candidate");
3586 assert(Best.Cost && "Failed to compute cost");
3587
// Even the cheapest candidate may be too expensive; bail without changes.
3588 if (*Best.Cost >= UnswitchThreshold) {
3589 LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << *Best.Cost
3590 << "\n");
3591 return false;
3592 }
3593
// Injection-based candidates do not exist in the IR yet; create the actual
// invariant condition now that this candidate has been chosen.
3594 bool InjectedCondition = false;
3595 if (Best.hasPendingInjection()) {
3596 Best = injectPendingInvariantConditions(Best, L, DT, LI, AC, MSSAU);
3597 InjectedCondition = true;
3598 }
3599 assert(!Best.hasPendingInjection() &&
3600 "All injections should have been done by now!");
3601
// Partial-invariant duplication info only applies if the chosen terminator
// is the partial-IV branch itself.
3602 if (Best.TI != PartialIVCondBranch)
3603 PartialIVInfo.InstToDuplicate.clear();
3604
3605 bool InsertFreeze;
3606 if (auto *SI = dyn_cast<SelectInst>(Best.TI)) {
3607 // If the best candidate is a select, turn it into a branch. Select
3608 // instructions with a poison conditional do not propagate poison, but
3609 // branching on poison causes UB. Insert a freeze on the select
3610 // conditional to prevent UB after turning the select into a branch.
3611 InsertFreeze = !isGuaranteedNotToBeUndefOrPoison(
3612 SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT);
3613 Best.TI = turnSelectIntoBranch(SI, DT, LI, MSSAU, &AC);
3614 } else {
3615 // If the best candidate is a guard, turn it into a branch.
3616 if (isGuard(Best.TI))
3617 Best.TI =
3618 turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
3619 InsertFreeze = shouldInsertFreeze(L, *Best.TI, DT, AC);
3620 }
3621
3622 LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Cost
3623 << ") terminator: " << *Best.TI << "\n");
3624 unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT,
3625 LI, AC, SE, MSSAU, LoopUpdater, InsertFreeze,
3626 InjectedCondition);
3627 return true;
3628}
3629
// NOTE(review): doxygen-derived listing; elided lines: 3652 (the parameter
// line between LoopInfo and TargetTransformInfo — per the index it is
// `AssumptionCache &AC, AAResults &AA,`) and 3696 (the condition guarding
// the legality-check `return false`, presumably a call to
// isSafeForNoNTrivialUnswitching). Confirm against the original source.
3630/// Unswitch control flow predicated on loop invariant conditions.
3631///
3632/// This first hoists all branches or switches which are trivial (IE, do not
3633/// require duplicating any part of the loop) out of the loop body. It then
3634/// looks at other loop invariant control flows and tries to unswitch those as
3635/// well by cloning the loop if the result is small enough.
3636///
3637/// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
3638/// also updated based on the unswitch. The `MSSA` analysis is also updated if
3639/// valid (i.e. its use is enabled).
3640///
3641/// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
3642/// true, we will attempt to do non-trivial unswitching as well as trivial
3643/// unswitching.
3644///
3645/// The `postUnswitch` function will be run after unswitching is complete
3646/// with information on whether or not the provided loop remains a loop and
3647/// a list of new sibling loops created.
3648///
3649/// If `SE` is non-null, we will update that analysis based on the unswitching
3650/// done.
3651 static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
3653 TargetTransformInfo &TTI, bool Trivial,
3654 bool NonTrivial, ScalarEvolution *SE,
3655 MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater) {
3656 assert(L.isRecursivelyLCSSAForm(DT, LI) &&
3657 "Loops must be in LCSSA form before unswitching.");
3658
3659 // Must be in loop simplified form: we need a preheader and dedicated exits.
3660 if (!L.isLoopSimplifyForm())
3661 return false;
3662
3663 // Try trivial unswitch first before loop over other basic blocks in the loop.
3664 if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
3665 // If we unswitched successfully we will want to clean up the loop before
3666 // processing it further so just mark it as unswitched and return.
3667 postUnswitch(L, LoopUpdater, L.getName(),
3668 /*CurrentLoopValid*/ true, /*PartiallyInvariant*/ false,
3669 /*InjectedCondition*/ false, {});
3670 return true;
3671 }
3672
3673 const Function *F = L.getHeader()->getParent();
3674
3675 // Check whether we should continue with non-trivial conditions.
3676 // EnableNonTrivialUnswitch: Global variable that forces non-trivial
3677 // unswitching for testing and debugging.
3678 // NonTrivial: Parameter that enables non-trivial unswitching for this
3679 // invocation of the transform. But this should be allowed only
3680 // for targets without branch divergence.
3681 //
3682 // FIXME: If divergence analysis becomes available to a loop
3683 // transform, we should allow unswitching for non-trivial uniform
3684 // branches even on targets that have divergence.
3685 // https://bugs.llvm.org/show_bug.cgi?id=48819
3686 bool ContinueWithNonTrivial =
3687 EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence(F));
3688 if (!ContinueWithNonTrivial)
3689 return false;
3690
3691 // Skip non-trivial unswitching for optsize functions.
3692 if (F->hasOptSize())
3693 return false;
3694
3695 // Perform legality checks.
3697 return false;
3698
3699 // For non-trivial unswitching, because it often creates new loops, we rely on
3700 // the pass manager to iterate on the loops rather than trying to immediately
3701 // reach a fixed point. There is no substantial advantage to iterating
3702 // internally, and if any of the new loops are simplified enough to contain
3703 // trivial unswitching we want to prefer those.
3704
3705 // Try to unswitch the best invariant condition. We prefer this full unswitch to
3706 // a partial unswitch when possible below the threshold.
3707 if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, SE, MSSAU, LoopUpdater))
3708 return true;
3709
3710 // No other opportunities to unswitch.
3711 return false;
3712}
3713
// New-pass-manager entry point: runs unswitching on one loop and reports
// preserved analyses.
// NOTE(review): doxygen-derived listing; lines 3714-3715 (the method head,
// presumably `PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L,
// LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR,`) were elided by
// the extractor. Confirm against the original source.
3716 LPMUpdater &U) {
3717 Function &F = *L.getHeader()->getParent();
// F is referenced only inside LLVM_DEBUG; (void)F silences the unused
// warning in release builds.
3718 (void)F;
3719 LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
3720 << "\n");
3721
// MemorySSA is optional: only build an updater (and pre-verify) when the
// analysis result carries one.
3722 std::optional<MemorySSAUpdater> MSSAU;
3723 if (AR.MSSA) {
3724 MSSAU = MemorySSAUpdater(AR.MSSA);
3725 if (VerifyMemorySSA)
3726 AR.MSSA->verifyMemorySSA();
3727 }
3728 if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
3729 &AR.SE, MSSAU ? &*MSSAU : nullptr, U))
3730 return PreservedAnalyses::all();
3731
3732 if (AR.MSSA && VerifyMemorySSA)
3733 AR.MSSA->verifyMemorySSA();
3734
3735 // Historically this pass has had issues with the dominator tree so verify it
3736 // in asserts builds.
3737 assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
3738
3739 auto PA = getLoopPassPreservedAnalyses();
3740 if (AR.MSSA)
3741 PA.preserve<MemorySSAAnalysis>();
3742 return PA;
3743}
3744
// Prints this pass's pipeline representation, appending its two boolean
// options as "<[no-]nontrivial;[no-]trivial>".
// NOTE(review): doxygen-derived listing; elided lines: 3745 (the method
// head, presumably `void SimpleLoopUnswitchPass::printPipeline(`) and 3747
// (the statement whose argument list closes at 3748 — presumably the
// delegating call to the PassInfoMixin base `printPipeline`). Confirm
// against the original source.
3746 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
3748 OS, MapClassName2PassName);
3749
3750 OS << '<';
3751 OS << (NonTrivial ? "" : "no-") << "nontrivial;";
3752 OS << (Trivial ? "" : "no-") << "trivial";
3753 OS << '>';
3754}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
#define DEBUG_TYPE
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
static Value * getCondition(Instruction *I)
Module.h This file contains the declarations for the Module class.
This defines the Use class.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
This header provides classes for managing per-loop analyses.
Loop::LoopBounds::Direction Direction
Definition LoopInfo.cpp:231
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
#define T
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
uint64_t IntrinsicInst * II
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
Provides some synthesis utilities to produce sequences of values.
This file implements a set that has insertion order iteration characteristics.
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
static int CalculateUnswitchCostMultiplier(const Instruction &TI, const Loop &L, const LoopInfo &LI, const DominatorTree &DT, ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU)
Copy a set of loop invariant values, and conditionally branch on them.
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root, const LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy > > VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
static BranchInst * turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, AssumptionCache *AC)
Turns a select instruction into implicit control flow branch, making the following replacement:
static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)
void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName, bool CurrentLoopValid, bool PartiallyInvariant, bool InjectedCondition, ArrayRef< Loop * > NewLoops)
static bool shouldTryInjectInvariantCondition(const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L)
Returns true, if predicate described by ( Pred, LHS, RHS ) succeeding into blocks ( IfTrue,...
static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates, const Loop &L, const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC, const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo)
static Value * skipTrivialSelect(Value *Cond)
static Loop * getTopMostExitingLoop(const BasicBlock *ExitBB, const LoopInfo &LI)
static bool collectUnswitchCandidatesWithInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, const DominatorTree &DT, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
Collect unswitch candidates by invariant conditions that are not immediately present in the loop.
static void replaceLoopInvariantUses(const Loop &L, Value *Invariant, Constant &Replacement)
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
static bool collectUnswitchCandidates(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, const Loop &L, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, AssumptionCache &AC)
static void canonicalizeForInvariantConditionInjection(CmpPredicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue, BasicBlock *&IfFalse, const Loop &L)
Tries to canonicalize condition described by:
static bool areLoopExitPHIsLoopInvariant(const Loop &L, const BasicBlock &ExitingBB, const BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
static bool insertCandidatesWithPendingInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, Loop &L, ICmpInst::Predicate Pred, ArrayRef< CompareDesc > Compares, const DominatorTree &DT)
Given chain of loop branch conditions looking like: br (Variant < Invariant1) br (Variant < Invariant...
static NonTrivialUnswitchCandidate injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, MemorySSAUpdater *MSSAU)
Materialize pending invariant condition of the given candidate into IR.
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition)
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops, ScalarEvolution *SE)
Rebuild a loop after unswitching removes some subset of blocks and edges.
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)
Unswitch control flow predicated on loop invariant conditions.
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Build the cloned blocks for an unswitched copy of the given loop.
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, LPMUpdater &LoopUpdater)
bool shouldTryInjectBasingOnMetadata(const BranchInst *BI, const BasicBlock *TakenSucc)
Returns true, if metadata on BI allows us to optimize branching into TakenSucc via injection of invar...
static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, const Instruction *I, AssumptionCache *AC, const DominatorTree &DT, const BranchInst &ComputeProfFrom)
Copy a set of loop invariant values Invariants and insert them at the end of BB and conditionally bra...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
iterator end() const
Definition ArrayRef.h:132
size_t size() const
size - Get the array size.
Definition ArrayRef.h:143
iterator begin() const
Definition ArrayRef.h:131
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:138
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
size_t size() const
Definition BasicBlock.h:480
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives in, right before MovePos.
Definition BasicBlock.h:386
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
Definition BasicBlock.h:233
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
LLVM_ABI void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
static LLVM_ABI CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Predicate getNonStrictPredicate() const
For example, SGT -> SGE, SLT -> SLE, ULT -> ULE, UGT -> UGE.
Definition InstrTypes.h:871
static LLVM_ABI bool isStrictPredicate(Predicate predicate)
This is a static version that you can use without an instruction available.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
bool isUnsigned() const
Definition InstrTypes.h:936
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI bool isOneValue() const
Returns true if the value is one.
A debug info location.
Definition DebugLoc.h:124
static DebugLoc getCompilerGenerated()
Definition DebugLoc.h:163
static DebugLoc getDropped()
Definition DebugLoc.h:164
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:205
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator begin()
Definition DenseMap.h:78
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:174
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:233
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents a freeze function that returns random concrete value if an operand is either a ...
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
bool isRelational() const
Return true if the predicate is relational (not EQ or NE).
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2645
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1197
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1551
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void dropLocation()
Drop the instruction's debug location.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
bool isTerminator() const
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted instruction.
A wrapper class for inspecting calls to intrinsic functions.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
void perform(const LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
iterator end() const
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
iterator begin() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
StringRef getName() const
Definition LoopInfo.h:389
Metadata node.
Definition Metadata.h:1078
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:608
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition MemorySSA.h:371
An analysis that produces MemorySSA for a function.
Definition MemorySSA.h:936
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
LLVM_ABI void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
LLVM_ABI void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
LLVM_ABI void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
LLVM_ABI void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.
LLVM_ABI void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
LLVM_ABI MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point, bool CreationMustSucceed=true)
Create a MemoryAccess in MemorySSA at a specified point in a block.
LLVM_ABI void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
LLVM_ABI void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
LLVM_ABI void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
LLVM_ABI void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition MemorySSA.h:702
LLVM_ABI void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right blocks).
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition MemorySSA.h:720
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition MemorySSA.h:768
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
The main scalar evolution driver.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI void forgetTopmostLoop(const Loop *L)
LLVM_ABI void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
LLVM_ABI void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V)
Forget LCSSA phi node V of loop L to which a new predecessor was added, such that it may no longer be...
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:102
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:261
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition SetVector.h:105
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:150
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:338
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI Instruction::InstListType::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
ConstantIntT * getCaseValue() const
Resolves case value for current case.
Multiway switch.
BasicBlock * getDefaultDest() const
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, InsertPosition InsertBefore=nullptr)
void setDefaultDest(BasicBlock *DefaultCase)
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
void push_back(EltTy NewVal)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition ValueMap.h:167
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition ValueMap.h:156
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ?
bool match(Val *V, const Pattern &P)
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
LogicalOp_match< LHS, RHS, Instruction::Or > m_LogicalOr(const LHS &L, const RHS &R)
Matches L || R either in the form of L | R or L ?
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
initializer< Ty > init(const Ty &Val)
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void stable_sort(R &&Range)
Definition STLExtras.h:2058
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1655
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
auto successors(const MachineBasicBlock *BB)
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, Function &F, StringRef PassName)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition STLExtras.h:632
auto cast_or_null(const Y &Val)
Definition Casting.h:714
LLVM_ABI MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
Op::Description Desc
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1150
DomTreeNodeBase< BasicBlock > DomTreeNode
Definition Dominators.h:95
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
bool containsIrreducibleCFG(RPOTraversalT &RPOTraversal, const LoopInfoT &LI)
Return true if the control flow in RPOTraversal is irreducible.
Definition CFG.h:149
static cl::opt< unsigned > InjectInvariantConditionHotnesThreshold("simple-loop-unswitch-inject-invariant-condition-hotness-threshold", cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " "unswitch on them to eliminate branches that are " "not-taken 1/<this option> times or less."), cl::init(16))
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&...args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
Definition STLExtras.h:852
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
static cl::opt< bool > InjectInvariantConditions("simple-loop-unswitch-inject-invariant-conditions", cl::Hidden, cl::desc("Whether we should inject new invariants and unswitch them to " "eliminate some existing (non-invariant) conditions."), cl::init(true))
LLVM_ABI bool VerifyLoopInfo
Enable verification of loop info.
Definition LoopInfo.cpp:51
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
TargetTransformInfo TTI
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition MemorySSA.cpp:84
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
LLVM_ABI bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition LoopUtils.cpp:58
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto sum_of(R &&Range, E Init=E{0})
Returns the sum of all values in Range with Init initial value.
Definition STLExtras.h:1703
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1961
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
static cl::opt< bool > EstimateProfile("simple-loop-unswitch-estimate-profile", cl::Hidden, cl::init(true))
LLVM_ABI llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2120
auto predecessors(const MachineBasicBlock *BB)
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
static cl::opt< bool > FreezeLoopUnswitchCond("freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation."))
LLVM_ABI std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
LLVM_ABI bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition LCSSA.cpp:427
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
static cl::opt< int > UnswitchParentBlocksDiv("unswitch-parent-blocks-div", cl::init(8), cl::Hidden, cl::desc("Outer loop size divisor for cost multiplier."))
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Struct to hold information about a partially invariant condition.
Definition LoopUtils.h:622
SmallVector< Instruction * > InstToDuplicate
Instructions that need to be duplicated and checked for the unswitching condition.
Definition LoopUtils.h:625
Constant * KnownValue
Constant to indicate for which value the condition is invariant.
Definition LoopUtils.h:628
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:70