LLVM 22.0.0git
SimpleLoopUnswitch.cpp
Go to the documentation of this file.
1///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/DenseMap.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/ADT/Sequence.h"
13#include "llvm/ADT/SetVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/Twine.h"
20#include "llvm/Analysis/CFG.h"
33#include "llvm/IR/BasicBlock.h"
34#include "llvm/IR/Constant.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/Dominators.h"
37#include "llvm/IR/Function.h"
38#include "llvm/IR/IRBuilder.h"
39#include "llvm/IR/InstrTypes.h"
40#include "llvm/IR/Instruction.h"
43#include "llvm/IR/Module.h"
46#include "llvm/IR/Use.h"
47#include "llvm/IR/Value.h"
50#include "llvm/Support/Debug.h"
61#include <algorithm>
62#include <cassert>
63#include <iterator>
64#include <numeric>
65#include <optional>
66#include <utility>
67
68#define DEBUG_TYPE "simple-loop-unswitch"
69
70using namespace llvm;
71using namespace llvm::PatternMatch;
72
73STATISTIC(NumBranches, "Number of branches unswitched");
74STATISTIC(NumSwitches, "Number of switches unswitched");
75STATISTIC(NumSelects, "Number of selects turned into branches for unswitching");
76STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
77STATISTIC(NumTrivial, "Number of unswitches that are trivial");
79 NumCostMultiplierSkipped,
80 "Number of unswitch candidates that had their cost multiplier skipped");
81STATISTIC(NumInvariantConditionsInjected,
82 "Number of invariant conditions injected and unswitched");
83
namespace llvm {
// Command-line knobs controlling SimpleLoopUnswitch.
// NOTE(review): several `static cl::opt<...> Name(` declarator lines are
// missing from this listing (some option argument lists below start directly
// with their cl:: arguments) — restore the declarators from the upstream file.
    "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
    cl::desc("Forcibly enables non-trivial loop unswitching rather than "
             "following the configuration passed into the pass."));

static cl::opt<int>
    UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
                      cl::desc("The cost threshold for unswitching a loop."));

    "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
    cl::desc("Enable unswitch cost multiplier that prohibits exponential "
             "explosion in nontrivial unswitch."));
    "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
    cl::desc("Toplevel siblings divisor for cost multiplier."));
    "unswitch-parent-blocks-div", cl::init(8), cl::Hidden,
    cl::desc("Outer loop size divisor for cost multiplier."));
    "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
    cl::desc("Number of unswitch candidates that are ignored when calculating "
             "cost multiplier."));
    "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
    cl::desc("If enabled, simple loop unswitching will also consider "
             "llvm.experimental.guard intrinsics as unswitch candidates."));
    "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
    cl::init(false), cl::Hidden,
    cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
             "null checks to save time analyzing if we can keep it."));
    MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
                  cl::desc("Max number of memory uses to explore during "
                           "partial unswitching analysis"),
                  cl::init(100), cl::Hidden);
    "freeze-loop-unswitch-cond", cl::init(true), cl::Hidden,
    cl::desc("If enabled, the freeze instruction will be added to condition "
             "of loop unswitch to prevent miscompilation."));

    "simple-loop-unswitch-inject-invariant-conditions", cl::Hidden,
    cl::desc("Whether we should inject new invariants and unswitch them to "
             "eliminate some existing (non-invariant) conditions."),
    cl::init(true));

    "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
    cl::desc("Only try to inject loop invariant conditions and "
             "unswitch on them to eliminate branches that are "
             "not-taken 1/<this option> times or less."),
    cl::init(16));

// Consulted when computing branch weights for newly built unswitched branches
// (see the HasBranchWeights computation in
// buildPartialUnswitchConditionalBranch).
static cl::opt<bool> EstimateProfile("simple-loop-unswitch-estimate-profile",
                                     cl::Hidden, cl::init(true));
} // namespace llvm
145
147namespace {
/// Describes a comparison-based unswitch candidate: a conditional branch
/// together with the loop-invariant value its condition depends on.
struct CompareDesc {
  // The conditional branch being considered.
  BranchInst *Term;
  // The loop-invariant value involved in the comparison.
  Value *Invariant;
  // The successor of Term that remains inside the loop (per its name —
  // confirm against the uses elsewhere in this file).
  BasicBlock *InLoopSucc;

  CompareDesc(BranchInst *Term, Value *Invariant, BasicBlock *InLoopSucc)
      : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
};
156
/// A loop-invariant condition `Pred LHS RHS` that is to be materialized
/// ("injected") into the IR and then unswitched on.
struct InjectedInvariant {
  // Comparison predicate of the condition to inject.
  ICmpInst::Predicate Pred;
  // Operands of the injected comparison.
  Value *LHS;
  Value *RHS;
  // The successor that remains inside the loop (per its name — confirm
  // against the uses elsewhere in this file).
  BasicBlock *InLoopSucc;

  InjectedInvariant(ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
                    BasicBlock *InLoopSucc)
      : Pred(Pred), LHS(LHS), RHS(RHS), InLoopSucc(InLoopSucc) {}
};
167
168struct NonTrivialUnswitchCandidate {
169 Instruction *TI = nullptr;
170 TinyPtrVector<Value *> Invariants;
171 std::optional<InstructionCost> Cost;
172 std::optional<InjectedInvariant> PendingInjection;
173 NonTrivialUnswitchCandidate(
174 Instruction *TI, ArrayRef<Value *> Invariants,
175 std::optional<InstructionCost> Cost = std::nullopt,
176 std::optional<InjectedInvariant> PendingInjection = std::nullopt)
177 : TI(TI), Invariants(Invariants), Cost(Cost),
178 PendingInjection(PendingInjection) {};
179
180 bool hasPendingInjection() const { return PendingInjection.has_value(); }
181};
182} // end anonymous namespace.
183
184// Helper to skip (select x, true, false), which matches both a logical AND and
185// OR and can confuse code that tries to determine if \p Cond is either a
186// logical AND or OR but not both.
188 Value *CondNext;
189 while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
190 Cond = CondNext;
191 return Cond;
192}
193
194/// Collect all of the loop invariant input values transitively used by the
195/// homogeneous instruction graph from a given root.
196///
197/// This essentially walks from a root recursively through loop variant operands
198/// which have perform the same logical operation (AND or OR) and finds all
199/// inputs which are loop invariant. For some operations these can be
200/// re-associated and unswitched out of the loop entirely.
203 const LoopInfo &LI) {
204 assert(!L.isLoopInvariant(&Root) &&
205 "Only need to walk the graph if root itself is not invariant.");
206 TinyPtrVector<Value *> Invariants;
207
208 bool IsRootAnd = match(&Root, m_LogicalAnd());
209 bool IsRootOr = match(&Root, m_LogicalOr());
210
211 // Build a worklist and recurse through operators collecting invariants.
214 Worklist.push_back(&Root);
215 Visited.insert(&Root);
216 do {
217 Instruction &I = *Worklist.pop_back_val();
218 for (Value *OpV : I.operand_values()) {
219 // Skip constants as unswitching isn't interesting for them.
220 if (isa<Constant>(OpV))
221 continue;
222
223 // Add it to our result if loop invariant.
224 if (L.isLoopInvariant(OpV)) {
225 Invariants.push_back(OpV);
226 continue;
227 }
228
229 // If not an instruction with the same opcode, nothing we can do.
231
232 if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
233 (IsRootOr && match(OpI, m_LogicalOr())))) {
234 // Visit this operand.
235 if (Visited.insert(OpI).second)
236 Worklist.push_back(OpI);
237 }
238 }
239 } while (!Worklist.empty());
240
241 return Invariants;
242}
243
244static void replaceLoopInvariantUses(const Loop &L, Value *Invariant,
245 Constant &Replacement) {
246 assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");
247
248 // Replace uses of LIC in the loop with the given constant.
249 // We use make_early_inc_range as set invalidates the iterator.
250 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
251 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
252
253 // Replace this use within the loop body.
254 if (UserI && L.contains(UserI))
255 U.set(&Replacement);
256 }
257}
258
259/// Check that all the LCSSA PHI nodes in the loop exit block have trivial
260/// incoming values along this edge.
262 const BasicBlock &ExitingBB,
263 const BasicBlock &ExitBB) {
264 for (const Instruction &I : ExitBB) {
265 auto *PN = dyn_cast<PHINode>(&I);
266 if (!PN)
267 // No more PHIs to check.
268 return true;
269
270 // If the incoming value for this edge isn't loop invariant the unswitch
271 // won't be trivial.
272 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
273 return false;
274 }
275 llvm_unreachable("Basic blocks should never be empty!");
276}
277
/// Copy a set of loop invariant values \p Invariants and insert them at the
/// end of \p BB and conditionally branch on the copied condition. We only
/// branch on a single value.
/// We attempt to estimate the profile of the resulting conditional branch from
/// \p ComputeProfFrom, which is the original conditional branch we're
/// unswitching.
/// When \p Direction is true, the \p Invariants form a disjunction, and the
/// branch conditioned on it exits the loop on the "true" case. When \p
/// Direction is false, the \p Invariants form a conjunction and the branch
/// exits on the "false" case.
// NOTE(review): the declarator line (presumably `static void
// buildPartialUnswitchConditionalBranch(`) was dropped from this listing;
// the parameter list below is otherwise intact.
    BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
    BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
    const Instruction *I, AssumptionCache *AC, const DominatorTree &DT,
    const BranchInst &ComputeProfFrom) {

  // Only reuse the original branch's profile when estimation is enabled,
  // profcheck fixes are not disabled, and weights are actually present.
  SmallVector<uint32_t> BranchWeights;
  bool HasBranchWeights = EstimateProfile && !ProfcheckDisableMetadataFixes &&
                          extractBranchWeights(ComputeProfFrom, BranchWeights);
  // If Direction is true, that means we had a disjunction and that the "true"
  // case exits. The probability of the disjunction of the subset of terms is at
  // most as high as the original one. So, if the probability is higher than the
  // one we'd assign in absence of a profile (i.e. 0.5), we will use 0.5,
  // but if it's lower, we will use the original probability.
  // Conversely, if Direction is false, that means we had a conjunction, and the
  // probability of exiting is captured in the second branch weight. That
  // probability is a disjunction (of the negation of the original terms). The
  // same reasoning applies as above.
  // Issue #165649: should we expect BFI to conserve, and use that to calculate
  // the branch weights?
  if (HasBranchWeights &&
      static_cast<double>(BranchWeights[Direction ? 0 : 1]) /
              static_cast<double>(sum_of(BranchWeights)) >
          0.5)
    HasBranchWeights = false;

  IRBuilder<> IRB(&BB);
  // NOTE(review): one line was dropped from this listing here (it follows the
  // IRBuilder construction upstream) — verify against the upstream file.

  // Freeze each invariant unless it is already known not to be undef/poison;
  // branching on a frozen value avoids miscompiles from undef conditions.
  SmallVector<Value *> FrozenInvariants;
  for (Value *Inv : Invariants) {
    if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT))
      Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr");
    FrozenInvariants.push_back(Inv);
  }

  // Disjunctions exit on "true"; conjunctions continue on "true".
  Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
                          : IRB.CreateAnd(FrozenInvariants);
  auto *BR = IRB.CreateCondBr(
      Cond, Direction ? &UnswitchedSucc : &NormalSucc,
      Direction ? &NormalSucc : &UnswitchedSucc,
      HasBranchWeights ? ComputeProfFrom.getMetadata(LLVMContext::MD_prof)
                       : nullptr);
  if (!HasBranchWeights)
    // NOTE(review): the call opener (presumably
    // `setExplicitlyUnknownBranchWeightsIfProfiled(`, cf. the parallel call
    // in buildPartialInvariantUnswitchConditionalBranch) was dropped here.
        *BR, *BR->getParent()->getParent(), DEBUG_TYPE);
}
335
/// Copy a set of loop invariant values, and conditionally branch on them.
// NOTE(review): the declarator line (presumably `static void
// buildPartialInvariantUnswitchConditionalBranch(`) was dropped from this
// listing; the parameter list below is otherwise intact.
    BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
    BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
    MemorySSAUpdater *MSSAU, const BranchInst &OriginalBranch) {
  // NOTE(review): the declaration of `VMap` (used below to map originals to
  // clones, presumably a ValueToValueMapTy) was dropped here.
  // Clone the invariant instructions into BB; iterating ToDuplicate in
  // reverse lets each clone's operands be remapped to earlier clones.
  for (auto *Val : reverse(ToDuplicate)) {
    Instruction *Inst = cast<Instruction>(Val);
    Instruction *NewInst = Inst->clone();

    if (const DebugLoc &DL = Inst->getDebugLoc())
      mapAtomInstance(DL, VMap);

    NewInst->insertInto(&BB, BB.end());
    RemapInstruction(NewInst, VMap,
    // NOTE(review): the remap-flags argument line of this call was dropped
    // here.
    VMap[Val] = NewInst;

    if (!MSSAU)
      continue;

    // Keep MemorySSA in sync: give each cloned memory use a defining access
    // from before the loop, since the clone now lives outside the loop.
    MemorySSA *MSSA = MSSAU->getMemorySSA();
    if (auto *MemUse =
    // NOTE(review): the line producing `MemUse` (a cast of Inst's memory
    // access) was dropped here.
      auto *DefiningAccess = MemUse->getDefiningAccess();
      // Get the first defining access before the loop.
      while (L.contains(DefiningAccess->getBlock())) {
        // If the defining access is a MemoryPhi, get the incoming
        // value for the pre-header as defining access.
        if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
          DefiningAccess =
              MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
        else
          DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
      }
      MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
                                    NewInst->getParent(),
      // NOTE(review): the insertion-point argument line of this call was
      // dropped here.
    }
  }

  IRBuilder<> IRB(&BB);
  // NOTE(review): one line was dropped from this listing here (it follows the
  // IRBuilder construction upstream) — verify against the upstream file.
  Value *Cond = VMap[ToDuplicate[0]];
  // The expectation is that ToDuplicate[0] is the condition used by the
  // OriginalBranch, case in which we can clone the profile metadata from there.
  auto *ProfData =
  // NOTE(review): one conjunct of this condition was dropped here.
      ToDuplicate[0] == skipTrivialSelect(OriginalBranch.getCondition())
          ? OriginalBranch.getMetadata(LLVMContext::MD_prof)
          : nullptr;
  auto *BR =
      IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
                       Direction ? &NormalSucc : &UnswitchedSucc, ProfData);
  if (!ProfData)
    setExplicitlyUnknownBranchWeightsIfProfiled(*BR, *BR->getFunction(),
                                                DEBUG_TYPE);
}
394
395/// Rewrite the PHI nodes in an unswitched loop exit basic block.
396///
397/// Requires that the loop exit and unswitched basic block are the same, and
398/// that the exiting block was a unique predecessor of that block. Rewrites the
399/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
400/// PHI nodes from the old preheader that now contains the unswitched
401/// terminator.
403 BasicBlock &OldExitingBB,
404 BasicBlock &OldPH) {
405 for (PHINode &PN : UnswitchedBB.phis()) {
406 // When the loop exit is directly unswitched we just need to update the
407 // incoming basic block. We loop to handle weird cases with repeated
408 // incoming blocks, but expect to typically only have one operand here.
409 for (auto i : seq<int>(0, PN.getNumOperands())) {
410 assert(PN.getIncomingBlock(i) == &OldExitingBB &&
411 "Found incoming block different from unique predecessor!");
412 PN.setIncomingBlock(i, &OldPH);
413 }
414 }
415}
416
417/// Rewrite the PHI nodes in the loop exit basic block and the split off
418/// unswitched block.
419///
420/// Because the exit block remains an exit from the loop, this rewrites the
421/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
422/// nodes into the unswitched basic block to select between the value in the
423/// old preheader and the loop exit.
425 BasicBlock &UnswitchedBB,
426 BasicBlock &OldExitingBB,
427 BasicBlock &OldPH,
428 bool FullUnswitch) {
429 assert(&ExitBB != &UnswitchedBB &&
430 "Must have different loop exit and unswitched blocks!");
431 BasicBlock::iterator InsertPt = UnswitchedBB.begin();
432 for (PHINode &PN : ExitBB.phis()) {
433 auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
434 PN.getName() + ".split");
435 NewPN->insertBefore(InsertPt);
436
437 // Walk backwards over the old PHI node's inputs to minimize the cost of
438 // removing each one. We have to do this weird loop manually so that we
439 // create the same number of new incoming edges in the new PHI as we expect
440 // each case-based edge to be included in the unswitched switch in some
441 // cases.
442 // FIXME: This is really, really gross. It would be much cleaner if LLVM
443 // allowed us to create a single entry for a predecessor block without
444 // having separate entries for each "edge" even though these edges are
445 // required to produce identical results.
446 for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
447 if (PN.getIncomingBlock(i) != &OldExitingBB)
448 continue;
449
450 Value *Incoming = PN.getIncomingValue(i);
451 if (FullUnswitch)
452 // No more edge from the old exiting block to the exit block.
453 PN.removeIncomingValue(i);
454
455 NewPN->addIncoming(Incoming, &OldPH);
456 }
457
458 // Now replace the old PHI with the new one and wire the old one in as an
459 // input to the new one.
460 PN.replaceAllUsesWith(NewPN);
461 NewPN->addIncoming(&PN, &ExitBB);
462 }
463}
464
465/// Hoist the current loop up to the innermost loop containing a remaining exit.
466///
467/// Because we've removed an exit from the loop, we may have changed the set of
468/// loops reachable and need to move the current loop up the loop nest or even
469/// to an entirely separate nest.
470static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
471 DominatorTree &DT, LoopInfo &LI,
472 MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
473 // If the loop is already at the top level, we can't hoist it anywhere.
474 Loop *OldParentL = L.getParentLoop();
475 if (!OldParentL)
476 return;
477
479 L.getExitBlocks(Exits);
480 Loop *NewParentL = nullptr;
481 for (auto *ExitBB : Exits)
482 if (Loop *ExitL = LI.getLoopFor(ExitBB))
483 if (!NewParentL || NewParentL->contains(ExitL))
484 NewParentL = ExitL;
485
486 if (NewParentL == OldParentL)
487 return;
488
489 // The new parent loop (if different) should always contain the old one.
490 if (NewParentL)
491 assert(NewParentL->contains(OldParentL) &&
492 "Can only hoist this loop up the nest!");
493
494 // The preheader will need to move with the body of this loop. However,
495 // because it isn't in this loop we also need to update the primary loop map.
496 assert(OldParentL == LI.getLoopFor(&Preheader) &&
497 "Parent loop of this loop should contain this loop's preheader!");
498 LI.changeLoopFor(&Preheader, NewParentL);
499
500 // Remove this loop from its old parent.
501 OldParentL->removeChildLoop(&L);
502
503 // Add the loop either to the new parent or as a top-level loop.
504 if (NewParentL)
505 NewParentL->addChildLoop(&L);
506 else
507 LI.addTopLevelLoop(&L);
508
509 // Remove this loops blocks from the old parent and every other loop up the
510 // nest until reaching the new parent. Also update all of these
511 // no-longer-containing loops to reflect the nesting change.
512 for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
513 OldContainingL = OldContainingL->getParentLoop()) {
514 llvm::erase_if(OldContainingL->getBlocksVector(),
515 [&](const BasicBlock *BB) {
516 return BB == &Preheader || L.contains(BB);
517 });
518
519 OldContainingL->getBlocksSet().erase(&Preheader);
520 for (BasicBlock *BB : L.blocks())
521 OldContainingL->getBlocksSet().erase(BB);
522
523 // Because we just hoisted a loop out of this one, we have essentially
524 // created new exit paths from it. That means we need to form LCSSA PHI
525 // nodes for values used in the no-longer-nested loop.
526 formLCSSA(*OldContainingL, DT, &LI, SE);
527
528 // We shouldn't need to form dedicated exits because the exit introduced
529 // here is the (just split by unswitching) preheader. However, after trivial
530 // unswitching it is possible to get new non-dedicated exits out of parent
531 // loop so let's conservatively form dedicated exit blocks and figure out
532 // if we can optimize later.
533 formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
534 /*PreserveLCSSA*/ true);
535 }
536}
537
538// Return the top-most loop containing ExitBB and having ExitBB as exiting block
539// or the loop containing ExitBB, if there is no parent loop containing ExitBB
540// as exiting block.
542 const LoopInfo &LI) {
543 Loop *TopMost = LI.getLoopFor(ExitBB);
544 Loop *Current = TopMost;
545 while (Current) {
546 if (Current->isLoopExiting(ExitBB))
547 TopMost = Current;
548 Current = Current->getParentLoop();
549 }
550 return TopMost;
551}
552
/// Unswitch a trivial branch if the condition is loop invariant.
///
/// This routine should only be called when loop code leading to the branch has
/// been validated as trivial (no side effects). This routine checks if the
/// condition is invariant and one of the successors is a loop exit. This
/// allows us to unswitch without duplicating the loop, making it trivial.
///
/// If this routine fails to unswitch the branch it returns false.
///
/// If the branch can be unswitched, this routine splits the preheader and
/// hoists the branch above that split. Preserves loop simplified form
/// (splitting the exit block as necessary). It simplifies the branch within
/// the loop to an unconditional branch but doesn't remove it entirely. Further
/// cleanup can be done with some simplifycfg like pass.
///
/// If `SE` is not null, it will be updated based on the potential loop SCEVs
/// invalidated by this.
// NOTE(review): the declarator line (presumably `static bool
// unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,`) was
// dropped from this listing.
                                  LoopInfo &LI, ScalarEvolution *SE,
                                  MemorySSAUpdater *MSSAU) {
  assert(BI.isConditional() && "Can only unswitch a conditional branch!");
  LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");

  // The loop invariant values that we want to unswitch.
  TinyPtrVector<Value *> Invariants;

  // When true, we're fully unswitching the branch rather than just unswitching
  // some input conditions to the branch.
  bool FullUnswitch = false;

  // NOTE(review): the definition of `Cond` (presumably
  // `Value *Cond = skipTrivialSelect(BI.getCondition());`) was dropped from
  // this listing here.
  if (L.isLoopInvariant(Cond)) {
    Invariants.push_back(Cond);
    FullUnswitch = true;
  } else {
    if (auto *CondInst = dyn_cast<Instruction>(Cond))
      Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
    if (Invariants.empty()) {
      LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n");
      return false;
    }
  }

  // Check that one of the branch's successors exits, and which one.
  bool ExitDirection = true;
  int LoopExitSuccIdx = 0;
  auto *LoopExitBB = BI.getSuccessor(0);
  if (L.contains(LoopExitBB)) {
    ExitDirection = false;
    LoopExitSuccIdx = 1;
    LoopExitBB = BI.getSuccessor(1);
    if (L.contains(LoopExitBB)) {
      LLVM_DEBUG(dbgs() << " Branch doesn't exit the loop!\n");
      return false;
    }
  }
  auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
  auto *ParentBB = BI.getParent();
  if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {
    LLVM_DEBUG(dbgs() << " Loop exit PHI's aren't loop-invariant!\n");
    return false;
  }

  // When unswitching only part of the branch's condition, we need the exit
  // block to be reached directly from the partially unswitched input. This can
  // be done when the exit block is along the true edge and the branch condition
  // is a graph of `or` operations, or the exit block is along the false edge
  // and the condition is a graph of `and` operations.
  if (!FullUnswitch) {
    if (ExitDirection ? !match(Cond, m_LogicalOr())
                      : !match(Cond, m_LogicalAnd())) {
      LLVM_DEBUG(dbgs() << " Branch condition is in improper form for "
                           "non-full unswitch!\n");
      return false;
    }
  }

  LLVM_DEBUG({
    dbgs() << " unswitching trivial invariant conditions for: " << BI
           << "\n";
    for (Value *Invariant : Invariants) {
      dbgs() << " " << *Invariant << " == true";
      if (Invariant != Invariants.back())
        dbgs() << " ||";
      dbgs() << "\n";
    }
  });

  // If we have scalar evolutions, we need to invalidate them including this
  // loop, the loop containing the exit block and the topmost parent loop
  // exiting via LoopExitBB.
  if (SE) {
    if (const Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
      SE->forgetLoop(ExitL);
    else
      // Forget the entire nest as this exits the entire nest.
      SE->forgetTopmostLoop(&L);
    // NOTE(review): one line was dropped from this listing here (it follows
    // the forget calls upstream) — verify against the upstream file.
  }

  if (MSSAU && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  // Split the preheader, so that we know that there is a safe place to insert
  // the conditional branch. We will change the preheader to have a conditional
  // branch on LoopCond.
  BasicBlock *OldPH = L.getLoopPreheader();
  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);

  // Now that we have a place to insert the conditional branch, create a place
  // to branch to: this is the exit block out of the loop that we are
  // unswitching. We need to split this if there are other loop predecessors.
  // Because the loop is in simplified form, *any* other predecessor is enough.
  BasicBlock *UnswitchedBB;
  if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
    assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
           "A branch's parent isn't a predecessor!");
    UnswitchedBB = LoopExitBB;
  } else {
    UnswitchedBB =
        SplitBlock(LoopExitBB, LoopExitBB->begin(), &DT, &LI, MSSAU, "", false);
  }

  if (MSSAU && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  // Actually move the invariant uses into the unswitched position. If possible,
  // we do this by moving the instructions, but when doing partial unswitching
  // we do it by building a new merge of the values in the unswitched position.
  OldPH->getTerminator()->eraseFromParent();
  if (FullUnswitch) {
    // If fully unswitching, we can use the existing branch instruction.
    // Splice it into the old PH to gate reaching the new preheader and re-point
    // its successors.
    BI.moveBefore(*OldPH, OldPH->end());
    BI.setCondition(Cond);
    if (MSSAU) {
      // Temporarily clone the terminator, to make MSSA update cheaper by
      // separating "insert edge" updates from "remove edge" ones.
      BI.clone()->insertInto(ParentBB, ParentBB->end());
    } else {
      // Create a new unconditional branch that will continue the loop as a new
      // terminator.
      Instruction *NewBI = BranchInst::Create(ContinueBB, ParentBB);
      NewBI->setDebugLoc(BI.getDebugLoc());
    }
    BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
    BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
  } else {
    // Only unswitching a subset of inputs to the condition, so we will need to
    // build a new branch that merges the invariant inputs.
    if (ExitDirection)
      // NOTE(review): the assert opener for this message was dropped from
      // this listing.
      "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
      "condition!");
    else
      // NOTE(review): the assert opener for this message was dropped from
      // this listing.
          "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
          " condition!");
    // NOTE(review): the call opener (presumably
    // `buildPartialUnswitchConditionalBranch(`) was dropped from this
    // listing.
        *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
        FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT, BI);
  }

  // Update the dominator tree with the added edge.
  DT.insertEdge(OldPH, UnswitchedBB);

  // After the dominator tree was updated with the added edge, update MemorySSA
  // if available.
  if (MSSAU) {
    // NOTE(review): the declaration of `Updates` was dropped from this
    // listing here.
    Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
    MSSAU->applyInsertUpdates(Updates, DT);
  }

  // Finish updating dominator tree and memory ssa for full unswitch.
  if (FullUnswitch) {
    if (MSSAU) {
      Instruction *Term = ParentBB->getTerminator();
      // Remove the cloned branch instruction and create unconditional branch
      // now.
      Instruction *NewBI = BranchInst::Create(ContinueBB, ParentBB);
      NewBI->setDebugLoc(Term->getDebugLoc());
      Term->eraseFromParent();
      MSSAU->removeEdge(ParentBB, LoopExitBB);
    }
    DT.deleteEdge(ParentBB, LoopExitBB);
  }

  if (MSSAU && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  // Rewrite the relevant PHI nodes.
  if (UnswitchedBB == LoopExitBB)
    rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
  else
    rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
                                              *ParentBB, *OldPH, FullUnswitch);

  // The constant we can replace all of our invariants with inside the loop
  // body. If any of the invariants have a value other than this the loop won't
  // be entered.
  ConstantInt *Replacement = ExitDirection
  // NOTE(review): the two arms of this conditional operator (presumably
  // ConstantInt::getFalse / ConstantInt::getTrue) were dropped from this
  // listing.

  // Since this is an i1 condition we can also trivially replace uses of it
  // within the loop with a constant.
  for (Value *Invariant : Invariants)
    replaceLoopInvariantUses(L, Invariant, *Replacement);

  // If this was full unswitching, we may have changed the nesting relationship
  // for this loop so hoist it to its correct parent if needed.
  if (FullUnswitch)
    hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);

  if (MSSAU && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
  ++NumTrivial;
  ++NumBranches;
  return true;
}
777
778/// Unswitch a trivial switch if the condition is loop invariant.
779///
780/// This routine should only be called when loop code leading to the switch has
781/// been validated as trivial (no side effects). This routine checks if the
782/// condition is invariant and that at least one of the successors is a loop
783/// exit. This allows us to unswitch without duplicating the loop, making it
784/// trivial.
785///
786/// If this routine fails to unswitch the switch it returns false.
787///
788/// If the switch can be unswitched, this routine splits the preheader and
789/// copies the switch above that split. If the default case is one of the
790/// exiting cases, it copies the non-exiting cases and points them at the new
791/// preheader. If the default case is not exiting, it copies the exiting cases
792/// and points the default at the preheader. It preserves loop simplified form
793/// (splitting the exit blocks as necessary). It simplifies the switch within
794/// the loop by removing now-dead cases. If the default case is one of those
795/// unswitched, it replaces its destination with a new basic block containing
796/// only unreachable. Such basic blocks, while technically loop exits, are not
797/// considered for unswitching so this is a stable transform and the same
798/// switch will not be revisited. If after unswitching there is only a single
799/// in-loop successor, the switch is further simplified to an unconditional
800/// branch. Still more cleanup can be done with some simplifycfg like pass.
801///
802/// If `SE` is not null, it will be updated based on the potential loop SCEVs
803/// invalidated by this.
805 LoopInfo &LI, ScalarEvolution *SE,
806 MemorySSAUpdater *MSSAU) {
807 LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
808 Value *LoopCond = SI.getCondition();
809
810 // If this isn't switching on an invariant condition, we can't unswitch it.
811 if (!L.isLoopInvariant(LoopCond))
812 return false;
813
814 auto *ParentBB = SI.getParent();
815
816 // The same check must be used both for the default and the exit cases. We
817 // should never leave edges from the switch instruction to a basic block that
818 // we are unswitching, hence the condition used to determine the default case
819 // needs to also be used to populate ExitCaseIndices, which is then used to
820 // remove cases from the switch.
821 auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
822 // BBToCheck is not an exit block if it is inside loop L.
823 if (L.contains(&BBToCheck))
824 return false;
825 // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
826 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
827 return false;
828 // We do not unswitch a block that only has an unreachable statement, as
829 // it's possible this is a previously unswitched block. Only unswitch if
830 // either the terminator is not unreachable, or, if it is, it's not the only
831 // instruction in the block.
832 auto *TI = BBToCheck.getTerminator();
833 bool isUnreachable = isa<UnreachableInst>(TI);
834 return !isUnreachable || &*BBToCheck.getFirstNonPHIOrDbg() != TI;
835 };
836
837 SmallVector<int, 4> ExitCaseIndices;
838 for (auto Case : SI.cases())
839 if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
840 ExitCaseIndices.push_back(Case.getCaseIndex());
841 BasicBlock *DefaultExitBB = nullptr;
844 if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
845 DefaultExitBB = SI.getDefaultDest();
846 } else if (ExitCaseIndices.empty())
847 return false;
848
849 LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
850
851 if (MSSAU && VerifyMemorySSA)
852 MSSAU->getMemorySSA()->verifyMemorySSA();
853
854 // We may need to invalidate SCEVs for the outermost loop reached by any of
855 // the exits.
856 Loop *OuterL = &L;
857
858 if (DefaultExitBB) {
859 // Check the loop containing this exit.
860 Loop *ExitL = getTopMostExitingLoop(DefaultExitBB, LI);
861 if (!ExitL || ExitL->contains(OuterL))
862 OuterL = ExitL;
863 }
864 for (unsigned Index : ExitCaseIndices) {
865 auto CaseI = SI.case_begin() + Index;
866 // Compute the outer loop from this exit.
867 Loop *ExitL = getTopMostExitingLoop(CaseI->getCaseSuccessor(), LI);
868 if (!ExitL || ExitL->contains(OuterL))
869 OuterL = ExitL;
870 }
871
872 if (SE) {
873 if (OuterL)
874 SE->forgetLoop(OuterL);
875 else
876 SE->forgetTopmostLoop(&L);
877 }
878
879 if (DefaultExitBB) {
880 // Clear out the default destination temporarily to allow accurate
881 // predecessor lists to be examined below.
882 SI.setDefaultDest(nullptr);
883 }
884
885 // Store the exit cases into a separate data structure and remove them from
886 // the switch.
887 SmallVector<std::tuple<ConstantInt *, BasicBlock *,
889 4> ExitCases;
890 ExitCases.reserve(ExitCaseIndices.size());
892 // We walk the case indices backwards so that we remove the last case first
893 // and don't disrupt the earlier indices.
894 for (unsigned Index : reverse(ExitCaseIndices)) {
895 auto CaseI = SI.case_begin() + Index;
896 // Save the value of this case.
897 auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
898 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
899 // Delete the unswitched cases.
900 SIW.removeCase(CaseI);
901 }
902
903 // Check if after this all of the remaining cases point at the same
904 // successor.
905 BasicBlock *CommonSuccBB = nullptr;
906 if (SI.getNumCases() > 0 &&
907 all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
908 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
909 }))
910 CommonSuccBB = SI.case_begin()->getCaseSuccessor();
911 if (!DefaultExitBB) {
912 // If we're not unswitching the default, we need it to match any cases to
913 // have a common successor or if we have no cases it is the common
914 // successor.
915 if (SI.getNumCases() == 0)
916 CommonSuccBB = SI.getDefaultDest();
917 else if (SI.getDefaultDest() != CommonSuccBB)
918 CommonSuccBB = nullptr;
919 }
920
921 // Split the preheader, so that we know that there is a safe place to insert
922 // the switch.
923 BasicBlock *OldPH = L.getLoopPreheader();
924 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
925 OldPH->getTerminator()->eraseFromParent();
926
927 // Now add the unswitched switch. This new switch instruction inherits the
928 // debug location of the old switch, because it semantically replace the old
929 // one.
930 auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
931 NewSI->setDebugLoc(SIW->getDebugLoc());
932 SwitchInstProfUpdateWrapper NewSIW(*NewSI);
933
934 // Rewrite the IR for the unswitched basic blocks. This requires two steps.
935 // First, we split any exit blocks with remaining in-loop predecessors. Then
936 // we update the PHIs in one of two ways depending on if there was a split.
937 // We walk in reverse so that we split in the same order as the cases
938 // appeared. This is purely for convenience of reading the resulting IR, but
939 // it doesn't cost anything really.
940 SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
942 // Handle the default exit if necessary.
943 // FIXME: It'd be great if we could merge this with the loop below but LLVM's
944 // ranges aren't quite powerful enough yet.
945 if (DefaultExitBB) {
946 if (pred_empty(DefaultExitBB)) {
947 UnswitchedExitBBs.insert(DefaultExitBB);
948 rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
949 } else {
950 auto *SplitBB =
951 SplitBlock(DefaultExitBB, DefaultExitBB->begin(), &DT, &LI, MSSAU);
952 rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
953 *ParentBB, *OldPH,
954 /*FullUnswitch*/ true);
955 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
956 }
957 }
958 // Note that we must use a reference in the for loop so that we update the
959 // container.
960 for (auto &ExitCase : reverse(ExitCases)) {
961 // Grab a reference to the exit block in the pair so that we can update it.
962 BasicBlock *ExitBB = std::get<1>(ExitCase);
963
964 // If this case is the last edge into the exit block, we can simply reuse it
965 // as it will no longer be a loop exit. No mapping necessary.
966 if (pred_empty(ExitBB)) {
967 // Only rewrite once.
968 if (UnswitchedExitBBs.insert(ExitBB).second)
969 rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
970 continue;
971 }
972
973 // Otherwise we need to split the exit block so that we retain an exit
974 // block from the loop and a target for the unswitched condition.
975 BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
976 if (!SplitExitBB) {
977 // If this is the first time we see this, do the split and remember it.
978 SplitExitBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
979 rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
980 *ParentBB, *OldPH,
981 /*FullUnswitch*/ true);
982 }
983 // Update the case pair to point to the split block.
984 std::get<1>(ExitCase) = SplitExitBB;
985 }
986
987 // Now add the unswitched cases. We do this in reverse order as we built them
988 // in reverse order.
989 for (auto &ExitCase : reverse(ExitCases)) {
990 ConstantInt *CaseVal = std::get<0>(ExitCase);
991 BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
992
993 NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
994 }
995
996 // If the default was unswitched, re-point it and add explicit cases for
997 // entering the loop.
998 if (DefaultExitBB) {
999 NewSIW->setDefaultDest(DefaultExitBB);
1000 NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
1001
1002 // We removed all the exit cases, so we just copy the cases to the
1003 // unswitched switch.
1004 for (const auto &Case : SI.cases())
1005 NewSIW.addCase(Case.getCaseValue(), NewPH,
1007 } else if (DefaultCaseWeight) {
1008 // We have to set branch weight of the default case.
1009 uint64_t SW = *DefaultCaseWeight;
1010 for (const auto &Case : SI.cases()) {
1011 auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
1012 assert(W &&
1013 "case weight must be defined as default case weight is defined");
1014 SW += *W;
1015 }
1016 NewSIW.setSuccessorWeight(0, SW);
1017 }
1018
1019 // If we ended up with a common successor for every path through the switch
1020 // after unswitching, rewrite it to an unconditional branch to make it easy
1021 // to recognize. Otherwise we potentially have to recognize the default case
1022 // pointing at unreachable and other complexity.
1023 if (CommonSuccBB) {
1024 BasicBlock *BB = SI.getParent();
1025 // We may have had multiple edges to this common successor block, so remove
1026 // them as predecessors. We skip the first one, either the default or the
1027 // actual first case.
1028 bool SkippedFirst = DefaultExitBB == nullptr;
1029 for (auto Case : SI.cases()) {
1030 assert(Case.getCaseSuccessor() == CommonSuccBB &&
1031 "Non-common successor!");
1032 (void)Case;
1033 if (!SkippedFirst) {
1034 SkippedFirst = true;
1035 continue;
1036 }
1037 CommonSuccBB->removePredecessor(BB,
1038 /*KeepOneInputPHIs*/ true);
1039 }
1040 // Now nuke the switch and replace it with a direct branch.
1041 Instruction *NewBI = BranchInst::Create(CommonSuccBB, BB);
1042 NewBI->setDebugLoc(SIW->getDebugLoc());
1043 SIW.eraseFromParent();
1044 } else if (DefaultExitBB) {
1045 assert(SI.getNumCases() > 0 &&
1046 "If we had no cases we'd have a common successor!");
1047 // Move the last case to the default successor. This is valid as if the
1048 // default got unswitched it cannot be reached. This has the advantage of
1049 // being simple and keeping the number of edges from this switch to
1050 // successors the same, and avoiding any PHI update complexity.
1051 auto LastCaseI = std::prev(SI.case_end());
1052
1053 SI.setDefaultDest(LastCaseI->getCaseSuccessor());
1055 0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
1056 SIW.removeCase(LastCaseI);
1057 }
1058
1059 // Walk the unswitched exit blocks and the unswitched split blocks and update
1060 // the dominator tree based on the CFG edits. While we are walking unordered
1061 // containers here, the API for applyUpdates takes an unordered list of
1062 // updates and requires them to not contain duplicates.
1064 for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
1065 DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
1066 DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
1067 }
1068 for (auto SplitUnswitchedPair : SplitExitBBMap) {
1069 DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
1070 DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
1071 }
1072
1073 if (MSSAU) {
1074 MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
1075 if (VerifyMemorySSA)
1076 MSSAU->getMemorySSA()->verifyMemorySSA();
1077 } else {
1078 DT.applyUpdates(DTUpdates);
1079 }
1080
1081 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1082
1083 // We may have changed the nesting relationship for this loop so hoist it to
1084 // its correct parent if needed.
1085 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
1086
1087 if (MSSAU && VerifyMemorySSA)
1088 MSSAU->getMemorySSA()->verifyMemorySSA();
1089
1090 ++NumTrivial;
1091 ++NumSwitches;
1092 LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
1093 return true;
1094}
1095
1096/// This routine scans the loop to find a branch or switch which occurs before
1097/// any side effects occur. These can potentially be unswitched without
1098/// duplicating the loop. If a branch or switch is successfully unswitched the
1099/// scanning continues to see if subsequent branches or switches have become
1100/// trivial. Once all trivial candidates have been unswitched, this routine
1101/// returns.
1102///
1103/// The return value indicates whether anything was unswitched (and therefore
1104/// changed).
1105///
1106/// If `SE` is not null, it will be updated based on the potential loop SCEVs
1107/// invalidated by this.
1109 LoopInfo &LI, ScalarEvolution *SE,
1110 MemorySSAUpdater *MSSAU) {
1111 bool Changed = false;
1112
1113 // If loop header has only one reachable successor we should keep looking for
1114 // trivial condition candidates in the successor as well. An alternative is
1115 // to constant fold conditions and merge successors into loop header (then we
1116 // only need to check header's terminator). The reason for not doing this in
1117 // LoopUnswitch pass is that it could potentially break LoopPassManager's
1118 // invariants. Folding dead branches could either eliminate the current loop
1119 // or make other loops unreachable. LCSSA form might also not be preserved
1120 // after deleting branches. The following code keeps traversing loop header's
1121 // successors until it finds the trivial condition candidate (condition that
1122 // is not a constant). Since unswitching generates branches with constant
1123 // conditions, this scenario could be very common in practice.
1124 BasicBlock *CurrentBB = L.getHeader();
1126 Visited.insert(CurrentBB);
1127 do {
1128 // Check if there are any side-effecting instructions (e.g. stores, calls,
1129 // volatile loads) in the part of the loop that the code *would* execute
1130 // without unswitching.
1131 if (MSSAU) // Possible early exit with MSSA
1132 if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
1133 if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
1134 return Changed;
1135 if (llvm::any_of(*CurrentBB,
1136 [](Instruction &I) { return I.mayHaveSideEffects(); }))
1137 return Changed;
1138
1139 Instruction *CurrentTerm = CurrentBB->getTerminator();
1140
1141 if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1142 // Don't bother trying to unswitch past a switch with a constant
1143 // condition. This should be removed prior to running this pass by
1144 // simplifycfg.
1145 if (isa<Constant>(SI->getCondition()))
1146 return Changed;
1147
1148 if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
1149 // Couldn't unswitch this one so we're done.
1150 return Changed;
1151
1152 // Mark that we managed to unswitch something.
1153 Changed = true;
1154
1155 // If unswitching turned the terminator into an unconditional branch then
1156 // we can continue. The unswitching logic specifically works to fold any
1157 // cases it can into an unconditional branch to make it easier to
1158 // recognize here.
1159 auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
1160 if (!BI || BI->isConditional())
1161 return Changed;
1162
1163 CurrentBB = BI->getSuccessor(0);
1164 continue;
1165 }
1166
1167 auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1168 if (!BI)
1169 // We do not understand other terminator instructions.
1170 return Changed;
1171
1172 // Don't bother trying to unswitch past an unconditional branch or a branch
1173 // with a constant value. These should be removed by simplifycfg prior to
1174 // running this pass.
1175 if (!BI->isConditional() ||
1176 isa<Constant>(skipTrivialSelect(BI->getCondition())))
1177 return Changed;
1178
1179 // Found a trivial condition candidate: non-foldable conditional branch. If
1180 // we fail to unswitch this, we can't do anything else that is trivial.
1181 if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
1182 return Changed;
1183
1184 // Mark that we managed to unswitch something.
1185 Changed = true;
1186
1187 // If we only unswitched some of the conditions feeding the branch, we won't
1188 // have collapsed it to a single successor.
1189 BI = cast<BranchInst>(CurrentBB->getTerminator());
1190 if (BI->isConditional())
1191 return Changed;
1192
1193 // Follow the newly unconditional branch into its successor.
1194 CurrentBB = BI->getSuccessor(0);
1195
1196 // When continuing, if we exit the loop or reach a previous visited block,
1197 // then we can not reach any trivial condition candidates (unfoldable
1198 // branch instructions or switch instructions) and no unswitch can happen.
1199 } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
1200
1201 return Changed;
1202}
1203
1204/// Build the cloned blocks for an unswitched copy of the given loop.
1205///
1206/// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
1207/// after the split block (`SplitBB`) that will be used to select between the
1208/// cloned and original loop.
1209///
1210/// This routine handles cloning all of the necessary loop blocks and exit
1211/// blocks including rewriting their instructions and the relevant PHI nodes.
1212/// Any loop blocks or exit blocks which are dominated by a different successor
1213/// than the one for this clone of the loop blocks can be trivially skipped. We
1214/// use the `DominatingSucc` map to determine whether a block satisfies that
1215/// property with a simple map lookup.
1216///
1217/// It also correctly creates the unconditional branch in the cloned
1218/// unswitched parent block to only point at the unswitched successor.
1219///
1220/// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1221/// block splitting is correctly reflected in `LoopInfo`, essentially all of
1222/// the cloned blocks (and their loops) are left without full `LoopInfo`
1223/// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1224/// blocks to them but doesn't create the cloned `DominatorTree` structure and
1225/// instead the caller must recompute an accurate DT. It *does* correctly
1226/// update the `AssumptionCache` provided in `AC`.
1228 Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
1229 ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
1230 BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
1232 ValueToValueMapTy &VMap,
1234 DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU,
1235 ScalarEvolution *SE) {
1237 NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
1238
1239 // We will need to clone a bunch of blocks, wrap up the clone operation in
1240 // a helper.
1241 auto CloneBlock = [&](BasicBlock *OldBB) {
1242 // Clone the basic block and insert it before the new preheader.
1243 BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
1244 NewBB->moveBefore(LoopPH);
1245
1246 // Record this block and the mapping.
1247 NewBlocks.push_back(NewBB);
1248 VMap[OldBB] = NewBB;
1249
1250 return NewBB;
1251 };
1252
1253 // We skip cloning blocks when they have a dominating succ that is not the
1254 // succ we are cloning for.
1255 auto SkipBlock = [&](BasicBlock *BB) {
1256 auto It = DominatingSucc.find(BB);
1257 return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
1258 };
1259
1260 // First, clone the preheader.
1261 auto *ClonedPH = CloneBlock(LoopPH);
1262
1263 // Then clone all the loop blocks, skipping the ones that aren't necessary.
1264 for (auto *LoopBB : L.blocks())
1265 if (!SkipBlock(LoopBB))
1266 CloneBlock(LoopBB);
1267
1268 // Split all the loop exit edges so that when we clone the exit blocks, if
1269 // any of the exit blocks are *also* a preheader for some other loop, we
1270 // don't create multiple predecessors entering the loop header.
1271 for (auto *ExitBB : ExitBlocks) {
1272 if (SkipBlock(ExitBB))
1273 continue;
1274
1275 // When we are going to clone an exit, we don't need to clone all the
1276 // instructions in the exit block and we want to ensure we have an easy
1277 // place to merge the CFG, so split the exit first. This is always safe to
1278 // do because there cannot be any non-loop predecessors of a loop exit in
1279 // loop simplified form.
1280 auto *MergeBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
1281
1282 // Rearrange the names to make it easier to write test cases by having the
1283 // exit block carry the suffix rather than the merge block carrying the
1284 // suffix.
1285 MergeBB->takeName(ExitBB);
1286 ExitBB->setName(Twine(MergeBB->getName()) + ".split");
1287
1288 // Now clone the original exit block.
1289 auto *ClonedExitBB = CloneBlock(ExitBB);
1290 assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1291 "Exit block should have been split to have one successor!");
1292 assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1293 "Cloned exit block has the wrong successor!");
1294
1295 // Remap any cloned instructions and create a merge phi node for them.
1296 for (auto ZippedInsts : llvm::zip_first(
1297 llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
1298 llvm::make_range(ClonedExitBB->begin(),
1299 std::prev(ClonedExitBB->end())))) {
1300 Instruction &I = std::get<0>(ZippedInsts);
1301 Instruction &ClonedI = std::get<1>(ZippedInsts);
1302
1303 // The only instructions in the exit block should be PHI nodes and
1304 // potentially a landing pad.
1305 assert(
1307 "Bad instruction in exit block!");
1308 // We should have a value map between the instruction and its clone.
1309 assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
1310
1311 // Forget SCEVs based on exit phis in case SCEV looked through the phi.
1312 if (SE)
1313 if (auto *PN = dyn_cast<PHINode>(&I))
1315
1316 BasicBlock::iterator InsertPt = MergeBB->getFirstInsertionPt();
1317
1318 auto *MergePN =
1319 PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi");
1320 MergePN->insertBefore(InsertPt);
1321 MergePN->setDebugLoc(InsertPt->getDebugLoc());
1322 I.replaceAllUsesWith(MergePN);
1323 MergePN->addIncoming(&I, ExitBB);
1324 MergePN->addIncoming(&ClonedI, ClonedExitBB);
1325 }
1326 }
1327
1328 // Rewrite the instructions in the cloned blocks to refer to the instructions
1329 // in the cloned blocks. We have to do this as a second pass so that we have
1330 // everything available. Also, we have inserted new instructions which may
1331 // include assume intrinsics, so we update the assumption cache while
1332 // processing this.
1333 Module *M = ClonedPH->getParent()->getParent();
1334 for (auto *ClonedBB : NewBlocks)
1335 for (Instruction &I : *ClonedBB) {
1336 RemapDbgRecordRange(M, I.getDbgRecordRange(), VMap,
1338 RemapInstruction(&I, VMap,
1340 if (auto *II = dyn_cast<AssumeInst>(&I))
1342 }
1343
1344 // Update any PHI nodes in the cloned successors of the skipped blocks to not
1345 // have spurious incoming values.
1346 for (auto *LoopBB : L.blocks())
1347 if (SkipBlock(LoopBB))
1348 for (auto *SuccBB : successors(LoopBB))
1349 if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
1350 for (PHINode &PN : ClonedSuccBB->phis())
1351 PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
1352
1353 // Remove the cloned parent as a predecessor of any successor we ended up
1354 // cloning other than the unswitched one.
1355 auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
1356 for (auto *SuccBB : successors(ParentBB)) {
1357 if (SuccBB == UnswitchedSuccBB)
1358 continue;
1359
1360 auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
1361 if (!ClonedSuccBB)
1362 continue;
1363
1364 ClonedSuccBB->removePredecessor(ClonedParentBB,
1365 /*KeepOneInputPHIs*/ true);
1366 }
1367
1368 // Replace the cloned branch with an unconditional branch to the cloned
1369 // unswitched successor.
1370 auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
1371 Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1372 // Trivial Simplification. If Terminator is a conditional branch and
1373 // condition becomes dead - erase it.
1374 Value *ClonedConditionToErase = nullptr;
1375 if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1376 ClonedConditionToErase = BI->getCondition();
1377 else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1378 ClonedConditionToErase = SI->getCondition();
1379
1380 Instruction *BI = BranchInst::Create(ClonedSuccBB, ClonedParentBB);
1381 BI->setDebugLoc(ClonedTerminator->getDebugLoc());
1382 ClonedTerminator->eraseFromParent();
1383
1384 if (ClonedConditionToErase)
1385 RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
1386 MSSAU);
1387
1388 // If there are duplicate entries in the PHI nodes because of multiple edges
1389 // to the unswitched successor, we need to nuke all but one as we replaced it
1390 // with a direct branch.
1391 for (PHINode &PN : ClonedSuccBB->phis()) {
1392 bool Found = false;
1393 // Loop over the incoming operands backwards so we can easily delete as we
1394 // go without invalidating the index.
1395 for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
1396 if (PN.getIncomingBlock(i) != ClonedParentBB)
1397 continue;
1398 if (!Found) {
1399 Found = true;
1400 continue;
1401 }
1402 PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
1403 }
1404 }
1405
1406 // Record the domtree updates for the new blocks.
1408 for (auto *ClonedBB : NewBlocks) {
1409 for (auto *SuccBB : successors(ClonedBB))
1410 if (SuccSet.insert(SuccBB).second)
1411 DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1412 SuccSet.clear();
1413 }
1414
1415 return ClonedPH;
1416}
1417
1418/// Recursively clone the specified loop and all of its children.
1419///
1420/// The target parent loop for the clone should be provided, or can be null if
1421/// the clone is a top-level loop. While cloning, all the blocks are mapped
1422/// with the provided value map. The entire original loop must be present in
1423/// the value map. The cloned loop is returned.
static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
                           const ValueToValueMapTy &VMap, LoopInfo &LI) {
  // Transfer OrigL's block list into ClonedL via the value map, and claim
  // ownership in LoopInfo for blocks whose innermost loop was OrigL itself
  // (blocks owned by deeper child loops get claimed when those are cloned).
  auto PopulateClonedLoop = [&](Loop &OrigL, Loop &ClonedL) {
    assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
    ClonedL.reserveBlocks(OrigL.getNumBlocks());
    for (auto *OrigBB : OrigL.blocks()) {
      auto *ClonedBB = cast<BasicBlock>(VMap.lookup(OrigBB));
      ClonedL.addBlockEntry(ClonedBB);
      if (LI.getLoopFor(OrigBB) == &OrigL)
        LI.changeLoopFor(ClonedBB, &ClonedL);
    }
  };

  // The root is handled separately: unlike the children, it may be attached
  // to a different parent (or become a top-level loop), and leaf loops -- the
  // common case -- never need the worklist below at all.
  Loop *ClonedRootL = LI.AllocateLoop();
  if (RootParentL)
    RootParentL->addChildLoop(ClonedRootL);
  else
    LI.addTopLevelLoop(ClonedRootL);
  PopulateClonedLoop(OrigRootL, *ClonedRootL);

  if (OrigRootL.isInnermost())
    return ClonedRootL;

  // Clone the rest of the nest iteratively -- it is a tree, so a simple
  // worklist of (cloned parent, original child) pairs suffices. Carrying the
  // cloned parent alongside each entry avoids a map lookup per loop. Children
  // are pushed in reverse so they pop off in their original order.
  SmallVector<std::pair<Loop *, Loop *>, 16> Worklist;
  for (Loop *ChildL : llvm::reverse(OrigRootL))
    Worklist.push_back({ClonedRootL, ChildL});
  while (!Worklist.empty()) {
    auto [ClonedParentL, OrigL] = Worklist.pop_back_val();
    Loop *ClonedL = LI.AllocateLoop();
    ClonedParentL->addChildLoop(ClonedL);
    PopulateClonedLoop(*OrigL, *ClonedL);
    for (Loop *GrandChildL : llvm::reverse(*OrigL))
      Worklist.push_back({ClonedL, GrandChildL});
  }

  return ClonedRootL;
}
1469
1470/// Build the cloned loops of an original loop from unswitching.
1471///
1472/// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1473/// operation. We need to re-verify that there even is a loop (as the backedge
1474/// may not have been cloned), and even if there are remaining backedges the
1475/// backedge set may be different. However, we know that each child loop is
1476/// undisturbed, we only need to find where to place each child loop within
1477/// either any parent loop or within a cloned version of the original loop.
1478///
1479/// Because child loops may end up cloned outside of any cloned version of the
1480/// original loop, multiple cloned sibling loops may be created. All of them
1481/// are returned so that the newly introduced loop nest roots can be
1482/// identified.
1483static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1484 const ValueToValueMapTy &VMap, LoopInfo &LI,
1485 SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1486 Loop *ClonedL = nullptr;
1487
1488 auto *OrigPH = OrigL.getLoopPreheader();
1489 auto *OrigHeader = OrigL.getHeader();
1490
1491 auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1492 auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1493
1494 // We need to know the loops of the cloned exit blocks to even compute the
1495 // accurate parent loop. If we only clone exits to some parent of the
1496 // original parent, we want to clone into that outer loop. We also keep track
1497 // of the loops that our cloned exit blocks participate in.
1498 Loop *ParentL = nullptr;
1499 SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1501 ClonedExitsInLoops.reserve(ExitBlocks.size());
1502 for (auto *ExitBB : ExitBlocks)
1503 if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1504 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1505 ExitLoopMap[ClonedExitBB] = ExitL;
1506 ClonedExitsInLoops.push_back(ClonedExitBB);
1507 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1508 ParentL = ExitL;
1509 }
1510 assert((!ParentL || ParentL == OrigL.getParentLoop() ||
1511 ParentL->contains(OrigL.getParentLoop())) &&
1512 "The computed parent loop should always contain (or be) the parent of "
1513 "the original loop.");
1514
1515 // We build the set of blocks dominated by the cloned header from the set of
1516 // cloned blocks out of the original loop. While not all of these will
1517 // necessarily be in the cloned loop, it is enough to establish that they
1518 // aren't in unreachable cycles, etc.
1519 SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1520 for (auto *BB : OrigL.blocks())
1521 if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1522 ClonedLoopBlocks.insert(ClonedBB);
1523
1524 // Rebuild the set of blocks that will end up in the cloned loop. We may have
1525 // skipped cloning some region of this loop which can in turn skip some of
1526 // the backedges so we have to rebuild the blocks in the loop based on the
1527 // backedges that remain after cloning.
1529 SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1530 for (auto *Pred : predecessors(ClonedHeader)) {
1531 // The only possible non-loop header predecessor is the preheader because
1532 // we know we cloned the loop in simplified form.
1533 if (Pred == ClonedPH)
1534 continue;
1535
1536 // Because the loop was in simplified form, the only non-loop predecessor
1537 // should be the preheader.
1538 assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
1539 "header other than the preheader "
1540 "that is not part of the loop!");
1541
1542 // Insert this block into the loop set and on the first visit (and if it
1543 // isn't the header we're currently walking) put it into the worklist to
1544 // recurse through.
1545 if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1546 Worklist.push_back(Pred);
1547 }
1548
1549 // If we had any backedges then there *is* a cloned loop. Put the header into
1550 // the loop set and then walk the worklist backwards to find all the blocks
1551 // that remain within the loop after cloning.
1552 if (!BlocksInClonedLoop.empty()) {
1553 BlocksInClonedLoop.insert(ClonedHeader);
1554
1555 while (!Worklist.empty()) {
1556 BasicBlock *BB = Worklist.pop_back_val();
1557 assert(BlocksInClonedLoop.count(BB) &&
1558 "Didn't put block into the loop set!");
1559
1560 // Insert any predecessors that are in the possible set into the cloned
1561 // set, and if the insert is successful, add them to the worklist. Note
1562 // that we filter on the blocks that are definitely reachable via the
1563 // backedge to the loop header so we may prune out dead code within the
1564 // cloned loop.
1565 for (auto *Pred : predecessors(BB))
1566 if (ClonedLoopBlocks.count(Pred) &&
1567 BlocksInClonedLoop.insert(Pred).second)
1568 Worklist.push_back(Pred);
1569 }
1570
1571 ClonedL = LI.AllocateLoop();
1572 if (ParentL) {
1573 ParentL->addBasicBlockToLoop(ClonedPH, LI);
1574 ParentL->addChildLoop(ClonedL);
1575 } else {
1576 LI.addTopLevelLoop(ClonedL);
1577 }
1578 NonChildClonedLoops.push_back(ClonedL);
1579
1580 ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1581 // We don't want to just add the cloned loop blocks based on how we
1582 // discovered them. The original order of blocks was carefully built in
1583 // a way that doesn't rely on predecessor ordering. Rather than re-invent
1584 // that logic, we just re-walk the original blocks (and those of the child
1585 // loops) and filter them as we add them into the cloned loop.
1586 for (auto *BB : OrigL.blocks()) {
1587 auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1588 if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1589 continue;
1590
1591 // Directly add the blocks that are only in this loop.
1592 if (LI.getLoopFor(BB) == &OrigL) {
1593 ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1594 continue;
1595 }
1596
1597 // We want to manually add it to this loop and parents.
1598 // Registering it with LoopInfo will happen when we clone the top
1599 // loop for this block.
1600 for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1601 PL->addBlockEntry(ClonedBB);
1602 }
1603
1604 // Now add each child loop whose header remains within the cloned loop. All
1605 // of the blocks within the loop must satisfy the same constraints as the
1606 // header so once we pass the header checks we can just clone the entire
1607 // child loop nest.
1608 for (Loop *ChildL : OrigL) {
1609 auto *ClonedChildHeader =
1610 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1611 if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1612 continue;
1613
1614#ifndef NDEBUG
1615 // We should never have a cloned child loop header but fail to have
1616 // all of the blocks for that child loop.
1617 for (auto *ChildLoopBB : ChildL->blocks())
1618 assert(BlocksInClonedLoop.count(
1619 cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
1620 "Child cloned loop has a header within the cloned outer "
1621 "loop but not all of its blocks!");
1622#endif
1623
1624 cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1625 }
1626 }
1627
1628 // Now that we've handled all the components of the original loop that were
1629 // cloned into a new loop, we still need to handle anything from the original
1630 // loop that wasn't in a cloned loop.
1631
1632 // Figure out what blocks are left to place within any loop nest containing
1633 // the unswitched loop. If we never formed a loop, the cloned PH is one of
1634 // them.
1635 SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1636 if (BlocksInClonedLoop.empty())
1637 UnloopedBlockSet.insert(ClonedPH);
1638 for (auto *ClonedBB : ClonedLoopBlocks)
1639 if (!BlocksInClonedLoop.count(ClonedBB))
1640 UnloopedBlockSet.insert(ClonedBB);
1641
1642 // Copy the cloned exits and sort them in ascending loop depth, we'll work
1643 // backwards across these to process them inside out. The order shouldn't
1644 // matter as we're just trying to build up the map from inside-out; we use
1645 // the map in a more stably ordered way below.
1646 auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1647 llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1648 return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1649 ExitLoopMap.lookup(RHS)->getLoopDepth();
1650 });
1651
1652 // Populate the existing ExitLoopMap with everything reachable from each
1653 // exit, starting from the inner most exit.
1654 while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1655 assert(Worklist.empty() && "Didn't clear worklist!");
1656
1657 BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1658 Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1659
1660 // Walk the CFG back until we hit the cloned PH adding everything reachable
1661 // and in the unlooped set to this exit block's loop.
1662 Worklist.push_back(ExitBB);
1663 do {
1664 BasicBlock *BB = Worklist.pop_back_val();
1665 // We can stop recursing at the cloned preheader (if we get there).
1666 if (BB == ClonedPH)
1667 continue;
1668
1669 for (BasicBlock *PredBB : predecessors(BB)) {
1670 // If this pred has already been moved to our set or is part of some
1671 // (inner) loop, no update needed.
1672 if (!UnloopedBlockSet.erase(PredBB)) {
1673 assert(
1674 (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
1675 "Predecessor not mapped to a loop!");
1676 continue;
1677 }
1678
1679 // We just insert into the loop set here. We'll add these blocks to the
1680 // exit loop after we build up the set in an order that doesn't rely on
1681 // predecessor order (which in turn relies on use list order).
1682 bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1683 (void)Inserted;
1684 assert(Inserted && "Should only visit an unlooped block once!");
1685
1686 // And recurse through to its predecessors.
1687 Worklist.push_back(PredBB);
1688 }
1689 } while (!Worklist.empty());
1690 }
1691
1692 // Now that the ExitLoopMap gives us a mapping for all the non-looping cloned
1693 // blocks to their outer loops, walk the cloned blocks and the cloned exits
1694 // in their original order adding them to the correct loop.
1695
1696 // We need a stable insertion order. We use the order of the original loop
1697 // order and map into the correct parent loop.
1698 for (auto *BB : llvm::concat<BasicBlock *const>(
1699 ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1700 if (Loop *OuterL = ExitLoopMap.lookup(BB))
1701 OuterL->addBasicBlockToLoop(BB, LI);
1702
1703#ifndef NDEBUG
1704 for (auto &BBAndL : ExitLoopMap) {
1705 auto *BB = BBAndL.first;
1706 auto *OuterL = BBAndL.second;
1707 assert(LI.getLoopFor(BB) == OuterL &&
1708 "Failed to put all blocks into outer loops!");
1709 }
1710#endif
1711
1712 // Now that all the blocks are placed into the correct containing loop in the
1713 // absence of child loops, find all the potentially cloned child loops and
1714 // clone them into whatever outer loop we placed their header into.
1715 for (Loop *ChildL : OrigL) {
1716 auto *ClonedChildHeader =
1717 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1718 if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1719 continue;
1720
1721#ifndef NDEBUG
1722 for (auto *ChildLoopBB : ChildL->blocks())
1723 assert(VMap.count(ChildLoopBB) &&
1724 "Cloned a child loop header but not all of that loops blocks!");
1725#endif
1726
1727 NonChildClonedLoops.push_back(cloneLoopNest(
1728 *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1729 }
1730}
1731
1732 static void
// NOTE(review): the extraction dropped upstream line 1733 here, which carried
// this helper's name and leading parameters — presumably
// deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks, ...).
// Confirm against the original source before relying on this listing.
1734 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1735 DominatorTree &DT, MemorySSAUpdater *MSSAU) {
1736 // Find all the dead clones, and remove them from their successors.
// NOTE(review): upstream line 1737 is missing here — presumably the local
// `SmallVector<BasicBlock *, 16> DeadBlocks;` that the loop below fills.
1738 for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1739 for (const auto &VMap : VMaps)
1740 if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1741 if (!DT.isReachableFromEntry(ClonedBB)) {
// Unhook the dead clone from its successors first so that PHI nodes in
// those successors no longer reference it once it is erased below.
1742 for (BasicBlock *SuccBB : successors(ClonedBB))
1743 SuccBB->removePredecessor(ClonedBB);
1744 DeadBlocks.push_back(ClonedBB);
1745 }
1746 
1747 // Remove all MemorySSA in the dead blocks
1748 if (MSSAU) {
1749 SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1750 DeadBlocks.end());
1751 MSSAU->removeBlocks(DeadBlockSet);
1752 }
1753 
// Deleting is done in two passes: first sever all references (dead blocks can
// reference each other cyclically), then erase the now-unreferenced blocks.
1754 // Drop any remaining references to break cycles.
1755 for (BasicBlock *BB : DeadBlocks)
1756 BB->dropAllReferences();
1757 // Erase them from the IR.
1758 for (BasicBlock *BB : DeadBlocks)
1759 BB->eraseFromParent();
1760 }
1761
// NOTE(review): upstream lines 1762-1763 are missing from this extraction —
// presumably `static void deleteDeadBlocksFromLoop(Loop &L,
// SmallVectorImpl<BasicBlock *> &ExitBlocks,` — confirm against the original.
// This helper removes every block of the loop nest that became unreachable
// after unswitching, updating LoopInfo, MemorySSA, and ExitBlocks in place.
1764 DominatorTree &DT, LoopInfo &LI,
1765 MemorySSAUpdater *MSSAU,
1766 ScalarEvolution *SE,
1767 LPMUpdater &LoopUpdater) {
1768 // Find all the dead blocks tied to this loop, and remove them from their
1769 // successors.
// NOTE(review): upstream line 1770 is missing — presumably the declaration of
// the `DeadBlockSet` used throughout the rest of the function.
1771
1772 // Start with loop/exit blocks and get a transitive closure of reachable dead
1773 // blocks.
1774 SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1775 ExitBlocks.end());
1776 DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1777 while (!DeathCandidates.empty()) {
1778 auto *BB = DeathCandidates.pop_back_val();
1779 if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
// A dead block's successors may themselves now be dead; unhook the PHI
// edges and queue the successors for the same reachability check.
1780 for (BasicBlock *SuccBB : successors(BB)) {
1781 SuccBB->removePredecessor(BB);
1782 DeathCandidates.push_back(SuccBB);
1783 }
1784 DeadBlockSet.insert(BB);
1785 }
1786 }
1787
1788 // Remove all MemorySSA in the dead blocks
1789 if (MSSAU)
1790 MSSAU->removeBlocks(DeadBlockSet);
1791
1792 // Filter out the dead blocks from the exit blocks list so that it can be
1793 // used in the caller.
1794 llvm::erase_if(ExitBlocks,
1795 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1796
1797 // Walk from this loop up through its parents removing all of the dead blocks.
1798 for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1799 for (auto *BB : DeadBlockSet)
1800 ParentL->getBlocksSet().erase(BB);
1801 llvm::erase_if(ParentL->getBlocksVector(),
1802 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1803 }
1804
1805 // Now delete the dead child loops. This raw delete will clear them
1806 // recursively.
1807 llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1808 if (!DeadBlockSet.count(ChildL->getHeader()))
1809 return false;
1810
1811 assert(llvm::all_of(ChildL->blocks(),
1812 [&](BasicBlock *ChildBB) {
1813 return DeadBlockSet.count(ChildBB);
1814 }) &&
1815 "If the child loop header is dead all blocks in the child loop must "
1816 "be dead as well!");
1817 LoopUpdater.markLoopAsDeleted(*ChildL, ChildL->getName());
1818 if (SE)
// NOTE(review): upstream line 1819 is missing — presumably the ScalarEvolution
// invalidation call guarded by `if (SE)` above; confirm against the original.
1820 LI.destroy(ChildL);
1821 return true;
1822 });
1823
1824 // Remove the loop mappings for the dead blocks and drop all the references
1825 // from these blocks to others to handle cyclic references as we start
1826 // deleting the blocks themselves.
1827 for (auto *BB : DeadBlockSet) {
1828 // Check that the dominator tree has already been updated.
1829 assert(!DT.getNode(BB) && "Should already have cleared domtree!");
1830 LI.changeLoopFor(BB, nullptr);
1831 // Drop all uses of the instructions to make sure we won't have dangling
1832 // uses in other blocks.
1833 for (auto &I : *BB)
1834 if (!I.use_empty())
1835 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
1836 BB->dropAllReferences();
1837 }
1838
1839 // Actually delete the blocks now that they've been fully unhooked from the
1840 // IR.
1841 for (auto *BB : DeadBlockSet)
1842 BB->eraseFromParent();
1843}
1844
1845 /// Recompute the set of blocks in a loop after unswitching.
1846 ///
1847 /// This walks from the original header's predecessors to rebuild the loop. We
1848 /// take advantage of the fact that new blocks can't have been added, and so we
1849 /// filter by the original loop's blocks. This also handles potentially
1850 /// unreachable code that we don't want to explore but might be found examining
1851 /// the predecessors of the header.
1852 ///
1853 /// If the original loop is no longer a loop, this will return an empty set. If
1854 /// it remains a loop, all the blocks within it will be added to the set
1855 /// (including those blocks in inner loops).
// NOTE(review): upstream line 1856 is missing — presumably the signature line
// `static SmallPtrSet<const BasicBlock *, 16> recomputeLoopBlockSet(Loop &L,`
// (or similar return type); confirm against the original source.
1857 LoopInfo &LI) {
// NOTE(review): upstream line 1858 is missing — presumably the declaration of
// the `LoopBlockSet` result set populated and returned below.
1859
1860 auto *PH = L.getLoopPreheader();
1861 auto *Header = L.getHeader();
1862
1863 // A worklist to use while walking backwards from the header.
// NOTE(review): upstream line 1864 is missing — presumably the declaration of
// the `Worklist` vector the comment above refers to.
1865
1866 // First walk the predecessors of the header to find the backedges. This will
1867 // form the basis of our walk.
1868 for (auto *Pred : predecessors(Header)) {
1869 // Skip the preheader.
1870 if (Pred == PH)
1871 continue;
1872
1873 // Because the loop was in simplified form, the only non-loop predecessor
1874 // is the preheader.
1875 assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1876 "than the preheader that is not part of the "
1877 "loop!");
1878
1879 // Insert this block into the loop set on the first visit and, if it
1880 // isn't the header we're currently walking, put it into the worklist to
1881 // recurse through.
1882 if (LoopBlockSet.insert(Pred).second && Pred != Header)
1883 Worklist.push_back(Pred);
1884 }
1885
1886 // If no backedges were found, we're done.
1887 if (LoopBlockSet.empty())
1888 return LoopBlockSet;
1889
1890 // We found backedges, recurse through them to identify the loop blocks.
1891 while (!Worklist.empty()) {
1892 BasicBlock *BB = Worklist.pop_back_val();
1893 assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1894
1895 // No need to walk past the header.
1896 if (BB == Header)
1897 continue;
1898
1899 // Because we know the inner loop structure remains valid we can use the
1900 // loop structure to jump immediately across the entire nested loop.
1901 // Further, because it is in loop simplified form, we can directly jump
1902 // to its preheader afterward.
1903 if (Loop *InnerL = LI.getLoopFor(BB))
1904 if (InnerL != &L) {
1905 assert(L.contains(InnerL) &&
1906 "Should not reach a loop *outside* this loop!");
1907 // The preheader is the only possible predecessor of the loop so
1908 // insert it into the set and check whether it was already handled.
1909 auto *InnerPH = InnerL->getLoopPreheader();
1910 assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1911 "but not contain the inner loop "
1912 "preheader!");
1913 if (!LoopBlockSet.insert(InnerPH).second)
1914 // The only way to reach the preheader is through the loop body
1915 // itself so if it has been visited the loop is already handled.
1916 continue;
1917
1918 // Insert all of the blocks (other than those already present) into
1919 // the loop set. We expect at least the block that led us to find the
1920 // inner loop to be in the block set, but we may also have other loop
1921 // blocks if they were already enqueued as predecessors of some other
1922 // outer loop block.
1923 for (auto *InnerBB : InnerL->blocks()) {
1924 if (InnerBB == BB) {
1925 assert(LoopBlockSet.count(InnerBB) &&
1926 "Block should already be in the set!");
1927 continue;
1928 }
1929
1930 LoopBlockSet.insert(InnerBB);
1931 }
1932
1933 // Add the preheader to the worklist so we will continue past the
1934 // loop body.
1935 Worklist.push_back(InnerPH);
1936 continue;
1937 }
1938
1939 // Insert any predecessors that were in the original loop into the new
1940 // set, and if the insert is successful, add them to the worklist.
1941 for (auto *Pred : predecessors(BB))
1942 if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1943 Worklist.push_back(Pred);
1944 }
1945
1946 assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
1947
1948 // We've found all the blocks participating in the loop, return our completed
1949 // set.
1950 return LoopBlockSet;
1951}
1952
1953 /// Rebuild a loop after unswitching removes some subset of blocks and edges.
1954 ///
1955 /// The removal may have removed some child loops entirely but cannot have
1956 /// disturbed any remaining child loops. However, they may need to be hoisted
1957 /// to the parent loop (or to be top-level loops). The original loop may be
1958 /// completely removed.
1959 ///
1960 /// The sibling loops resulting from this update are returned. If the original
1961 /// loop remains a valid loop, it will be the first entry in this list with all
1962 /// of the newly sibling loops following it.
1963 ///
1964 /// Returns true if the loop remains a loop after unswitching, and false if it
1965 /// is no longer a loop after unswitching (and should not continue to be
1966 /// referenced).
// NOTE(review): upstream line 1967 is missing from this extraction —
// presumably `static bool rebuildLoopAfterUnswitch(Loop &L,
// ArrayRef<BasicBlock *> ExitBlocks,` — confirm against the original source.
1968 LoopInfo &LI,
1969 SmallVectorImpl<Loop *> &HoistedLoops,
1970 ScalarEvolution *SE) {
1971 auto *PH = L.getLoopPreheader();
1972
1973 // Compute the actual parent loop from the exit blocks. Because we may have
1974 // pruned some exits the loop may be different from the original parent.
1975 Loop *ParentL = nullptr;
1976 SmallVector<Loop *, 4> ExitLoops;
1977 SmallVector<BasicBlock *, 4> ExitsInLoops;
1978 ExitsInLoops.reserve(ExitBlocks.size());
1979 for (auto *ExitBB : ExitBlocks)
1980 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1981 ExitLoops.push_back(ExitL);
1982 ExitsInLoops.push_back(ExitBB);
// The new parent is the outermost loop containing an exit: keep the
// candidate that (strictly) contains the previous best.
1983 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1984 ParentL = ExitL;
1985 }
1986
1987 // Recompute the blocks participating in this loop. This may be empty if it
1988 // is no longer a loop.
1989 auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1990
1991 // If we still have a loop, we need to re-set the loop's parent as the exit
1992 // block set changing may have moved it within the loop nest. Note that this
1993 // can only happen when this loop has a parent as it can only hoist the loop
1994 // *up* the nest.
1995 if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1996 // Remove this loop's (original) blocks from all of the intervening loops.
1997 for (Loop *IL = L.getParentLoop(); IL != ParentL;
1998 IL = IL->getParentLoop()) {
1999 IL->getBlocksSet().erase(PH);
2000 for (auto *BB : L.blocks())
2001 IL->getBlocksSet().erase(BB);
2002 llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
2003 return BB == PH || L.contains(BB);
2004 });
2005 }
2006
2007 LI.changeLoopFor(PH, ParentL);
2008 L.getParentLoop()->removeChildLoop(&L);
2009 if (ParentL)
2010 ParentL->addChildLoop(&L);
2011 else
2012 LI.addTopLevelLoop(&L);
2013 }
2014
2015 // Now we update all the blocks which are no longer within the loop.
// Partition keeps the surviving blocks (in LoopBlockSet) at the front of the
// vector in their original order; everything after BlocksSplitI is unlooped.
2016 auto &Blocks = L.getBlocksVector();
2017 auto BlocksSplitI =
2018 LoopBlockSet.empty()
2019 ? Blocks.begin()
2020 : std::stable_partition(
2021 Blocks.begin(), Blocks.end(),
2022 [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
2023
2024 // Before we erase the list of unlooped blocks, build a set of them.
2025 SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
2026 if (LoopBlockSet.empty())
2027 UnloopedBlocks.insert(PH);
2028
2029 // Now erase these blocks from the loop.
2030 for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
2031 L.getBlocksSet().erase(BB);
2032 Blocks.erase(BlocksSplitI, Blocks.end());
2033
2034 // Sort the exits in ascending loop depth, we'll work backwards across these
2035 // to process them inside out.
2036 llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
2037 return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
2038 });
2039
2040 // We'll build up a set for each exit loop.
2041 SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
2042 Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
2043
2044 auto RemoveUnloopedBlocksFromLoop =
2045 [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
2046 for (auto *BB : UnloopedBlocks)
2047 L.getBlocksSet().erase(BB);
2048 llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
2049 return UnloopedBlocks.count(BB);
2050 });
2051 };
// NOTE(review): upstream line 2053 is missing — presumably the declaration of
// the `Worklist` vector used by the loop below; confirm against the original.
2053
2054 while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
2055 assert(Worklist.empty() && "Didn't clear worklist!");
2056 assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
2057
2058 // Grab the next exit block, in decreasing loop depth order.
2059 BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
2060 Loop &ExitL = *LI.getLoopFor(ExitBB);
2061 assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
2062
2063 // Erase all of the unlooped blocks from the loops between the previous
2064 // exit loop and this exit loop. This works because the ExitsInLoops list is
2065 // sorted in increasing order of loop depth and thus we visit loops in
2066 // decreasing order of loop depth.
2067 for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
2068 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2069
2070 // Walk the CFG back until we hit the cloned PH adding everything reachable
2071 // and in the unlooped set to this exit block's loop.
2072 Worklist.push_back(ExitBB);
2073 do {
2074 BasicBlock *BB = Worklist.pop_back_val();
2075 // We can stop recursing at the cloned preheader (if we get there).
2076 if (BB == PH)
2077 continue;
2078
2079 for (BasicBlock *PredBB : predecessors(BB)) {
2080 // If this pred has already been moved to our set or is part of some
2081 // (inner) loop, no update needed.
2082 if (!UnloopedBlocks.erase(PredBB)) {
2083 assert((NewExitLoopBlocks.count(PredBB) ||
2084 ExitL.contains(LI.getLoopFor(PredBB))) &&
2085 "Predecessor not in a nested loop (or already visited)!");
2086 continue;
2087 }
2088
2089 // We just insert into the loop set here. We'll add these blocks to the
2090 // exit loop after we build up the set in a deterministic order rather
2091 // than the predecessor-influenced visit order.
2092 bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
2093 (void)Inserted;
2094 assert(Inserted && "Should only visit an unlooped block once!");
2095
2096 // And recurse through to its predecessors.
2097 Worklist.push_back(PredBB);
2098 }
2099 } while (!Worklist.empty());
2100
2101 // If blocks in this exit loop were directly part of the original loop (as
2102 // opposed to a child loop) update the map to point to this exit loop. This
2103 // just updates a map and so the fact that the order is unstable is fine.
2104 for (auto *BB : NewExitLoopBlocks)
2105 if (Loop *BBL = LI.getLoopFor(BB))
2106 if (BBL == &L || !L.contains(BBL))
2107 LI.changeLoopFor(BB, &ExitL);
2108
2109 // We will remove the remaining unlooped blocks from this loop in the next
2110 // iteration or below.
2111 NewExitLoopBlocks.clear();
2112 }
2113
2114 // Any remaining unlooped blocks are no longer part of any loop unless they
2115 // are part of some child loop.
2116 for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
2117 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2118 for (auto *BB : UnloopedBlocks)
2119 if (Loop *BBL = LI.getLoopFor(BB))
2120 if (BBL == &L || !L.contains(BBL))
2121 LI.changeLoopFor(BB, nullptr);
2122
2123 // Sink all the child loops whose headers are no longer in the loop set to
2124 // the parent (or to be top level loops). We reach into the loop and directly
2125 // update its subloop vector to make this batch update efficient.
2126 auto &SubLoops = L.getSubLoopsVector();
2127 auto SubLoopsSplitI =
2128 LoopBlockSet.empty()
2129 ? SubLoops.begin()
2130 : std::stable_partition(
2131 SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
2132 return LoopBlockSet.count(SubL->getHeader());
2133 });
2134 for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
2135 HoistedLoops.push_back(HoistedL);
2136 HoistedL->setParentLoop(nullptr);
2137
2138 // To compute the new parent of this hoisted loop we look at where we
2139 // placed the preheader above. We can't lookup the header itself because we
2140 // retained the mapping from the header to the hoisted loop. But the
2141 // preheader and header should have the exact same new parent computed
2142 // based on the set of exit blocks from the original loop as the preheader
2143 // is a predecessor of the header and so reached in the reverse walk. And
2144 // because the loops were all in simplified form the preheader of the
2145 // hoisted loop can't be part of some *other* loop.
2146 if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
2147 NewParentL->addChildLoop(HoistedL);
2148 else
2149 LI.addTopLevelLoop(HoistedL);
2150 }
2151 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
2152
2153 // Actually delete the loop if nothing remained within it.
2154 if (Blocks.empty()) {
2155 assert(SubLoops.empty() &&
2156 "Failed to remove all subloops from the original loop!");
2157 if (Loop *ParentL = L.getParentLoop())
2158 ParentL->removeChildLoop(llvm::find(*ParentL, &L));
2159 else
2160 LI.removeLoop(llvm::find(LI, &L));
2161 // markLoopAsDeleted for L should be triggered by the caller (it is
2162 // typically done within postUnswitch).
2163 if (SE)
// NOTE(review): upstream line 2164 is missing — presumably the ScalarEvolution
// invalidation call guarded by `if (SE)` above; confirm against the original.
2165 LI.destroy(&L);
2166 return false;
2167 }
2168
2169 return true;
2170}
2171
2172 /// Helper to visit a dominator subtree, invoking a callable on each node.
2173 ///
2174 /// Returning false at any point will stop walking past that node of the tree.
2175 template <typename CallableT>
2176 void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
// NOTE(review): upstream line 2177 is missing — presumably the declaration of
// the `DomWorklist` vector used below; confirm against the original source.
2178 DomWorklist.push_back(DT[BB]);
2179#ifndef NDEBUG
// NOTE(review): upstream line 2180 is missing — presumably the debug-only
// `Visited` set declaration used by the assertion below.
2181 Visited.insert(DT[BB]);
2182#endif
2183 do {
2184 DomTreeNode *N = DomWorklist.pop_back_val();
2185
2186 // Visit this node.
// A false return prunes the walk: this node's children are never enqueued.
2187 if (!Callable(N->getBlock()))
2188 continue;
2189
2190 // Accumulate the child nodes.
2191 for (DomTreeNode *ChildN : *N) {
2192 assert(Visited.insert(ChildN).second &&
2193 "Cannot visit a node twice when walking a tree!");
2194 DomWorklist.push_back(ChildN);
2195 }
2196 } while (!DomWorklist.empty());
2197}
2198
// NOTE(review): upstream line 2199 is missing from this extraction —
// presumably `void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName,`
// — confirm against the original source. This hook updates the pass-manager
// state (and loop metadata) after an unswitch has been performed.
2200 bool CurrentLoopValid, bool PartiallyInvariant,
2201 bool InjectedCondition, ArrayRef<Loop *> NewLoops) {
2202 // If we did a non-trivial unswitch, we have added new (cloned) loops.
2203 if (!NewLoops.empty())
2204 U.addSiblingLoops(NewLoops);
2205
2206 // If the current loop remains valid, we should revisit it to catch any
2207 // other unswitch opportunities. Otherwise, we need to mark it as deleted.
2208 if (CurrentLoopValid) {
2209 if (PartiallyInvariant) {
2210 // Mark the new loop as partially unswitched, to avoid unswitching on
2211 // the same condition again.
2212 auto &Context = L.getHeader()->getContext();
2213 MDNode *DisableUnswitchMD = MDNode::get(
2214 Context,
2215 MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
// NOTE(review): upstream line 2216 is missing — presumably
// `MDNode *NewLoopID = makePostTransformationMetadata(` opening the call
// whose arguments follow; confirm against the original source.
2217 Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
2218 {DisableUnswitchMD});
2219 L.setLoopID(NewLoopID);
2220 } else if (InjectedCondition) {
2221 // Do the same for injection of invariant conditions.
2222 auto &Context = L.getHeader()->getContext();
2223 MDNode *DisableUnswitchMD = MDNode::get(
2224 Context,
2225 MDString::get(Context, "llvm.loop.unswitch.injection.disable"));
// NOTE(review): upstream line 2226 is missing — presumably the matching
// `MDNode *NewLoopID = makePostTransformationMetadata(` line for this branch.
2227 Context, L.getLoopID(), {"llvm.loop.unswitch.injection"},
2228 {DisableUnswitchMD});
2229 L.setLoopID(NewLoopID);
2230 } else
2231 U.revisitCurrentLoop();
2232 } else
2233 U.markLoopAsDeleted(L, LoopName);
2234}
2235
2237 Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
2238 IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI,
2240 LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition) {
2241 auto *ParentBB = TI.getParent();
2243 SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2244
2245 // Save the current loop name in a variable so that we can report it even
2246 // after it has been deleted.
2247 std::string LoopName(L.getName());
2248
2249 // We can only unswitch switches, conditional branches with an invariant
2250 // condition, or combining invariant conditions with an instruction or
2251 // partially invariant instructions.
2252 assert((SI || (BI && BI->isConditional())) &&
2253 "Can only unswitch switches and conditional branch!");
2254 bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
2255 bool FullUnswitch =
2256 SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] &&
2257 !PartiallyInvariant);
2258 if (FullUnswitch)
2259 assert(Invariants.size() == 1 &&
2260 "Cannot have other invariants with full unswitching!");
2261 else
2263 "Partial unswitching requires an instruction as the condition!");
2264
2265 if (MSSAU && VerifyMemorySSA)
2266 MSSAU->getMemorySSA()->verifyMemorySSA();
2267
2268 // Constant and BBs tracking the cloned and continuing successor. When we are
2269 // unswitching the entire condition, this can just be trivially chosen to
2270 // unswitch towards `true`. However, when we are unswitching a set of
2271 // invariants combined with `and` or `or` or partially invariant instructions,
2272 // the combining operation determines the best direction to unswitch: we want
2273 // to unswitch the direction that will collapse the branch.
2274 bool Direction = true;
2275 int ClonedSucc = 0;
2276 if (!FullUnswitch) {
2278 (void)Cond;
2280 PartiallyInvariant) &&
2281 "Only `or`, `and`, an `select`, partially invariant instructions "
2282 "can combine invariants being unswitched.");
2283 if (!match(Cond, m_LogicalOr())) {
2284 if (match(Cond, m_LogicalAnd()) ||
2285 (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
2286 Direction = false;
2287 ClonedSucc = 1;
2288 }
2289 }
2290 }
2291
2292 BasicBlock *RetainedSuccBB =
2293 BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2294 SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2295 if (BI)
2296 UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2297 else
2298 for (auto Case : SI->cases())
2299 if (Case.getCaseSuccessor() != RetainedSuccBB)
2300 UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2301
2302 assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
2303 "Should not unswitch the same successor we are retaining!");
2304
2305 // The branch should be in this exact loop. Any inner loop's invariant branch
2306 // should be handled by unswitching that inner loop. The caller of this
2307 // routine should filter out any candidates that remain (but were skipped for
2308 // whatever reason).
2309 assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
2310
2311 // Compute the parent loop now before we start hacking on things.
2312 Loop *ParentL = L.getParentLoop();
2313 // Get blocks in RPO order for MSSA update, before changing the CFG.
2314 LoopBlocksRPO LBRPO(&L);
2315 if (MSSAU)
2316 LBRPO.perform(&LI);
2317
2318 // Compute the outer-most loop containing one of our exit blocks. This is the
2319 // furthest up our loopnest which can be mutated, which we will use below to
2320 // update things.
2321 Loop *OuterExitL = &L;
2323 L.getUniqueExitBlocks(ExitBlocks);
2324 for (auto *ExitBB : ExitBlocks) {
2325 // ExitBB can be an exit block for several levels in the loop nest. Make
2326 // sure we find the top most.
2327 Loop *NewOuterExitL = getTopMostExitingLoop(ExitBB, LI);
2328 if (!NewOuterExitL) {
2329 // We exited the entire nest with this block, so we're done.
2330 OuterExitL = nullptr;
2331 break;
2332 }
2333 if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2334 OuterExitL = NewOuterExitL;
2335 }
2336
2337 // At this point, we're definitely going to unswitch something so invalidate
2338 // any cached information in ScalarEvolution for the outer most loop
2339 // containing an exit block and all nested loops.
2340 if (SE) {
2341 if (OuterExitL)
2342 SE->forgetLoop(OuterExitL);
2343 else
2344 SE->forgetTopmostLoop(&L);
2346 }
2347
2348 // If the edge from this terminator to a successor dominates that successor,
2349 // store a map from each block in its dominator subtree to it. This lets us
2350 // tell when cloning for a particular successor if a block is dominated by
2351 // some *other* successor with a single data structure. We use this to
2352 // significantly reduce cloning.
2354 for (auto *SuccBB : llvm::concat<BasicBlock *const>(ArrayRef(RetainedSuccBB),
2355 UnswitchedSuccBBs))
2356 if (SuccBB->getUniquePredecessor() ||
2357 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2358 return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2359 }))
2360 visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2361 DominatingSucc[BB] = SuccBB;
2362 return true;
2363 });
2364
2365 // Split the preheader, so that we know that there is a safe place to insert
2366 // the conditional branch. We will change the preheader to have a conditional
2367 // branch on LoopCond. The original preheader will become the split point
2368 // between the unswitched versions, and we will have a new preheader for the
2369 // original loop.
2370 BasicBlock *SplitBB = L.getLoopPreheader();
2371 BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2372
2373 // Keep track of the dominator tree updates needed.
2375
2376 // Clone the loop for each unswitched successor.
2378 VMaps.reserve(UnswitchedSuccBBs.size());
2380 for (auto *SuccBB : UnswitchedSuccBBs) {
2381 VMaps.emplace_back(new ValueToValueMapTy());
2382 ClonedPHs[SuccBB] = buildClonedLoopBlocks(
2383 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2384 DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU, SE);
2385 }
2386
2387 // Drop metadata if we may break its semantics by moving this instr into the
2388 // split block.
2389 if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2391 // Do not spend time trying to understand if we can keep it, just drop it
2392 // to save compile time.
2393 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2394 else {
2395 // It is only legal to preserve make.implicit metadata if we are
2396 // guaranteed no reach implicit null check after following this branch.
2397 ICFLoopSafetyInfo SafetyInfo;
2398 SafetyInfo.computeLoopSafetyInfo(&L);
2399 if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2400 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2401 }
2402 }
2403
2404 // The stitching of the branched code back together depends on whether we're
2405 // doing full unswitching or not with the exception that we always want to
2406 // nuke the initial terminator placed in the split block.
2407 SplitBB->getTerminator()->eraseFromParent();
2408 if (FullUnswitch) {
2409 // Keep a clone of the terminator for MSSA updates.
2410 Instruction *NewTI = TI.clone();
2411 NewTI->insertInto(ParentBB, ParentBB->end());
2412
2413 // Splice the terminator from the original loop and rewrite its
2414 // successors.
2415 TI.moveBefore(*SplitBB, SplitBB->end());
2416 TI.dropLocation();
2417
2418 // First wire up the moved terminator to the preheaders.
2419 if (BI) {
2420 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2421 BI->setSuccessor(ClonedSucc, ClonedPH);
2422 BI->setSuccessor(1 - ClonedSucc, LoopPH);
2424 if (InsertFreeze) {
2425 // We don't give any debug location to the new freeze, because the
2426 // BI (`dyn_cast<BranchInst>(TI)`) is an in-loop instruction hoisted
2427 // out of the loop.
2428 Cond = new FreezeInst(Cond, Cond->getName() + ".fr", BI->getIterator());
2430 }
2431 BI->setCondition(Cond);
2432 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2433 } else {
2434 assert(SI && "Must either be a branch or switch!");
2435
2436 // Walk the cases and directly update their successors.
2437 assert(SI->getDefaultDest() == RetainedSuccBB &&
2438 "Not retaining default successor!");
2439 SI->setDefaultDest(LoopPH);
2440 for (const auto &Case : SI->cases())
2441 if (Case.getCaseSuccessor() == RetainedSuccBB)
2442 Case.setSuccessor(LoopPH);
2443 else
2444 Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2445
2446 if (InsertFreeze)
2447 SI->setCondition(new FreezeInst(SI->getCondition(),
2448 SI->getCondition()->getName() + ".fr",
2449 SI->getIterator()));
2450
2451 // We need to use the set to populate domtree updates as even when there
2452 // are multiple cases pointing at the same successor we only want to
2453 // remove and insert one edge in the domtree.
2454 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2455 DTUpdates.push_back(
2456 {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2457 }
2458
2459 if (MSSAU) {
2460 DT.applyUpdates(DTUpdates);
2461 DTUpdates.clear();
2462
2463 // Remove all but one edge to the retained block and all unswitched
2464 // blocks. This is to avoid having duplicate entries in the cloned Phis,
2465 // when we know we only keep a single edge for each case.
2466 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2467 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2468 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2469
2470 for (auto &VMap : VMaps)
2471 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2472 /*IgnoreIncomingWithNoClones=*/true);
2473 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2474
2475 // Remove all edges to unswitched blocks.
2476 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2477 MSSAU->removeEdge(ParentBB, SuccBB);
2478 }
2479
2480 // Now unhook the successor relationship as we'll be replacing
2481 // the terminator with a direct branch. This is much simpler for branches
2482 // than switches so we handle those first.
2483 if (BI) {
2484 // Remove the parent as a predecessor of the unswitched successor.
2485 assert(UnswitchedSuccBBs.size() == 1 &&
2486 "Only one possible unswitched block for a branch!");
2487 BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2488 UnswitchedSuccBB->removePredecessor(ParentBB,
2489 /*KeepOneInputPHIs*/ true);
2490 DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2491 } else {
2492 // Note that we actually want to remove the parent block as a predecessor
2493 // of *every* case successor. The case successor is either unswitched,
2494 // completely eliminating an edge from the parent to that successor, or it
2495 // is a duplicate edge to the retained successor as the retained successor
2496 // is always the default successor and as we'll replace this with a direct
2497 // branch we no longer need the duplicate entries in the PHI nodes.
2498 SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2499 assert(NewSI->getDefaultDest() == RetainedSuccBB &&
2500 "Not retaining default successor!");
2501 for (const auto &Case : NewSI->cases())
2502 Case.getCaseSuccessor()->removePredecessor(
2503 ParentBB,
2504 /*KeepOneInputPHIs*/ true);
2505
2506 // We need to use the set to populate domtree updates as even when there
2507 // are multiple cases pointing at the same successor we only want to
2508 // remove and insert one edge in the domtree.
2509 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2510 DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2511 }
2512
2513 // Create a new unconditional branch to the continuing block (as opposed to
2514 // the one cloned).
2515 Instruction *NewBI = BranchInst::Create(RetainedSuccBB, ParentBB);
2516 NewBI->setDebugLoc(NewTI->getDebugLoc());
2517
2518 // After MSSAU update, remove the cloned terminator instruction NewTI.
2519 NewTI->eraseFromParent();
2520 } else {
2521 assert(BI && "Only branches have partial unswitching.");
2522 assert(UnswitchedSuccBBs.size() == 1 &&
2523 "Only one possible unswitched block for a branch!");
2524 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2525 // When doing a partial unswitch, we have to do a bit more work to build up
2526 // the branch in the split block.
2527 if (PartiallyInvariant)
2529 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU, *BI);
2530 else {
2532 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
2533 FreezeLoopUnswitchCond, BI, &AC, DT, *BI);
2534 }
2535 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2536
2537 if (MSSAU) {
2538 DT.applyUpdates(DTUpdates);
2539 DTUpdates.clear();
2540
2541 // Perform MSSA cloning updates.
2542 for (auto &VMap : VMaps)
2543 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2544 /*IgnoreIncomingWithNoClones=*/true);
2545 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2546 }
2547 }
2548
2549 // Apply the updates accumulated above to get an up-to-date dominator tree.
2550 DT.applyUpdates(DTUpdates);
2551
2552 // Now that we have an accurate dominator tree, first delete the dead cloned
2553 // blocks so that we can accurately build any cloned loops. It is important to
2554 // not delete the blocks from the original loop yet because we still want to
2555 // reference the original loop to understand the cloned loop's structure.
2556 deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2557
2558 // Build the cloned loop structure itself. This may be substantially
2559 // different from the original structure due to the simplified CFG. This also
2560 // handles inserting all the cloned blocks into the correct loops.
2561 SmallVector<Loop *, 4> NonChildClonedLoops;
2562 for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2563 buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2564
2565 // Now that our cloned loops have been built, we can update the original loop.
2566 // First we delete the dead blocks from it and then we rebuild the loop
2567 // structure taking these deletions into account.
2568 deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, SE, LoopUpdater);
2569
2570 if (MSSAU && VerifyMemorySSA)
2571 MSSAU->getMemorySSA()->verifyMemorySSA();
2572
2573 SmallVector<Loop *, 4> HoistedLoops;
2574 bool IsStillLoop =
2575 rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops, SE);
2576
2577 if (MSSAU && VerifyMemorySSA)
2578 MSSAU->getMemorySSA()->verifyMemorySSA();
2579
2580 // This transformation has a high risk of corrupting the dominator tree, and
2581 // the below steps to rebuild loop structures will result in hard to debug
2582 // errors in that case so verify that the dominator tree is sane first.
2583 // FIXME: Remove this when the bugs stop showing up and rely on existing
2584 // verification steps.
2585 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
2586
2587 if (BI && !PartiallyInvariant) {
2588 // If we unswitched a branch which collapses the condition to a known
2589 // constant we want to replace all the uses of the invariants within both
2590 // the original and cloned blocks. We do this here so that we can use the
2591 // now updated dominator tree to identify which side the users are on.
2592 assert(UnswitchedSuccBBs.size() == 1 &&
2593 "Only one possible unswitched block for a branch!");
2594 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2595
2596 // When considering multiple partially-unswitched invariants
2597 // we can't just go replace them with constants in both branches.
2598 //
2599 // For 'AND' we infer that true branch ("continue") means true
2600 // for each invariant operand.
2601 // For 'OR' we can infer that false branch ("continue") means false
2602 // for each invariant operand.
2603 // So it happens that for multiple-partial case we don't replace
2604 // in the unswitched branch.
2605 bool ReplaceUnswitched =
2606 FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
2607
2608 ConstantInt *UnswitchedReplacement =
2611 ConstantInt *ContinueReplacement =
2614 for (Value *Invariant : Invariants) {
2615 assert(!isa<Constant>(Invariant) &&
2616 "Should not be replacing constant values!");
2617 // Use make_early_inc_range here as set invalidates the iterator.
2618 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
2619 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2620 if (!UserI)
2621 continue;
2622
2623 // Replace it with the 'continue' side if in the main loop body, and the
2624 // unswitched if in the cloned blocks.
2625 if (DT.dominates(LoopPH, UserI->getParent()))
2626 U.set(ContinueReplacement);
2627 else if (ReplaceUnswitched &&
2628 DT.dominates(ClonedPH, UserI->getParent()))
2629 U.set(UnswitchedReplacement);
2630 }
2631 }
2632 }
2633
2634 // We can change which blocks are exit blocks of all the cloned sibling
2635 // loops, the current loop, and any parent loops which shared exit blocks
2636 // with the current loop. As a consequence, we need to re-form LCSSA for
2637 // them. But we shouldn't need to re-form LCSSA for any child loops.
2638 // FIXME: This could be made more efficient by tracking which exit blocks are
2639 // new, and focusing on them, but that isn't likely to be necessary.
2640 //
2641 // In order to reasonably rebuild LCSSA we need to walk inside-out across the
2642 // loop nest and update every loop that could have had its exits changed. We
2643 // also need to cover any intervening loops. We add all of these loops to
2644 // a list and sort them by loop depth to achieve this without updating
2645 // unnecessary loops.
2646 auto UpdateLoop = [&](Loop &UpdateL) {
2647#ifndef NDEBUG
2648 UpdateL.verifyLoop();
2649 for (Loop *ChildL : UpdateL) {
2650 ChildL->verifyLoop();
2651 assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2652 "Perturbed a child loop's LCSSA form!");
2653 }
2654#endif
2655 // First build LCSSA for this loop so that we can preserve it when
2656 // forming dedicated exits. We don't want to perturb some other loop's
2657 // LCSSA while doing that CFG edit.
2658 formLCSSA(UpdateL, DT, &LI, SE);
2659
2660 // For loops reached by this loop's original exit blocks we may have
2661 // introduced new, non-dedicated exits. At least try to re-form dedicated
2662 // exits for these loops. This may fail if they couldn't have dedicated
2663 // exits to start with.
2664 formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
2665 };
2666
2667 // For non-child cloned loops and hoisted loops, we just need to update LCSSA
2668 // and we can do it in any order as they don't nest relative to each other.
2669 //
2670 // Also check if any of the loops we have updated have become top-level loops
2671 // as that will necessitate widening the outer loop scope.
2672 for (Loop *UpdatedL :
2673 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2674 UpdateLoop(*UpdatedL);
2675 if (UpdatedL->isOutermost())
2676 OuterExitL = nullptr;
2677 }
2678 if (IsStillLoop) {
2679 UpdateLoop(L);
2680 if (L.isOutermost())
2681 OuterExitL = nullptr;
2682 }
2683
2684 // If the original loop had exit blocks, walk up through the outer most loop
2685 // of those exit blocks to update LCSSA and form updated dedicated exits.
2686 if (OuterExitL != &L)
2687 for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2688 OuterL = OuterL->getParentLoop())
2689 UpdateLoop(*OuterL);
2690
2691#ifndef NDEBUG
2692 // Verify the entire loop structure to catch any incorrect updates before we
2693 // progress in the pass pipeline.
2694 LI.verify(DT);
2695#endif
2696
2697 // Now that we've unswitched something, make callbacks to report the changes.
2698 // For that we need to merge together the updated loops and the cloned loops
2699 // and check whether the original loop survived.
2700 SmallVector<Loop *, 4> SibLoops;
2701 for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2702 if (UpdatedL->getParentLoop() == ParentL)
2703 SibLoops.push_back(UpdatedL);
2704 postUnswitch(L, LoopUpdater, LoopName, IsStillLoop, PartiallyInvariant,
2705 InjectedCondition, SibLoops);
2706
2707 if (MSSAU && VerifyMemorySSA)
2708 MSSAU->getMemorySSA()->verifyMemorySSA();
2709
2710 if (BI)
2711 ++NumBranches;
2712 else
2713 ++NumSwitches;
2714}
2715
2716/// Recursively compute the cost of a dominator subtree based on the per-block
2717/// cost map provided.
2718///
2719/// The recursive computation is memozied into the provided DT-indexed cost map
2720/// to allow querying it for most nodes in the domtree without it becoming
2721/// quadratic.
2723 DomTreeNode &N,
2726 // Don't accumulate cost (or recurse through) blocks not in our block cost
2727 // map and thus not part of the duplication cost being considered.
2728 auto BBCostIt = BBCostMap.find(N.getBlock());
2729 if (BBCostIt == BBCostMap.end())
2730 return 0;
2731
2732 // Lookup this node to see if we already computed its cost.
2733 auto DTCostIt = DTCostMap.find(&N);
2734 if (DTCostIt != DTCostMap.end())
2735 return DTCostIt->second;
2736
2737 // If not, we have to compute it. We can't use insert above and update
2738 // because computing the cost may insert more things into the map.
2739 InstructionCost Cost = std::accumulate(
2740 N.begin(), N.end(), BBCostIt->second,
2741 [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
2742 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2743 });
2744 bool Inserted = DTCostMap.insert({&N, Cost}).second;
2745 (void)Inserted;
2746 assert(Inserted && "Should not insert a node while visiting children!");
2747 return Cost;
2748}
2749
2750/// Turns a select instruction into implicit control flow branch,
2751/// making the following replacement:
2752///
2753/// head:
2754/// --code before select--
2755/// select %cond, %trueval, %falseval
2756/// --code after select--
2757///
2758/// into
2759///
2760/// head:
2761/// --code before select--
2762/// br i1 %cond, label %then, label %tail
2763///
2764/// then:
2765/// br %tail
2766///
2767/// tail:
2768/// phi [ %trueval, %then ], [ %falseval, %head]
2769/// unreachable
2770///
2771/// It also makes all relevant DT and LI updates, so that all structures are in
2772/// valid state after this transform.
2774 LoopInfo &LI, MemorySSAUpdater *MSSAU,
2775 AssumptionCache *AC) {
2776 LLVM_DEBUG(dbgs() << "Turning " << *SI << " into a branch.\n");
2777 BasicBlock *HeadBB = SI->getParent();
2778
2779 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2780 SplitBlockAndInsertIfThen(SI->getCondition(), SI, false,
2781 SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2782 auto *CondBr = cast<BranchInst>(HeadBB->getTerminator());
2783 BasicBlock *ThenBB = CondBr->getSuccessor(0),
2784 *TailBB = CondBr->getSuccessor(1);
2785 if (MSSAU)
2786 MSSAU->moveAllAfterSpliceBlocks(HeadBB, TailBB, SI);
2787
2788 PHINode *Phi =
2789 PHINode::Create(SI->getType(), 2, "unswitched.select", SI->getIterator());
2790 Phi->addIncoming(SI->getTrueValue(), ThenBB);
2791 Phi->addIncoming(SI->getFalseValue(), HeadBB);
2792 Phi->setDebugLoc(SI->getDebugLoc());
2793 SI->replaceAllUsesWith(Phi);
2794 SI->eraseFromParent();
2795
2796 if (MSSAU && VerifyMemorySSA)
2797 MSSAU->getMemorySSA()->verifyMemorySSA();
2798
2799 ++NumSelects;
2800 return CondBr;
2801}
2802
2803/// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
2804/// making the following replacement:
2805///
2806/// --code before guard--
2807/// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2808/// --code after guard--
2809///
2810/// into
2811///
2812/// --code before guard--
2813/// br i1 %cond, label %guarded, label %deopt
2814///
2815/// guarded:
2816/// --code after guard--
2817///
2818/// deopt:
2819/// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2820/// unreachable
2821///
2822/// It also makes all relevant DT and LI updates, so that all structures are in
2823/// valid state after this transform.
2825 DominatorTree &DT, LoopInfo &LI,
2826 MemorySSAUpdater *MSSAU) {
2827 LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
2828 BasicBlock *CheckBB = GI->getParent();
2829
2830 if (MSSAU && VerifyMemorySSA)
2831 MSSAU->getMemorySSA()->verifyMemorySSA();
2832
2833 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2834 Instruction *DeoptBlockTerm =
2836 GI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2837 BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2838 // SplitBlockAndInsertIfThen inserts control flow that branches to
2839 // DeoptBlockTerm if the condition is true. We want the opposite.
2840 CheckBI->swapSuccessors();
2841
2842 BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2843 GuardedBlock->setName("guarded");
2844 CheckBI->getSuccessor(1)->setName("deopt");
2845 BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2846
2847 if (MSSAU)
2848 MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2849
2850 GI->moveBefore(DeoptBlockTerm->getIterator());
2852
2853 if (MSSAU) {
2855 MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2856 if (VerifyMemorySSA)
2857 MSSAU->getMemorySSA()->verifyMemorySSA();
2858 }
2859
2860 if (VerifyLoopInfo)
2861 LI.verify(DT);
2862 ++NumGuards;
2863 return CheckBI;
2864}
2865
2866/// Cost multiplier is a way to limit potentially exponential behavior
2867/// of loop-unswitch. Cost is multiplied in proportion of 2^number of unswitch
2868/// candidates available. Also consider the number of "sibling" loops with
2869/// the idea of accounting for previous unswitches that already happened on this
2870/// cluster of loops. There was an attempt to keep this formula simple,
2871/// just enough to limit the worst case behavior. Even if it is not that simple
2872/// now it is still not an attempt to provide a detailed heuristic size
2873/// prediction.
2874///
2875/// TODO: Make a proper accounting of "explosion" effect for all kinds of
2876/// unswitch candidates, making adequate predictions instead of wild guesses.
2877/// That requires knowing not just the number of "remaining" candidates but
2878/// also costs of unswitching for each of these candidates.
2880 const Instruction &TI, const Loop &L, const LoopInfo &LI,
2881 const DominatorTree &DT,
2882 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates) {
2883
2884 // Guards and other exiting conditions do not contribute to exponential
2885 // explosion as soon as they dominate the latch (otherwise there might be
2886 // another path to the latch remaining that does not allow to eliminate the
2887 // loop copy on unswitch).
2888 const BasicBlock *Latch = L.getLoopLatch();
2889 const BasicBlock *CondBlock = TI.getParent();
2890 if (DT.dominates(CondBlock, Latch) &&
2891 (isGuard(&TI) ||
2892 (TI.isTerminator() &&
2893 llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
2894 return L.contains(SuccBB);
2895 }) <= 1))) {
2896 NumCostMultiplierSkipped++;
2897 return 1;
2898 }
2899
2900 // Each invariant non-trivial condition, after being unswitched, is supposed
2901 // to have its own specialized sibling loop (the invariant condition has been
2902 // hoisted out of the child loop into a newly-cloned loop). When unswitching
2903 // conditions in nested loops, the basic block size of the outer loop should
2904 // not be altered. If such a size significantly increases across unswitching
2905 // invocations, something may be wrong; so adjust the final cost taking this
2906 // into account.
2907 auto *ParentL = L.getParentLoop();
2908 int ParentLoopSizeMultiplier = 1;
2909 if (ParentL)
2910 ParentLoopSizeMultiplier =
2911 std::max<int>(ParentL->getNumBlocks() / UnswitchParentBlocksDiv, 1);
2912
2913 int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2914 : std::distance(LI.begin(), LI.end()));
2915 // Count amount of clones that all the candidates might cause during
2916 // unswitching. Branch/guard/select counts as 1, switch counts as log2 of its
2917 // cases.
2918 int UnswitchedClones = 0;
2919 for (const auto &Candidate : UnswitchCandidates) {
2920 const Instruction *CI = Candidate.TI;
2921 const BasicBlock *CondBlock = CI->getParent();
2922 bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2923 if (isa<SelectInst>(CI)) {
2924 UnswitchedClones++;
2925 continue;
2926 }
2927 if (isGuard(CI)) {
2928 if (!SkipExitingSuccessors)
2929 UnswitchedClones++;
2930 continue;
2931 }
2932 int NonExitingSuccessors =
2933 llvm::count_if(successors(CondBlock),
2934 [SkipExitingSuccessors, &L](const BasicBlock *SuccBB) {
2935 return !SkipExitingSuccessors || L.contains(SuccBB);
2936 });
2937 UnswitchedClones += Log2_32(NonExitingSuccessors);
2938 }
2939
2940 // Ignore up to the "unscaled candidates" number of unswitch candidates
2941 // when calculating the power-of-two scaling of the cost. The main idea
2942 // with this control is to allow a small number of unswitches to happen
2943 // and rely more on siblings multiplier (see below) when the number
2944 // of candidates is small.
2945 unsigned ClonesPower =
2946 std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2947
2948 // Allowing top-level loops to spread a bit more than nested ones.
2949 int SiblingsMultiplier =
2950 std::max((ParentL ? SiblingsCount
2951 : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2952 1);
2953 // Compute the cost multiplier in a way that won't overflow by saturating
2954 // at an upper bound.
2955 int CostMultiplier;
2956 if (ClonesPower > Log2_32(UnswitchThreshold) ||
2957 SiblingsMultiplier > UnswitchThreshold ||
2958 ParentLoopSizeMultiplier > UnswitchThreshold)
2959 CostMultiplier = UnswitchThreshold;
2960 else
2961 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2962 (int)UnswitchThreshold);
2963
2964 LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier
2965 << " (siblings " << SiblingsMultiplier << " * parent size "
2966 << ParentLoopSizeMultiplier << " * clones "
2967 << (1 << ClonesPower) << ")"
2968 << " for unswitch candidate: " << TI << "\n");
2969 return CostMultiplier;
2970}
2971
2974 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch,
2975 const Loop &L, const LoopInfo &LI, AAResults &AA,
2976 const MemorySSAUpdater *MSSAU) {
2977 assert(UnswitchCandidates.empty() && "Should be!");
2978
2979 auto AddUnswitchCandidatesForInst = [&](Instruction *I, Value *Cond) {
2981 if (isa<Constant>(Cond))
2982 return;
2983 if (L.isLoopInvariant(Cond)) {
2984 UnswitchCandidates.push_back({I, {Cond}});
2985 return;
2986 }
2988 TinyPtrVector<Value *> Invariants =
2990 L, *static_cast<Instruction *>(Cond), LI);
2991 if (!Invariants.empty())
2992 UnswitchCandidates.push_back({I, std::move(Invariants)});
2993 }
2994 };
2995
2996 // Whether or not we should also collect guards in the loop.
2997 bool CollectGuards = false;
2998 if (UnswitchGuards) {
2999 auto *GuardDecl = Intrinsic::getDeclarationIfExists(
3000 L.getHeader()->getParent()->getParent(), Intrinsic::experimental_guard);
3001 if (GuardDecl && !GuardDecl->use_empty())
3002 CollectGuards = true;
3003 }
3004
3005 for (auto *BB : L.blocks()) {
3006 if (LI.getLoopFor(BB) != &L)
3007 continue;
3008
3009 for (auto &I : *BB) {
3010 if (auto *SI = dyn_cast<SelectInst>(&I)) {
3011 auto *Cond = SI->getCondition();
3012 // Do not unswitch vector selects and logical and/or selects
3013 if (Cond->getType()->isIntegerTy(1) && !SI->getType()->isIntegerTy(1))
3014 AddUnswitchCandidatesForInst(SI, Cond);
3015 } else if (CollectGuards && isGuard(&I)) {
3016 auto *Cond =
3017 skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
3018 // TODO: Support AND, OR conditions and partial unswitching.
3019 if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
3020 UnswitchCandidates.push_back({&I, {Cond}});
3021 }
3022 }
3023
3024 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
3025 // We can only consider fully loop-invariant switch conditions as we need
3026 // to completely eliminate the switch after unswitching.
3027 if (!isa<Constant>(SI->getCondition()) &&
3028 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
3029 UnswitchCandidates.push_back({SI, {SI->getCondition()}});
3030 continue;
3031 }
3032
3033 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
3034 if (!BI || !BI->isConditional() ||
3035 BI->getSuccessor(0) == BI->getSuccessor(1))
3036 continue;
3037
3038 AddUnswitchCandidatesForInst(BI, BI->getCondition());
3039 }
3040
3041 if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
3042 !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
3043 return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
3044 })) {
3045 MemorySSA *MSSA = MSSAU->getMemorySSA();
3046 if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
3047 LLVM_DEBUG(
3048 dbgs() << "simple-loop-unswitch: Found partially invariant condition "
3049 << *Info->InstToDuplicate[0] << "\n");
3050 PartialIVInfo = *Info;
3051 PartialIVCondBranch = L.getHeader()->getTerminator();
3052 TinyPtrVector<Value *> ValsToDuplicate;
3053 llvm::append_range(ValsToDuplicate, Info->InstToDuplicate);
3054 UnswitchCandidates.push_back(
3055 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
3056 }
3057 }
3058 return !UnswitchCandidates.empty();
3059}
3060
3061/// Tries to canonicalize condition described by:
3062///
3063/// br (LHS pred RHS), label IfTrue, label IfFalse
3064///
3065/// into its equivalent where `Pred` is something that we support for injected
3066/// invariants (so far it is limited to ult), LHS in canonicalized form is
3067/// non-invariant and RHS is an invariant.
3069 Value *&LHS, Value *&RHS,
3070 BasicBlock *&IfTrue,
3071 BasicBlock *&IfFalse,
3072 const Loop &L) {
3073 if (!L.contains(IfTrue)) {
3074 Pred = ICmpInst::getInversePredicate(Pred);
3075 std::swap(IfTrue, IfFalse);
3076 }
3077
3078 // Move loop-invariant argument to RHS position.
3079 if (L.isLoopInvariant(LHS)) {
3080 Pred = ICmpInst::getSwappedPredicate(Pred);
3081 std::swap(LHS, RHS);
3082 }
3083
3084 if (Pred == ICmpInst::ICMP_SGE && match(RHS, m_Zero())) {
3085 // Turn "x >=s 0" into "x <u UMIN_INT"
3086 Pred = ICmpInst::ICMP_ULT;
3087 RHS = ConstantInt::get(
3088 RHS->getContext(),
3089 APInt::getSignedMinValue(RHS->getType()->getIntegerBitWidth()));
3090 }
3091}
3092
3093/// Returns true, if predicate described by ( \p Pred, \p LHS, \p RHS )
3094/// succeeding into blocks ( \p IfTrue, \p IfFalse) can be optimized by
3095/// injecting a loop-invariant condition.
3097 const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS,
3098 const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L) {
3099 if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
3100 return false;
3101 // TODO: Support other predicates.
3102 if (Pred != ICmpInst::ICMP_ULT)
3103 return false;
3104 // TODO: Support non-loop-exiting branches?
3105 if (!L.contains(IfTrue) || L.contains(IfFalse))
3106 return false;
3107 // FIXME: For some reason this causes problems with MSSA updates, need to
3108 // investigate why. So far, just don't unswitch latch.
3109 if (L.getHeader() == IfTrue)
3110 return false;
3111 return true;
3112}
3113
3114/// Returns true, if metadata on \p BI allows us to optimize branching into \p
3115/// TakenSucc via injection of invariant conditions. The branch should be not
3116/// enough and not previously unswitched, the information about this comes from
3117/// the metadata.
3119 const BasicBlock *TakenSucc) {
3120 SmallVector<uint32_t> Weights;
3121 if (!extractBranchWeights(*BI, Weights))
3122 return false;
3124 BranchProbability LikelyTaken(T - 1, T);
3125
3126 assert(Weights.size() == 2 && "Unexpected profile data!");
3127 size_t Idx = BI->getSuccessor(0) == TakenSucc ? 0 : 1;
3128 auto Num = Weights[Idx];
3129 auto Denom = Weights[0] + Weights[1];
3130 // Degenerate or overflowed metadata.
3131 if (Denom == 0 || Num > Denom)
3132 return false;
3133 BranchProbability ActualTaken(Num, Denom);
3134 if (LikelyTaken > ActualTaken)
3135 return false;
3136 return true;
3137}
3138
3139/// Materialize pending invariant condition of the given candidate into IR. The
3140/// injected loop-invariant condition implies the original loop-variant branch
3141/// condition, so the materialization turns
3142///
3143/// loop_block:
3144/// ...
3145/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3146///
3147/// into
3148///
3149/// preheader:
3150/// %invariant_cond = LHS pred RHS
3151/// ...
3152/// loop_block:
3153/// br i1 %invariant_cond, label InLoopSucc, label OriginalCheck
3154/// OriginalCheck:
3155/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3156/// ...
3157static NonTrivialUnswitchCandidate
3158injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
3159 DominatorTree &DT, LoopInfo &LI,
3160 AssumptionCache &AC, MemorySSAUpdater *MSSAU) {
3161 assert(Candidate.hasPendingInjection() && "Nothing to inject!");
3162 BasicBlock *Preheader = L.getLoopPreheader();
3163 assert(Preheader && "Loop is not in simplified form?");
3164 assert(LI.getLoopFor(Candidate.TI->getParent()) == &L &&
3165 "Unswitching branch of inner loop!");
3166
3167 auto Pred = Candidate.PendingInjection->Pred;
3168 auto *LHS = Candidate.PendingInjection->LHS;
3169 auto *RHS = Candidate.PendingInjection->RHS;
3170 auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;
3171 auto *TI = cast<BranchInst>(Candidate.TI);
3172 auto *BB = Candidate.TI->getParent();
3173 auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1)
3174 : TI->getSuccessor(0);
3175 // FIXME: Remove this once limitation on successors is lifted.
3176 assert(L.contains(InLoopSucc) && "Not supported yet!");
3177 assert(!L.contains(OutOfLoopSucc) && "Not supported yet!");
3178 auto &Ctx = BB->getContext();
3179
3180 IRBuilder<> Builder(Preheader->getTerminator());
3181 assert(ICmpInst::isUnsigned(Pred) && "Not supported yet!");
3182 if (LHS->getType() != RHS->getType()) {
3183 if (LHS->getType()->getIntegerBitWidth() <
3184 RHS->getType()->getIntegerBitWidth())
3185 LHS = Builder.CreateZExt(LHS, RHS->getType(), LHS->getName() + ".wide");
3186 else
3187 RHS = Builder.CreateZExt(RHS, LHS->getType(), RHS->getName() + ".wide");
3188 }
3189 // Do not use builder here: CreateICmp may simplify this into a constant and
3190 // unswitching will break. Better optimize it away later.
3191 auto *InjectedCond =
3192 ICmpInst::Create(Instruction::ICmp, Pred, LHS, RHS, "injected.cond",
3193 Preheader->getTerminator()->getIterator());
3194
3195 BasicBlock *CheckBlock = BasicBlock::Create(Ctx, BB->getName() + ".check",
3196 BB->getParent(), InLoopSucc);
3197 Builder.SetInsertPoint(TI);
3198 auto *InvariantBr =
3199 Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
3200
3201 Builder.SetInsertPoint(CheckBlock);
3202 Builder.CreateCondBr(TI->getCondition(), TI->getSuccessor(0),
3203 TI->getSuccessor(1));
3204 TI->eraseFromParent();
3205
3206 // Fixup phis.
3207 for (auto &I : *InLoopSucc) {
3208 auto *PN = dyn_cast<PHINode>(&I);
3209 if (!PN)
3210 break;
3211 auto *Inc = PN->getIncomingValueForBlock(BB);
3212 PN->addIncoming(Inc, CheckBlock);
3213 }
3214 OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);
3215
3217 { DominatorTree::Insert, BB, CheckBlock },
3218 { DominatorTree::Insert, CheckBlock, InLoopSucc },
3219 { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
3220 { DominatorTree::Delete, BB, OutOfLoopSucc }
3221 };
3222
3223 DT.applyUpdates(DTUpdates);
3224 if (MSSAU)
3225 MSSAU->applyUpdates(DTUpdates, DT);
3226 L.addBasicBlockToLoop(CheckBlock, LI);
3227
3228#ifndef NDEBUG
3229 DT.verify();
3230 LI.verify(DT);
3231 if (MSSAU && VerifyMemorySSA)
3232 MSSAU->getMemorySSA()->verifyMemorySSA();
3233#endif
3234
3235 // TODO: In fact, cost of unswitching a new invariant candidate is *slightly*
3236 // higher because we have just inserted a new block. Need to think how to
3237 // adjust the cost of injected candidates when it was first computed.
3238 LLVM_DEBUG(dbgs() << "Injected a new loop-invariant branch " << *InvariantBr
3239 << " and considering it for unswitching.");
3240 ++NumInvariantConditionsInjected;
3241 return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },
3242 Candidate.Cost);
3243}
3244
3245 /// Given chain of loop branch conditions looking like:
3246 /// br (Variant < Invariant1)
3247 /// br (Variant < Invariant2)
3248 /// br (Variant < Invariant3)
3249 /// ...
3250 /// collect set of invariant conditions on which we want to unswitch, which
3251 /// look like:
3252 /// Invariant1 <= Invariant2
3253 /// Invariant2 <= Invariant3
3254 /// ...
3255 /// Though they might not immediately exist in the IR, we can still inject them.
// NOTE(review): this is a doxygen-rendered extract; the opening signature line
// (original line 3256, declaring `Pred` and the `Compares` array) and the line
// computing `NonStrictPred` (original 3265) were dropped by the renderer.
// Consult the original source before editing.
3257 SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, Loop &L,
3259 const DominatorTree &DT) {
3260
// A single compare yields no invariant-vs-invariant pair; need at least two.
3263 if (Compares.size() < 2)
3264 return false;
// For each adjacent pair of compares against the same variant value, queue a
// *pending* candidate "Next->Invariant (non-strict pred) Prev->Invariant".
// The condition is not materialized in IR yet (hence InjectedInvariant);
// cost is left as std::nullopt to be filled in later.
3266 for (auto Prev = Compares.begin(), Next = Compares.begin() + 1;
3267 Next != Compares.end(); ++Prev, ++Next) {
3268 Value *LHS = Next->Invariant;
3269 Value *RHS = Prev->Invariant;
3270 BasicBlock *InLoopSucc = Prev->InLoopSucc;
3271 InjectedInvariant ToInject(NonStrictPred, LHS, RHS, InLoopSucc);
3272 NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },
3273 std::nullopt, std::move(ToInject));
3274 UnswitchCandidates.push_back(std::move(Candidate));
3275 }
3276 return true;
3277}
3278
3279 /// Collect unswitch candidates by invariant conditions that are not immediately
3280 /// present in the loop. However, they can be injected into the code if we
3281 /// decide it's profitable.
3282 /// An example of such conditions is following:
3283 ///
3284 /// for (...) {
3285 /// x = load ...
3286 /// if (! x <u C1) break;
3287 /// if (! x <u C2) break;
3288 /// <do something>
3289 /// }
3290 ///
3291 /// We can unswitch by condition "C1 <=u C2". If that is true, then "x <u C1 <=
3292 /// C2" automatically implies "x <u C2", so we can get rid of one of
3293 /// loop-variant checks in unswitched loop version.
// NOTE(review): doxygen extract — the function's opening signature line
// (original 3294-3295) is missing here.
3296 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L,
3297 const DominatorTree &DT, const LoopInfo &LI, AAResults &AA,
3298 const MemorySSAUpdater *MSSAU) {
// NOTE(review): the guard condition for this early return (original 3299,
// presumably the InjectInvariantConditions option check) was elided.
3300 return false;
3301
3302 if (!DT.isReachableFromEntry(L.getHeader()))
3303 return false;
3304 auto *Latch = L.getLoopLatch();
3305 // Need to have a single latch and a preheader.
3306 if (!Latch)
3307 return false;
3308 assert(L.getLoopPreheader() && "Must have a preheader!");
3309
// NOTE(review): the declaration of CandidatesULT (original 3310, a map from a
// stripped variant LHS to its list of CompareDesc records) was elided.
3311 // Traverse the conditions that dominate latch (and therefore dominate each
3312 // other).
3313 for (auto *DTN = DT.getNode(Latch); L.contains(DTN->getBlock());
3314 DTN = DTN->getIDom()) {
3315 CmpPredicate Pred;
3316 Value *LHS = nullptr, *RHS = nullptr;
3317 BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
3318 auto *BB = DTN->getBlock();
3319 // Ignore inner loops.
3320 if (LI.getLoopFor(BB) != &L)
3321 continue;
// Only conditional branches on an integer icmp are considered.
3322 auto *Term = BB->getTerminator();
3323 if (!match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
3324 m_BasicBlock(IfTrue), m_BasicBlock(IfFalse))))
3325 continue;
3326 if (!LHS->getType()->isIntegerTy())
3327 continue;
3328 canonicalizeForInvariantConditionInjection(Pred, LHS, RHS, IfTrue, IfFalse,
3329 L);
3330 if (!shouldTryInjectInvariantCondition(Pred, LHS, RHS, IfTrue, IfFalse, L))
3331 continue;
// NOTE(review): the condition guarding this continue (original 3332,
// presumably the shouldTryInjectBasingOnMetadata check) was elided.
3333 continue;
3334 // Strip ZEXT for unsigned predicate.
3335 // TODO: once signed predicates are supported, also strip SEXT.
3336 CompareDesc Desc(cast<BranchInst>(Term), RHS, IfTrue);
3337 while (auto *Zext = dyn_cast<ZExtInst>(LHS))
3338 LHS = Zext->getOperand(0);
3339 CandidatesULT[LHS].push_back(Desc);
3340 }
3341
// NOTE(review): original 3344 (the `Found |= insertCandidates...` call this
// loop body performs) was elided by the renderer.
3342 bool Found = false;
3343 for (auto &It : CandidatesULT)
3345 UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);
3346 return Found;
3347}
3348
// NOTE(review): doxygen extract — the function signature (original 3349,
// isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI) per the index below)
// was elided. Legality check: non-trivial unswitching clones the loop, so
// anything that cannot be cloned rules it out.
3350 if (!L.isSafeToClone())
3351 return false;
// Cross-block token uses and convergent calls cannot be duplicated safely.
3352 for (auto *BB : L.blocks())
3353 for (auto &I : *BB) {
3354 if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
3355 return false;
3356 if (auto *CB = dyn_cast<CallBase>(&I)) {
3357 assert(!CB->cannotDuplicate() && "Checked by L.isSafeToClone().");
3358 if (CB->isConvergent())
3359 return false;
3360 }
3361 }
3362
3363 // Check if there are irreducible CFG cycles in this loop. If so, we cannot
3364 // easily unswitch non-trivial edges out of the loop. Doing so might turn the
3365 // irreducible control flow into reducible control flow and introduce new
3366 // loops "out of thin air". If we ever discover important use cases for doing
3367 // this, we can add support to loop unswitch, but it is a lot of complexity
3368 // for what seems little or no real world benefit.
3369 LoopBlocksRPO RPOT(&L);
3370 RPOT.perform(&LI);
// NOTE(review): the irreducible-CFG condition guarding this return (original
// 3371) was elided by the renderer.
3372 return false;
3373
// NOTE(review): the ExitBlocks container declaration (original 3374) was
// elided.
3375 L.getUniqueExitBlocks(ExitBlocks);
3376 // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
3377 // instruction as we don't know how to split those exit blocks.
3378 // FIXME: We should teach SplitBlock to handle this and remove this
3379 // restriction.
3380 for (auto *ExitBB : ExitBlocks) {
3381 auto It = ExitBB->getFirstNonPHIIt();
// NOTE(review): the cleanuppad/catchswitch isa<> check guarding this debug
// message (original 3382) was elided.
3383 LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "
3384 "in exit block\n");
3385 return false;
3386 }
3387 }
3388
3389 return true;
3390}
3391
// Picks, among all collected candidates, the terminator whose unswitching is
// cheapest under the duplicated-code cost model below.
// NOTE(review): doxygen extract — several local declarations (EphValues,
// BBCostMap, CostKind, DTCostMap, Visited, BI, and the
// EnableUnswitchCostMultiplier guard) were elided by the renderer; gaps are
// annotated inline.
3392 static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
3393 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates, const Loop &L,
3394 const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC,
3395 const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo) {
3396 // Given that unswitching these terminators will require duplicating parts of
3397 // the loop, so we need to be able to model that cost. Compute the ephemeral
3398 // values and set up a data structure to hold per-BB costs. We cache each
3399 // block's cost so that we don't recompute this when considering different
3400 // subsets of the loop for duplication during unswitching.
// NOTE(review): declarations of EphValues (3401) and BBCostMap (3403) elided.
3402 CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
3404
3405 // Compute the cost of each block, as well as the total loop cost. Also, bail
3406 // out if we see instructions which are incompatible with loop unswitching
3407 // (convergent, noduplicate, or cross-basic-block tokens).
3408 // FIXME: We might be able to safely handle some of these in non-duplicated
3409 // regions.
// NOTE(review): the CostKind selection wrapping this hasMinSize() test
// (originals 3410 and 3412-3413) was elided.
3411 L.getHeader()->getParent()->hasMinSize()
3414 InstructionCost LoopCost = 0;
// Sum per-instruction costs per block, skipping ephemeral values (values only
// feeding assumes), and cache each block's cost in BBCostMap.
3415 for (auto *BB : L.blocks()) {
3416 InstructionCost Cost = 0;
3417 for (auto &I : *BB) {
3418 if (EphValues.count(&I))
3419 continue;
3420 Cost += TTI.getInstructionCost(&I, CostKind);
3421 }
3422 assert(Cost >= 0 && "Must not have negative costs!");
3423 LoopCost += Cost;
3424 assert(LoopCost >= 0 && "Must not have negative loop costs!");
3425 BBCostMap[BB] = Cost;
3426 }
3427 LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");
3428
3429 // Now we find the best candidate by searching for the one with the following
3430 // properties in order:
3431 //
3432 // 1) An unswitching cost below the threshold
3433 // 2) The smallest number of duplicated unswitch candidates (to avoid
3434 // creating redundant subsequent unswitching)
3435 // 3) The smallest cost after unswitching.
3436 //
3437 // We prioritize reducing fanout of unswitch candidates provided the cost
3438 // remains below the threshold because this has a multiplicative effect.
3439 //
3440 // This requires memoizing each dominator subtree to avoid redundant work.
3441 //
3442 // FIXME: Need to actually do the number of candidates part above.
// NOTE(review): DTCostMap declaration (original 3443) elided.
3444 // Given a terminator which might be unswitched, computes the non-duplicated
3445 // cost for that terminator.
3446 auto ComputeUnswitchedCost = [&](Instruction &TI,
3447 bool FullUnswitch) -> InstructionCost {
3448 // Unswitching selects unswitches the entire loop.
3449 if (isa<SelectInst>(TI))
3450 return LoopCost;
3451
// NOTE(review): Visited set declaration (original 3453) elided.
3452 BasicBlock &BB = *TI.getParent();
3454
3455 InstructionCost Cost = 0;
3456 for (BasicBlock *SuccBB : successors(&BB)) {
3457 // Don't count successors more than once.
3458 if (!Visited.insert(SuccBB).second)
3459 continue;
3460
3461 // If this is a partial unswitch candidate, then it must be a conditional
3462 // branch with a condition of either `or`, `and`, their corresponding
3463 // select forms or partially invariant instructions. In that case, one of
3464 // the successors is necessarily duplicated, so don't even try to remove
3465 // its cost.
3466 if (!FullUnswitch) {
3467 auto &BI = cast<BranchInst>(TI);
3468 Value *Cond = skipTrivialSelect(BI.getCondition());
3469 if (match(Cond, m_LogicalAnd())) {
3470 if (SuccBB == BI.getSuccessor(1))
3471 continue;
3472 } else if (match(Cond, m_LogicalOr())) {
3473 if (SuccBB == BI.getSuccessor(0))
3474 continue;
3475 } else if ((PartialIVInfo.KnownValue->isOneValue() &&
3476 SuccBB == BI.getSuccessor(0)) ||
3477 (!PartialIVInfo.KnownValue->isOneValue() &&
3478 SuccBB == BI.getSuccessor(1)))
3479 continue;
3480 }
3481
3482 // This successor's domtree will not need to be duplicated after
3483 // unswitching if the edge to the successor dominates it (and thus the
3484 // entire tree). This essentially means there is no other path into this
3485 // subtree and so it will end up live in only one clone of the loop.
3486 if (SuccBB->getUniquePredecessor() ||
3487 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
3488 return PredBB == &BB || DT.dominates(SuccBB, PredBB);
3489 })) {
3490 Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
3491 assert(Cost <= LoopCost &&
3492 "Non-duplicated cost should never exceed total loop cost!");
3493 }
3494 }
3495
3496 // Now scale the cost by the number of unique successors minus one. We
3497 // subtract one because there is already at least one copy of the entire
3498 // loop. This is computing the new cost of unswitching a condition.
3499 // Note that guards always have 2 unique successors that are implicit and
3500 // will be materialized if we decide to unswitch it.
3501 int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
3502 assert(SuccessorsCount > 1 &&
3503 "Cannot unswitch a condition without multiple distinct successors!");
3504 return (LoopCost - Cost) * (SuccessorsCount - 1);
3505 };
3506
// Linear scan: evaluate each candidate's cost and keep the cheapest.
3507 std::optional<NonTrivialUnswitchCandidate> Best;
3508 for (auto &Candidate : UnswitchCandidates) {
3509 Instruction &TI = *Candidate.TI;
3510 ArrayRef<Value *> Invariants = Candidate.Invariants;
// NOTE(review): the BranchInst dyn_cast producing `BI` (original 3511) elided.
3512 bool FullUnswitch =
3513 !BI || Candidate.hasPendingInjection() ||
3514 (Invariants.size() == 1 &&
3515 Invariants[0] == skipTrivialSelect(BI->getCondition()));
3516 InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
3517 // Calculate cost multiplier which is a tool to limit potentially
3518 // exponential behavior of loop-unswitch.
// NOTE(review): the `if (EnableUnswitchCostMultiplier) {` opening this
// braced region (original 3519) was elided; the `} else {` below pairs
// with it.
3520 int CostMultiplier =
3521 CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
3522 assert(
3523 (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
3524 "cost multiplier needs to be in the range of 1..UnswitchThreshold");
3525 CandidateCost *= CostMultiplier;
3526 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3527 << " (multiplier: " << CostMultiplier << ")"
3528 << " for unswitch candidate: " << TI << "\n");
3529 } else {
3530 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3531 << " for unswitch candidate: " << TI << "\n");
3532 }
3533
3534 if (!Best || CandidateCost < Best->Cost) {
3535 Best = Candidate;
3536 Best->Cost = CandidateCost;
3537 }
3538 }
3539 assert(Best && "Must be!");
3540 return *Best;
3541}
3542
3543 // Insert a freeze on an unswitched branch if all is true:
3544 // 1. freeze-loop-unswitch-cond option is true
3545 // 2. The branch may not execute in the loop pre-transformation. If a branch may
3546 // not execute and could cause UB, it would always cause UB if it is hoisted outside
3547 // of the loop. Insert a freeze to prevent this case.
3548 // 3. The branch condition may be poison or undef
// NOTE(review): doxygen extract — the signature opening (original 3549,
// shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, ...) per the
// index below) was elided.
3550 AssumptionCache &AC) {
// NOTE(review): the option check guarding this return (originals 3551-3552,
// presumably FreezeLoopUnswitchCond) was elided.
3553 return false;
3554
// If the terminator is guaranteed to execute on every iteration, hoisting its
// condition cannot introduce new UB, so no freeze is needed.
3555 ICFLoopSafetyInfo SafetyInfo;
3556 SafetyInfo.computeLoopSafetyInfo(&L);
3557 if (SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
3558 return false;
3559
3560 Value *Cond;
3561 if (BranchInst *BI = dyn_cast<BranchInst>(&TI))
3562 Cond = skipTrivialSelect(BI->getCondition());
3563 else
// NOTE(review): the else branch body and the negated
// isGuaranteedNotToBeUndefOrPoison return (originals 3564-3565) were elided;
// only the call's trailing arguments remain.
3566 Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
3567}
3568
// NOTE(review): doxygen extract — the opening of unswitchBestCondition's
// signature (originals 3569-3571) and the UnswitchCandidates vector
// declaration (original 3576) were elided by the renderer.
3572 MemorySSAUpdater *MSSAU,
3573 LPMUpdater &LoopUpdater) {
3574 // Collect all invariant conditions within this loop (as opposed to an inner
3575 // loop which would be handled when visiting that inner loop).
3577 IVConditionInfo PartialIVInfo;
3578 Instruction *PartialIVCondBranch = nullptr;
3579 collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
3580 PartialIVCondBranch, L, LI, AA, MSSAU);
// Injection-based candidates are only collected unless disabled via loop
// metadata.
3581 if (!findOptionMDForLoop(&L, "llvm.loop.unswitch.injection.disable"))
3582 collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo,
3583 PartialIVCondBranch, L, DT, LI, AA,
3584 MSSAU);
3585 // If we didn't find any candidates, we're done.
3586 if (UnswitchCandidates.empty())
3587 return false;
3588
3589 LLVM_DEBUG(
3590 dbgs() << "Considering " << UnswitchCandidates.size()
3591 << " non-trivial loop invariant conditions for unswitching.\n");
3592
3593 NonTrivialUnswitchCandidate Best = findBestNonTrivialUnswitchCandidate(
3594 UnswitchCandidates, L, DT, LI, AC, TTI, PartialIVInfo);
3595
3596 assert(Best.TI && "Failed to find loop unswitch candidate");
3597 assert(Best.Cost && "Failed to compute cost");
3598
// Even the best candidate may be too expensive; give up below the threshold.
3599 if (*Best.Cost >= UnswitchThreshold) {
3600 LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << *Best.Cost
3601 << "\n");
3602 return false;
3603 }
3604
// If the chosen candidate's invariant condition does not exist in the IR yet,
// materialize it now.
3605 bool InjectedCondition = false;
3606 if (Best.hasPendingInjection()) {
3607 Best = injectPendingInvariantConditions(Best, L, DT, LI, AC, MSSAU);
3608 InjectedCondition = true;
3609 }
3610 assert(!Best.hasPendingInjection() &&
3611 "All injections should have been done by now!");
3612
3613 if (Best.TI != PartialIVCondBranch)
3614 PartialIVInfo.InstToDuplicate.clear();
3615
3616 bool InsertFreeze;
3617 if (auto *SI = dyn_cast<SelectInst>(Best.TI)) {
3618 // If the best candidate is a select, turn it into a branch. Select
3619 // instructions with a poison conditional do not propagate poison, but
3620 // branching on poison causes UB. Insert a freeze on the select
3621 // conditional to prevent UB after turning the select into a branch.
3622 InsertFreeze = !isGuaranteedNotToBeUndefOrPoison(
3623 SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT);
3624 Best.TI = turnSelectIntoBranch(SI, DT, LI, MSSAU, &AC);
3625 } else {
3626 // If the best candidate is a guard, turn it into a branch.
3627 if (isGuard(Best.TI))
3628 Best.TI =
3629 turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
3630 InsertFreeze = shouldInsertFreeze(L, *Best.TI, DT, AC);
3631 }
3632
3633 LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Cost
3634 << ") terminator: " << *Best.TI << "\n");
3635 unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT,
3636 LI, AC, SE, MSSAU, LoopUpdater, InsertFreeze,
3637 InjectedCondition);
3638 return true;
3639}
3640
3641 /// Unswitch control flow predicated on loop invariant conditions.
3642 ///
3643 /// This first hoists all branches or switches which are trivial (IE, do not
3644 /// require duplicating any part of the loop) out of the loop body. It then
3645 /// looks at other loop invariant control flows and tries to unswitch those as
3646 /// well by cloning the loop if the result is small enough.
3647 ///
3648 /// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
3649 /// also updated based on the unswitch. The `MSSA` analysis is also updated if
3650 /// valid (i.e. its use is enabled).
3651 ///
3652 /// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
3653 /// true, we will attempt to do non-trivial unswitching as well as trivial
3654 /// unswitching.
3655 ///
3656 /// The `postUnswitch` function will be run after unswitching is complete
3657 /// with information on whether or not the provided loop remains a loop and
3658 /// a list of new sibling loops created.
3659 ///
3660 /// If `SE` is non-null, we will update that analysis based on the unswitching
3661 /// done.
// NOTE(review): doxygen extract — the parameter line `AssumptionCache &AC,
// AAResults &AA,` (original 3663) was elided from the signature below.
3662 static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
3664 TargetTransformInfo &TTI, bool Trivial,
3665 bool NonTrivial, ScalarEvolution *SE,
3666 MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater) {
3667 assert(L.isRecursivelyLCSSAForm(DT, LI) &&
3668 "Loops must be in LCSSA form before unswitching.");
3669
3670 // Must be in loop simplified form: we need a preheader and dedicated exits.
3671 if (!L.isLoopSimplifyForm())
3672 return false;
3673
3674 // Try trivial unswitch first before loop over other basic blocks in the loop.
3675 if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
3676 // If we unswitched successfully we will want to clean up the loop before
3677 // processing it further so just mark it as unswitched and return.
3678 postUnswitch(L, LoopUpdater, L.getName(),
3679 /*CurrentLoopValid*/ true, /*PartiallyInvariant*/ false,
3680 /*InjectedCondition*/ false, {});
3681 return true;
3682 }
3683
3684 const Function *F = L.getHeader()->getParent();
3685
3686 // Check whether we should continue with non-trivial conditions.
3687 // EnableNonTrivialUnswitch: Global variable that forces non-trivial
3688 // unswitching for testing and debugging.
3689 // NonTrivial: Parameter that enables non-trivial unswitching for this
3690 // invocation of the transform. But this should be allowed only
3691 // for targets without branch divergence.
3692 //
3693 // FIXME: If divergence analysis becomes available to a loop
3694 // transform, we should allow unswitching for non-trivial uniform
3695 // branches even on targets that have divergence.
3696 // https://bugs.llvm.org/show_bug.cgi?id=48819
3697 bool ContinueWithNonTrivial =
3698 EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence(F));
3699 if (!ContinueWithNonTrivial)
3700 return false;
3701
3702 // Skip non-trivial unswitching for optsize functions.
3703 if (F->hasOptSize())
3704 return false;
3705
3706 // Perform legality checks.
// NOTE(review): the call guarding this return (original 3707, presumably
// isSafeForNoNTrivialUnswitching(L, LI)) was elided by the renderer.
3708 return false;
3709
3710 // For non-trivial unswitching, because it often creates new loops, we rely on
3711 // the pass manager to iterate on the loops rather than trying to immediately
3712 // reach a fixed point. There is no substantial advantage to iterating
3713 // internally, and if any of the new loops are simplified enough to contain
3714 // trivial unswitching we want to prefer those.
3715
3716 // Try to unswitch the best invariant condition. We prefer this full unswitch to
3717 // a partial unswitch when possible below the threshold.
3718 if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, SE, MSSAU, LoopUpdater))
3719 return true;
3720
3721 // No other opportunities to unswitch.
3722 return false;
3723}
3724
// NOTE(review): doxygen extract — the opening of SimpleLoopUnswitchPass::run's
// signature (originals 3725-3726) was elided; only the trailing parameter
// remains. This is the loop-pass-manager entry point for the pass.
3727 LPMUpdater &U) {
3728 Function &F = *L.getHeader()->getParent();
3729 (void)F;
3730 LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
3731 << "\n");
3732
// Wrap MemorySSA in an updater only when the analysis is available, and
// verify it (when enabled) both before and after the transform.
3733 std::optional<MemorySSAUpdater> MSSAU;
3734 if (AR.MSSA) {
3735 MSSAU = MemorySSAUpdater(AR.MSSA);
3736 if (VerifyMemorySSA)
3737 AR.MSSA->verifyMemorySSA();
3738 }
3739 if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
3740 &AR.SE, MSSAU ? &*MSSAU : nullptr, U))
3741 return PreservedAnalyses::all();
3742
3743 if (AR.MSSA && VerifyMemorySSA)
3744 AR.MSSA->verifyMemorySSA();
3745
3746 // Historically this pass has had issues with the dominator tree so verify it
3747 // in asserts builds.
3748 assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
3749
3750 auto PA = getLoopPassPreservedAnalyses();
3751 if (AR.MSSA)
3752 PA.preserve<MemorySSAAnalysis>();
3753 return PA;
3754}
3755
// NOTE(review): doxygen extract — the opening of
// SimpleLoopUnswitchPass::printPipeline's signature (original 3756) and the
// base-class printPipeline call this forwards to (original 3758) were elided.
// Emits the pass's textual pipeline options, e.g. "<nontrivial;trivial>".
3757 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
3759 OS, MapClassName2PassName);
3760
3761 OS << '<';
3762 OS << (NonTrivial ? "" : "no-") << "nontrivial;";
3763 OS << (Trivial ? "" : "no-") << "trivial";
3764 OS << '>';
3765}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
#define DEBUG_TYPE
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
static Value * getCondition(Instruction *I)
Module.h This file contains the declarations for the Module class.
This defines the Use class.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
This header provides classes for managing per-loop analyses.
Loop::LoopBounds::Direction Direction
Definition LoopInfo.cpp:231
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
#define T
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
uint64_t IntrinsicInst * II
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
Provides some synthesis utilities to produce sequences of values.
This file implements a set that has insertion order iteration characteristics.
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
static int CalculateUnswitchCostMultiplier(const Instruction &TI, const Loop &L, const LoopInfo &LI, const DominatorTree &DT, ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root, const LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy > > VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
static BranchInst * turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, AssumptionCache *AC)
Turns a select instruction into implicit control flow branch, making the following replacement:
static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)
void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName, bool CurrentLoopValid, bool PartiallyInvariant, bool InjectedCondition, ArrayRef< Loop * > NewLoops)
static bool shouldTryInjectInvariantCondition(const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L)
Returns true, if predicate described by ( Pred, LHS, RHS ) succeeding into blocks ( IfTrue,...
static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates, const Loop &L, const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC, const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo)
static Value * skipTrivialSelect(Value *Cond)
static Loop * getTopMostExitingLoop(const BasicBlock *ExitBB, const LoopInfo &LI)
static bool collectUnswitchCandidatesWithInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, const DominatorTree &DT, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
Collect unswitch candidates by invariant conditions that are not immediately present in the loop.
static void replaceLoopInvariantUses(const Loop &L, Value *Invariant, Constant &Replacement)
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
static bool collectUnswitchCandidates(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, const Loop &L, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, AssumptionCache &AC)
static void canonicalizeForInvariantConditionInjection(CmpPredicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue, BasicBlock *&IfFalse, const Loop &L)
Tries to canonicalize condition described by:
static bool areLoopExitPHIsLoopInvariant(const Loop &L, const BasicBlock &ExitingBB, const BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
static bool insertCandidatesWithPendingInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, Loop &L, ICmpInst::Predicate Pred, ArrayRef< CompareDesc > Compares, const DominatorTree &DT)
Given chain of loop branch conditions looking like: br (Variant < Invariant1) br (Variant < Invariant...
static NonTrivialUnswitchCandidate injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, MemorySSAUpdater *MSSAU)
Materialize pending invariant condition of the given candidate into IR.
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition)
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops, ScalarEvolution *SE)
Rebuild a loop after unswitching removes some subset of blocks and edges.
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)
Unswitch control flow predicated on loop invariant conditions.
static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU, const BranchInst &OriginalBranch)
Copy a set of loop invariant values, and conditionally branch on them.
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Build the cloned blocks for an unswitched copy of the given loop.
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, LPMUpdater &LoopUpdater)
bool shouldTryInjectBasingOnMetadata(const BranchInst *BI, const BasicBlock *TakenSucc)
Returns true, if metadata on BI allows us to optimize branching into TakenSucc via injection of invar...
static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, const Instruction *I, AssumptionCache *AC, const DominatorTree &DT, const BranchInst &ComputeProfFrom)
Copy a set of loop invariant values Invariants and insert them at the end of BB and conditionally bra...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
iterator end() const
Definition ArrayRef.h:132
size_t size() const
size - Get the array size.
Definition ArrayRef.h:143
iterator begin() const
Definition ArrayRef.h:131
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:138
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
size_t size() const
Definition BasicBlock.h:480
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition BasicBlock.h:386
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
LLVM_ABI void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
static LLVM_ABI CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Predicate getNonStrictPredicate() const
For example, SGT -> SGE, SLT -> SLE, ULT -> ULE, UGT -> UGE.
Definition InstrTypes.h:871
static LLVM_ABI bool isStrictPredicate(Predicate predicate)
This is a static version that you can use without an instruction available.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
bool isUnsigned() const
Definition InstrTypes.h:936
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI bool isOneValue() const
Returns true if the value is one.
A debug info location.
Definition DebugLoc.h:124
static DebugLoc getCompilerGenerated()
Definition DebugLoc.h:163
static DebugLoc getDropped()
Definition DebugLoc.h:164
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator begin()
Definition DenseMap.h:78
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:174
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:233
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents a freeze function that returns random concrete value if an operand is either a ...
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
bool isRelational() const
Return true if the predicate is relational (not EQ or NE).
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2645
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1197
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1551
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void dropLocation()
Drop the instruction's debug location.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
bool isTerminator() const
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
A wrapper class for inspecting calls to intrinsic functions.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
void perform(const LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
iterator end() const
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
iterator begin() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
StringRef getName() const
Definition LoopInfo.h:389
Metadata node.
Definition Metadata.h:1078
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:608
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition MemorySSA.h:371
An analysis that produces MemorySSA for a function.
Definition MemorySSA.h:936
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
LLVM_ABI void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
LLVM_ABI void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
LLVM_ABI void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
LLVM_ABI void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.
LLVM_ABI void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
LLVM_ABI MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point, bool CreationMustSucceed=true)
Create a MemoryAccess in MemorySSA at a specified point in a block.
LLVM_ABI void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
LLVM_ABI void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
LLVM_ABI void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
LLVM_ABI void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition MemorySSA.h:702
LLVM_ABI void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition MemorySSA.h:720
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition MemorySSA.h:768
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
The main scalar evolution driver.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI void forgetTopmostLoop(const Loop *L)
LLVM_ABI void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
LLVM_ABI void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V)
Forget LCSSA phi node V of loop L to which a new predecessor was added, such that it may no longer be...
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:102
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:261
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition SetVector.h:105
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:150
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:338
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI Instruction::InstListType::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
ConstantIntT * getCaseValue() const
Resolves case value for current case.
Multiway switch.
BasicBlock * getDefaultDest() const
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, InsertPosition InsertBefore=nullptr)
void setDefaultDest(BasicBlock *DefaultCase)
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
void push_back(EltTy NewVal)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition ValueMap.h:167
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition ValueMap.h:156
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:382
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1091
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ?
bool match(Val *V, const Pattern &P)
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
LogicalOp_match< LHS, RHS, Instruction::Or > m_LogicalOr(const LHS &L, const RHS &R)
Matches L || R either in the form of L | R or L ?
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
initializer< Ty > init(const Ty &Val)
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void stable_sort(R &&Range)
Definition STLExtras.h:2058
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1655
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
auto successors(const MachineBasicBlock *BB)
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, Function &F, StringRef PassName)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
auto cast_or_null(const Y &Val)
Definition Casting.h:714
LLVM_ABI MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
Op::Description Desc
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1150
DomTreeNodeBase< BasicBlock > DomTreeNode
Definition Dominators.h:95
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
bool containsIrreducibleCFG(RPOTraversalT &RPOTraversal, const LoopInfoT &LI)
Return true if the control flow in RPOTraversal is irreducible.
Definition CFG.h:149
static cl::opt< unsigned > InjectInvariantConditionHotnesThreshold("simple-loop-unswitch-inject-invariant-condition-hotness-threshold", cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " "unswitch on them to eliminate branches that are " "not-taken 1/<this option> times or less."), cl::init(16))
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&...args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
Definition STLExtras.h:852
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
static cl::opt< bool > InjectInvariantConditions("simple-loop-unswitch-inject-invariant-conditions", cl::Hidden, cl::desc("Whether we should inject new invariants and unswitch them to " "eliminate some existing (non-invariant) conditions."), cl::init(true))
LLVM_ABI bool VerifyLoopInfo
Enable verification of loop info.
Definition LoopInfo.cpp:51
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
TargetTransformInfo TTI
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition MemorySSA.cpp:84
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
LLVM_ABI bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition LoopUtils.cpp:58
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto sum_of(R &&Range, E Init=E{0})
Returns the sum of all values in Range with Init initial value.
Definition STLExtras.h:1703
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1961
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
static cl::opt< bool > EstimateProfile("simple-loop-unswitch-estimate-profile", cl::Hidden, cl::init(true))
LLVM_ABI llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2120
auto predecessors(const MachineBasicBlock *BB)
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
static cl::opt< bool > FreezeLoopUnswitchCond("freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation."))
LLVM_ABI std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
LLVM_ABI bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition LCSSA.cpp:427
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
static cl::opt< int > UnswitchParentBlocksDiv("unswitch-parent-blocks-div", cl::init(8), cl::Hidden, cl::desc("Outer loop size divisor for cost multiplier."))
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Struct to hold information about a partially invariant condition.
Definition LoopUtils.h:622
SmallVector< Instruction * > InstToDuplicate
Instructions that need to be duplicated and checked for the unswitching condition.
Definition LoopUtils.h:625
Constant * KnownValue
Constant to indicate for which value the condition is invariant.
Definition LoopUtils.h:628
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:70