LLVM 18.0.0git
LoopUnroll.cpp
Go to the documentation of this file.
1//===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements some loop unrolling utilities. It does not define any
10// actual pass or policy, but provides a single function to perform loop
11// unrolling.
12//
13// The process of unrolling can produce extraneous basic blocks linked with
14// unconditional branches. This will be corrected in the future.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
25#include "llvm/ADT/Twine.h"
35#include "llvm/IR/BasicBlock.h"
36#include "llvm/IR/CFG.h"
37#include "llvm/IR/Constants.h"
39#include "llvm/IR/DebugLoc.h"
41#include "llvm/IR/Dominators.h"
42#include "llvm/IR/Function.h"
43#include "llvm/IR/Instruction.h"
46#include "llvm/IR/Metadata.h"
47#include "llvm/IR/Module.h"
49#include "llvm/IR/Use.h"
50#include "llvm/IR/User.h"
51#include "llvm/IR/ValueHandle.h"
52#include "llvm/IR/ValueMap.h"
55#include "llvm/Support/Debug.h"
67#include <algorithm>
68#include <assert.h>
69#include <numeric>
70#include <type_traits>
71#include <vector>
72
73namespace llvm {
74class DataLayout;
75class Value;
76} // namespace llvm
77
78using namespace llvm;
79
80#define DEBUG_TYPE "loop-unroll"
81
82// TODO: Should these be here or in LoopUnroll?
83STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
84STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
85STATISTIC(NumUnrolledNotLatch, "Number of loops unrolled without a conditional "
86 "latch (completely or otherwise)");
87
88static cl::opt<bool>
89UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
90 cl::desc("Allow runtime unrolled loops to be unrolled "
91 "with epilog instead of prolog."));
92
93static cl::opt<bool>
94UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
95 cl::desc("Verify domtree after unrolling"),
96#ifdef EXPENSIVE_CHECKS
97 cl::init(true)
98#else
99 cl::init(false)
100#endif
101 );
102
103static cl::opt<bool>
104UnrollVerifyLoopInfo("unroll-verify-loopinfo", cl::Hidden,
105 cl::desc("Verify loopinfo after unrolling"),
106#ifdef EXPENSIVE_CHECKS
107 cl::init(true)
108#else
109 cl::init(false)
110#endif
111 );
112
113
114/// Check if unrolling created a situation where we need to insert phi nodes to
115/// preserve LCSSA form.
116/// \param Blocks is a vector of basic blocks representing unrolled loop.
117/// \param L is the outer loop.
118/// It's possible that some of the blocks are in L, and some are not. In this
119/// case, if there is a use is outside L, and definition is inside L, we need to
120/// insert a phi-node, otherwise LCSSA will be broken.
121/// The function is just a helper function for llvm::UnrollLoop that returns
122/// true if this situation occurs, indicating that LCSSA needs to be fixed.
124 const std::vector<BasicBlock *> &Blocks,
125 LoopInfo *LI) {
126 for (BasicBlock *BB : Blocks) {
127 if (LI->getLoopFor(BB) == L)
128 continue;
129 for (Instruction &I : *BB) {
130 for (Use &U : I.operands()) {
131 if (const auto *Def = dyn_cast<Instruction>(U)) {
132 Loop *DefLoop = LI->getLoopFor(Def->getParent());
133 if (!DefLoop)
134 continue;
135 if (DefLoop->contains(L))
136 return true;
137 }
138 }
139 }
140 }
141 return false;
142}
143
144/// Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary
145/// and adds a mapping from the original loop to the new loop to NewLoops.
146/// Returns nullptr if no new loop was created and a pointer to the
147/// original loop OriginalBB was part of otherwise.
149 BasicBlock *ClonedBB, LoopInfo *LI,
150 NewLoopsMap &NewLoops) {
151 // Figure out which loop New is in.
152 const Loop *OldLoop = LI->getLoopFor(OriginalBB);
153 assert(OldLoop && "Should (at least) be in the loop being unrolled!");
154
155 Loop *&NewLoop = NewLoops[OldLoop];
156 if (!NewLoop) {
157 // Found a new sub-loop.
158 assert(OriginalBB == OldLoop->getHeader() &&
159 "Header should be first in RPO");
160
161 NewLoop = LI->AllocateLoop();
162 Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop());
163
164 if (NewLoopParent)
165 NewLoopParent->addChildLoop(NewLoop);
166 else
167 LI->addTopLevelLoop(NewLoop);
168
169 NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
170 return OldLoop;
171 } else {
172 NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
173 return nullptr;
174 }
175}
176
177/// The function chooses which type of unroll (epilog or prolog) is more
178/// profitabale.
179/// Epilog unroll is more profitable when there is PHI that starts from
180/// constant. In this case epilog will leave PHI start from constant,
181/// but prolog will convert it to non-constant.
182///
183/// loop:
184/// PN = PHI [I, Latch], [CI, PreHeader]
185/// I = foo(PN)
186/// ...
187///
188/// Epilog unroll case.
189/// loop:
190/// PN = PHI [I2, Latch], [CI, PreHeader]
191/// I1 = foo(PN)
192/// I2 = foo(I1)
193/// ...
194/// Prolog unroll case.
195/// NewPN = PHI [PrologI, Prolog], [CI, PreHeader]
196/// loop:
197/// PN = PHI [I2, Latch], [NewPN, PreHeader]
198/// I1 = foo(PN)
199/// I2 = foo(I1)
200/// ...
201///
202static bool isEpilogProfitable(Loop *L) {
203 BasicBlock *PreHeader = L->getLoopPreheader();
204 BasicBlock *Header = L->getHeader();
205 assert(PreHeader && Header);
206 for (const PHINode &PN : Header->phis()) {
207 if (isa<ConstantInt>(PN.getIncomingValueForBlock(PreHeader)))
208 return true;
209 }
210 return false;
211}
212
213/// Perform some cleanup and simplifications on loops after unrolling. It is
214/// useful to simplify the IV's in the new loop, as well as do a quick
215/// simplify/dce pass of the instructions.
216void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
218 AssumptionCache *AC,
219 const TargetTransformInfo *TTI) {
220 using namespace llvm::PatternMatch;
221
222 // Simplify any new induction variables in the partially unrolled loop.
223 if (SE && SimplifyIVs) {
225 simplifyLoopIVs(L, SE, DT, LI, TTI, DeadInsts);
226
227 // Aggressively clean up dead instructions that simplifyLoopIVs already
228 // identified. Any remaining should be cleaned up below.
229 while (!DeadInsts.empty()) {
230 Value *V = DeadInsts.pop_back_val();
231 if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
233 }
234 }
235
236 // At this point, the code is well formed. Perform constprop, instsimplify,
237 // and dce.
238 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
240 for (BasicBlock *BB : L->getBlocks()) {
241 for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
242 if (Value *V = simplifyInstruction(&Inst, {DL, nullptr, DT, AC}))
243 if (LI->replacementPreservesLCSSAForm(&Inst, V))
244 Inst.replaceAllUsesWith(V);
246 DeadInsts.emplace_back(&Inst);
247
248 // Fold ((add X, C1), C2) to (add X, C1+C2). This is very common in
249 // unrolled loops, and handling this early allows following code to
250 // identify the IV as a "simple recurrence" without first folding away
251 // a long chain of adds.
252 {
253 Value *X;
254 const APInt *C1, *C2;
255 if (match(&Inst, m_Add(m_Add(m_Value(X), m_APInt(C1)), m_APInt(C2)))) {
256 auto *InnerI = dyn_cast<Instruction>(Inst.getOperand(0));
257 auto *InnerOBO = cast<OverflowingBinaryOperator>(Inst.getOperand(0));
258 bool SignedOverflow;
259 APInt NewC = C1->sadd_ov(*C2, SignedOverflow);
260 Inst.setOperand(0, X);
261 Inst.setOperand(1, ConstantInt::get(Inst.getType(), NewC));
262 Inst.setHasNoUnsignedWrap(Inst.hasNoUnsignedWrap() &&
263 InnerOBO->hasNoUnsignedWrap());
264 Inst.setHasNoSignedWrap(Inst.hasNoSignedWrap() &&
265 InnerOBO->hasNoSignedWrap() &&
266 !SignedOverflow);
267 if (InnerI && isInstructionTriviallyDead(InnerI))
268 DeadInsts.emplace_back(InnerI);
269 }
270 }
271 }
272 // We can't do recursive deletion until we're done iterating, as we might
273 // have a phi which (potentially indirectly) uses instructions later in
274 // the block we're iterating through.
276 }
277}
278
279/// Unroll the given loop by Count. The loop must be in LCSSA form. Unrolling
280/// can only fail when the loop's latch block is not terminated by a conditional
281/// branch instruction. However, if the trip count (and multiple) are not known,
282/// loop unrolling will mostly produce more code that is no faster.
283///
284/// If Runtime is true then UnrollLoop will try to insert a prologue or
285/// epilogue that ensures the latch has a trip multiple of Count. UnrollLoop
286/// will not runtime-unroll the loop if computing the run-time trip count will
287/// be expensive and AllowExpensiveTripCount is false.
288///
289/// The LoopInfo Analysis that is passed will be kept consistent.
290///
291/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
292/// DominatorTree if they are non-null.
293///
294/// If RemainderLoop is non-null, it will receive the remainder loop (if
295/// required and not fully unrolled).
298 AssumptionCache *AC,
301 bool PreserveLCSSA, Loop **RemainderLoop) {
302 assert(DT && "DomTree is required");
303
304 if (!L->getLoopPreheader()) {
305 LLVM_DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
307 }
308
309 if (!L->getLoopLatch()) {
310 LLVM_DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n");
312 }
313
314 // Loops with indirectbr cannot be cloned.
315 if (!L->isSafeToClone()) {
316 LLVM_DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n");
318 }
319
320 if (L->getHeader()->hasAddressTaken()) {
321 // The loop-rotate pass can be helpful to avoid this in many cases.
323 dbgs() << " Won't unroll loop: address of header block is taken.\n");
325 }
326
327 assert(ULO.Count > 0);
328
329 // All these values should be taken only after peeling because they might have
330 // changed.
331 BasicBlock *Preheader = L->getLoopPreheader();
332 BasicBlock *Header = L->getHeader();
333 BasicBlock *LatchBlock = L->getLoopLatch();
335 L->getExitBlocks(ExitBlocks);
336 std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks();
337
338 const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L);
339 const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
340 unsigned EstimatedLoopInvocationWeight = 0;
341 std::optional<unsigned> OriginalTripCount =
342 llvm::getLoopEstimatedTripCount(L, &EstimatedLoopInvocationWeight);
343
344 // Effectively "DCE" unrolled iterations that are beyond the max tripcount
345 // and will never be executed.
346 if (MaxTripCount && ULO.Count > MaxTripCount)
347 ULO.Count = MaxTripCount;
348
349 struct ExitInfo {
350 unsigned TripCount;
351 unsigned TripMultiple;
352 unsigned BreakoutTrip;
353 bool ExitOnTrue;
354 BasicBlock *FirstExitingBlock = nullptr;
355 SmallVector<BasicBlock *> ExitingBlocks;
356 };
358 SmallVector<BasicBlock *, 4> ExitingBlocks;
359 L->getExitingBlocks(ExitingBlocks);
360 for (auto *ExitingBlock : ExitingBlocks) {
361 // The folding code is not prepared to deal with non-branch instructions
362 // right now.
363 auto *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
364 if (!BI)
365 continue;
366
367 ExitInfo &Info = ExitInfos.try_emplace(ExitingBlock).first->second;
368 Info.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
369 Info.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
370 if (Info.TripCount != 0) {
371 Info.BreakoutTrip = Info.TripCount % ULO.Count;
372 Info.TripMultiple = 0;
373 } else {
374 Info.BreakoutTrip = Info.TripMultiple =
375 (unsigned)std::gcd(ULO.Count, Info.TripMultiple);
376 }
377 Info.ExitOnTrue = !L->contains(BI->getSuccessor(0));
378 Info.ExitingBlocks.push_back(ExitingBlock);
379 LLVM_DEBUG(dbgs() << " Exiting block %" << ExitingBlock->getName()
380 << ": TripCount=" << Info.TripCount
381 << ", TripMultiple=" << Info.TripMultiple
382 << ", BreakoutTrip=" << Info.BreakoutTrip << "\n");
383 }
384
385 // Are we eliminating the loop control altogether? Note that we can know
386 // we're eliminating the backedge without knowing exactly which iteration
387 // of the unrolled body exits.
388 const bool CompletelyUnroll = ULO.Count == MaxTripCount;
389
390 const bool PreserveOnlyFirst = CompletelyUnroll && MaxOrZero;
391
392 // There's no point in performing runtime unrolling if this unroll count
393 // results in a full unroll.
394 if (CompletelyUnroll)
395 ULO.Runtime = false;
396
397 // Go through all exits of L and see if there are any phi-nodes there. We just
398 // conservatively assume that they're inserted to preserve LCSSA form, which
399 // means that complete unrolling might break this form. We need to either fix
400 // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For
401 // now we just recompute LCSSA for the outer loop, but it should be possible
402 // to fix it in-place.
403 bool NeedToFixLCSSA =
404 PreserveLCSSA && CompletelyUnroll &&
405 any_of(ExitBlocks,
406 [](const BasicBlock *BB) { return isa<PHINode>(BB->begin()); });
407
408 // The current loop unroll pass can unroll loops that have
409 // (1) single latch; and
410 // (2a) latch is unconditional; or
411 // (2b) latch is conditional and is an exiting block
412 // FIXME: The implementation can be extended to work with more complicated
413 // cases, e.g. loops with multiple latches.
414 BranchInst *LatchBI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
415
416 // A conditional branch which exits the loop, which can be optimized to an
417 // unconditional branch in the unrolled loop in some cases.
418 bool LatchIsExiting = L->isLoopExiting(LatchBlock);
419 if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) {
421 dbgs() << "Can't unroll; a conditional latch must exit the loop");
422 return LoopUnrollResult::Unmodified;
423 }
424
425 // Loops containing convergent instructions cannot use runtime unrolling,
426 // as the prologue/epilogue may add additional control-dependencies to
427 // convergent operations.
429 {
430 bool HasConvergent = false;
431 for (auto &BB : L->blocks())
432 for (auto &I : *BB)
433 if (auto *CB = dyn_cast<CallBase>(&I))
434 HasConvergent |= CB->isConvergent();
435 assert((!HasConvergent || !ULO.Runtime) &&
436 "Can't runtime unroll if loop contains a convergent operation.");
437 });
438
439 bool EpilogProfitability =
440 UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog
442
443 if (ULO.Runtime &&
445 EpilogProfitability, ULO.UnrollRemainder,
446 ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI,
447 PreserveLCSSA, RemainderLoop)) {
448 if (ULO.Force)
449 ULO.Runtime = false;
450 else {
451 LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be "
452 "generated when assuming runtime trip count\n");
453 return LoopUnrollResult::Unmodified;
454 }
455 }
456
457 using namespace ore;
458 // Report the unrolling decision.
459 if (CompletelyUnroll) {
460 LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
461 << " with trip count " << ULO.Count << "!\n");
462 if (ORE)
463 ORE->emit([&]() {
464 return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
465 L->getHeader())
466 << "completely unrolled loop with "
467 << NV("UnrollCount", ULO.Count) << " iterations";
468 });
469 } else {
470 LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by "
471 << ULO.Count);
472 if (ULO.Runtime)
473 LLVM_DEBUG(dbgs() << " with run-time trip count");
474 LLVM_DEBUG(dbgs() << "!\n");
475
476 if (ORE)
477 ORE->emit([&]() {
478 OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
479 L->getHeader());
480 Diag << "unrolled loop by a factor of " << NV("UnrollCount", ULO.Count);
481 if (ULO.Runtime)
482 Diag << " with run-time trip count";
483 return Diag;
484 });
485 }
486
487 // We are going to make changes to this loop. SCEV may be keeping cached info
488 // about it, in particular about backedge taken count. The changes we make
489 // are guaranteed to invalidate this information for our loop. It is tempting
490 // to only invalidate the loop being unrolled, but it is incorrect as long as
491 // all exiting branches from all inner loops have impact on the outer loops,
492 // and if something changes inside them then any of outer loops may also
493 // change. When we forget outermost loop, we also forget all contained loops
494 // and this is what we need here.
495 if (SE) {
496 if (ULO.ForgetAllSCEV)
497 SE->forgetAllLoops();
498 else {
499 SE->forgetTopmostLoop(L);
501 }
502 }
503
504 if (!LatchIsExiting)
505 ++NumUnrolledNotLatch;
506
507 // For the first iteration of the loop, we should use the precloned values for
508 // PHI nodes. Insert associations now.
509 ValueToValueMapTy LastValueMap;
510 std::vector<PHINode*> OrigPHINode;
511 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
512 OrigPHINode.push_back(cast<PHINode>(I));
513 }
514
515 std::vector<BasicBlock *> Headers;
516 std::vector<BasicBlock *> Latches;
517 Headers.push_back(Header);
518 Latches.push_back(LatchBlock);
519
520 // The current on-the-fly SSA update requires blocks to be processed in
521 // reverse postorder so that LastValueMap contains the correct value at each
522 // exit.
523 LoopBlocksDFS DFS(L);
524 DFS.perform(LI);
525
526 // Stash the DFS iterators before adding blocks to the loop.
527 LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
528 LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
529
530 std::vector<BasicBlock*> UnrolledLoopBlocks = L->getBlocks();
531
532 // Loop Unrolling might create new loops. While we do preserve LoopInfo, we
533 // might break loop-simplified form for these loops (as they, e.g., would
534 // share the same exit blocks). We'll keep track of loops for which we can
535 // break this so that later we can re-simplify them.
536 SmallSetVector<Loop *, 4> LoopsToSimplify;
537 for (Loop *SubLoop : *L)
538 LoopsToSimplify.insert(SubLoop);
539
540 // When a FSDiscriminator is enabled, we don't need to add the multiply
541 // factors to the discriminators.
542 if (Header->getParent()->shouldEmitDebugInfoForProfiling() &&
544 for (BasicBlock *BB : L->getBlocks())
545 for (Instruction &I : *BB)
546 if (!I.isDebugOrPseudoInst())
547 if (const DILocation *DIL = I.getDebugLoc()) {
548 auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(ULO.Count);
549 if (NewDIL)
550 I.setDebugLoc(*NewDIL);
551 else
553 << "Failed to create new discriminator: "
554 << DIL->getFilename() << " Line: " << DIL->getLine());
555 }
556
557 // Identify what noalias metadata is inside the loop: if it is inside the
558 // loop, the associated metadata must be cloned for each iteration.
559 SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
560 identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
561
562 // We place the unrolled iterations immediately after the original loop
563 // latch. This is a reasonable default placement if we don't have block
564 // frequencies, and if we do, well the layout will be adjusted later.
565 auto BlockInsertPt = std::next(LatchBlock->getIterator());
566 for (unsigned It = 1; It != ULO.Count; ++It) {
569 NewLoops[L] = L;
570
571 for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
573 BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
574 Header->getParent()->insert(BlockInsertPt, New);
575
576 assert((*BB != Header || LI->getLoopFor(*BB) == L) &&
577 "Header should not be in a sub-loop");
578 // Tell LI about New.
579 const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops);
580 if (OldLoop)
581 LoopsToSimplify.insert(NewLoops[OldLoop]);
582
583 if (*BB == Header)
584 // Loop over all of the PHI nodes in the block, changing them to use
585 // the incoming values from the previous block.
586 for (PHINode *OrigPHI : OrigPHINode) {
587 PHINode *NewPHI = cast<PHINode>(VMap[OrigPHI]);
588 Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
589 if (Instruction *InValI = dyn_cast<Instruction>(InVal))
590 if (It > 1 && L->contains(InValI))
591 InVal = LastValueMap[InValI];
592 VMap[OrigPHI] = InVal;
593 NewPHI->eraseFromParent();
594 }
595
596 // Update our running map of newest clones
597 LastValueMap[*BB] = New;
598 for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
599 VI != VE; ++VI)
600 LastValueMap[VI->first] = VI->second;
601
602 // Add phi entries for newly created values to all exit blocks.
603 for (BasicBlock *Succ : successors(*BB)) {
604 if (L->contains(Succ))
605 continue;
606 for (PHINode &PHI : Succ->phis()) {
607 Value *Incoming = PHI.getIncomingValueForBlock(*BB);
608 ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
609 if (It != LastValueMap.end())
610 Incoming = It->second;
611 PHI.addIncoming(Incoming, New);
612 SE->forgetValue(&PHI);
613 }
614 }
615 // Keep track of new headers and latches as we create them, so that
616 // we can insert the proper branches later.
617 if (*BB == Header)
618 Headers.push_back(New);
619 if (*BB == LatchBlock)
620 Latches.push_back(New);
621
622 // Keep track of the exiting block and its successor block contained in
623 // the loop for the current iteration.
624 auto ExitInfoIt = ExitInfos.find(*BB);
625 if (ExitInfoIt != ExitInfos.end())
626 ExitInfoIt->second.ExitingBlocks.push_back(New);
627
628 NewBlocks.push_back(New);
629 UnrolledLoopBlocks.push_back(New);
630
631 // Update DomTree: since we just copy the loop body, and each copy has a
632 // dedicated entry block (copy of the header block), this header's copy
633 // dominates all copied blocks. That means, dominance relations in the
634 // copied body are the same as in the original body.
635 if (*BB == Header)
636 DT->addNewBlock(New, Latches[It - 1]);
637 else {
638 auto BBDomNode = DT->getNode(*BB);
639 auto BBIDom = BBDomNode->getIDom();
640 BasicBlock *OriginalBBIDom = BBIDom->getBlock();
641 DT->addNewBlock(
642 New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
643 }
644 }
645
646 // Remap all instructions in the most recent iteration
647 remapInstructionsInBlocks(NewBlocks, LastValueMap);
648 for (BasicBlock *NewBlock : NewBlocks)
649 for (Instruction &I : *NewBlock)
650 if (auto *II = dyn_cast<AssumeInst>(&I))
651 AC->registerAssumption(II);
652
653 {
654 // Identify what other metadata depends on the cloned version. After
655 // cloning, replace the metadata with the corrected version for both
656 // memory instructions and noalias intrinsics.
657 std::string ext = (Twine("It") + Twine(It)).str();
658 cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
659 Header->getContext(), ext);
660 }
661 }
662
663 // Loop over the PHI nodes in the original block, setting incoming values.
664 for (PHINode *PN : OrigPHINode) {
665 if (CompletelyUnroll) {
666 PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
667 PN->eraseFromParent();
668 } else if (ULO.Count > 1) {
669 Value *InVal = PN->removeIncomingValue(LatchBlock, false);
670 // If this value was defined in the loop, take the value defined by the
671 // last iteration of the loop.
672 if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
673 if (L->contains(InValI))
674 InVal = LastValueMap[InVal];
675 }
676 assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
677 PN->addIncoming(InVal, Latches.back());
678 }
679 }
680
681 // Connect latches of the unrolled iterations to the headers of the next
682 // iteration. Currently they point to the header of the same iteration.
683 for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
684 unsigned j = (i + 1) % e;
685 Latches[i]->getTerminator()->replaceSuccessorWith(Headers[i], Headers[j]);
686 }
687
688 // Update dominators of blocks we might reach through exits.
689 // Immediate dominator of such block might change, because we add more
690 // routes which can lead to the exit: we can now reach it from the copied
691 // iterations too.
692 if (ULO.Count > 1) {
693 for (auto *BB : OriginalLoopBlocks) {
694 auto *BBDomNode = DT->getNode(BB);
695 SmallVector<BasicBlock *, 16> ChildrenToUpdate;
696 for (auto *ChildDomNode : BBDomNode->children()) {
697 auto *ChildBB = ChildDomNode->getBlock();
698 if (!L->contains(ChildBB))
699 ChildrenToUpdate.push_back(ChildBB);
700 }
701 // The new idom of the block will be the nearest common dominator
702 // of all copies of the previous idom. This is equivalent to the
703 // nearest common dominator of the previous idom and the first latch,
704 // which dominates all copies of the previous idom.
705 BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, LatchBlock);
706 for (auto *ChildBB : ChildrenToUpdate)
707 DT->changeImmediateDominator(ChildBB, NewIDom);
708 }
709 }
710
712 DT->verify(DominatorTree::VerificationLevel::Fast));
713
715 auto SetDest = [&](BasicBlock *Src, bool WillExit, bool ExitOnTrue) {
716 auto *Term = cast<BranchInst>(Src->getTerminator());
717 const unsigned Idx = ExitOnTrue ^ WillExit;
718 BasicBlock *Dest = Term->getSuccessor(Idx);
719 BasicBlock *DeadSucc = Term->getSuccessor(1-Idx);
720
721 // Remove predecessors from all non-Dest successors.
722 DeadSucc->removePredecessor(Src, /* KeepOneInputPHIs */ true);
723
724 // Replace the conditional branch with an unconditional one.
725 BranchInst::Create(Dest, Term);
726 Term->eraseFromParent();
727
728 DTUpdates.emplace_back(DominatorTree::Delete, Src, DeadSucc);
729 };
730
731 auto WillExit = [&](const ExitInfo &Info, unsigned i, unsigned j,
732 bool IsLatch) -> std::optional<bool> {
733 if (CompletelyUnroll) {
734 if (PreserveOnlyFirst) {
735 if (i == 0)
736 return std::nullopt;
737 return j == 0;
738 }
739 // Complete (but possibly inexact) unrolling
740 if (j == 0)
741 return true;
742 if (Info.TripCount && j != Info.TripCount)
743 return false;
744 return std::nullopt;
745 }
746
747 if (ULO.Runtime) {
748 // If runtime unrolling inserts a prologue, information about non-latch
749 // exits may be stale.
750 if (IsLatch && j != 0)
751 return false;
752 return std::nullopt;
753 }
754
755 if (j != Info.BreakoutTrip &&
756 (Info.TripMultiple == 0 || j % Info.TripMultiple != 0)) {
757 // If we know the trip count or a multiple of it, we can safely use an
758 // unconditional branch for some iterations.
759 return false;
760 }
761 return std::nullopt;
762 };
763
764 // Fold branches for iterations where we know that they will exit or not
765 // exit.
766 for (auto &Pair : ExitInfos) {
767 ExitInfo &Info = Pair.second;
768 for (unsigned i = 0, e = Info.ExitingBlocks.size(); i != e; ++i) {
769 // The branch destination.
770 unsigned j = (i + 1) % e;
771 bool IsLatch = Pair.first == LatchBlock;
772 std::optional<bool> KnownWillExit = WillExit(Info, i, j, IsLatch);
773 if (!KnownWillExit) {
774 if (!Info.FirstExitingBlock)
775 Info.FirstExitingBlock = Info.ExitingBlocks[i];
776 continue;
777 }
778
779 // We don't fold known-exiting branches for non-latch exits here,
780 // because this ensures that both all loop blocks and all exit blocks
781 // remain reachable in the CFG.
782 // TODO: We could fold these branches, but it would require much more
783 // sophisticated updates to LoopInfo.
784 if (*KnownWillExit && !IsLatch) {
785 if (!Info.FirstExitingBlock)
786 Info.FirstExitingBlock = Info.ExitingBlocks[i];
787 continue;
788 }
789
790 SetDest(Info.ExitingBlocks[i], *KnownWillExit, Info.ExitOnTrue);
791 }
792 }
793
794 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
795 DomTreeUpdater *DTUToUse = &DTU;
796 if (ExitingBlocks.size() == 1 && ExitInfos.size() == 1) {
797 // Manually update the DT if there's a single exiting node. In that case
798 // there's a single exit node and it is sufficient to update the nodes
799 // immediately dominated by the original exiting block. They will become
800 // dominated by the first exiting block that leaves the loop after
801 // unrolling. Note that the CFG inside the loop does not change, so there's
802 // no need to update the DT inside the unrolled loop.
803 DTUToUse = nullptr;
804 auto &[OriginalExit, Info] = *ExitInfos.begin();
805 if (!Info.FirstExitingBlock)
806 Info.FirstExitingBlock = Info.ExitingBlocks.back();
807 for (auto *C : to_vector(DT->getNode(OriginalExit)->children())) {
808 if (L->contains(C->getBlock()))
809 continue;
810 C->setIDom(DT->getNode(Info.FirstExitingBlock));
811 }
812 } else {
813 DTU.applyUpdates(DTUpdates);
814 }
815
816 // When completely unrolling, the last latch becomes unreachable.
817 if (!LatchIsExiting && CompletelyUnroll) {
818 // There is no need to update the DT here, because there must be a unique
819 // latch. Hence if the latch is not exiting it must directly branch back to
820 // the original loop header and does not dominate any nodes.
821 assert(LatchBlock->getSingleSuccessor() && "Loop with multiple latches?");
822 changeToUnreachable(Latches.back()->getTerminator(), PreserveLCSSA);
823 }
824
825 // Merge adjacent basic blocks, if possible.
826 for (BasicBlock *Latch : Latches) {
827 BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator());
828 assert((Term ||
829 (CompletelyUnroll && !LatchIsExiting && Latch == Latches.back())) &&
830 "Need a branch as terminator, except when fully unrolling with "
831 "unconditional latch");
832 if (Term && Term->isUnconditional()) {
833 BasicBlock *Dest = Term->getSuccessor(0);
834 BasicBlock *Fold = Dest->getUniquePredecessor();
835 if (MergeBlockIntoPredecessor(Dest, /*DTU=*/DTUToUse, LI,
836 /*MSSAU=*/nullptr, /*MemDep=*/nullptr,
837 /*PredecessorWithTwoSuccessors=*/false,
838 DTUToUse ? nullptr : DT)) {
839 // Dest has been folded into Fold. Update our worklists accordingly.
840 std::replace(Latches.begin(), Latches.end(), Dest, Fold);
841 llvm::erase_value(UnrolledLoopBlocks, Dest);
842 }
843 }
844 }
845
846 if (DTUToUse) {
847 // Apply updates to the DomTree.
848 DT = &DTU.getDomTree();
849 }
851 DT->verify(DominatorTree::VerificationLevel::Fast));
852
853 // At this point, the code is well formed. We now simplify the unrolled loop,
854 // doing constant propagation and dead code elimination as we go.
855 simplifyLoopAfterUnroll(L, !CompletelyUnroll && ULO.Count > 1, LI, SE, DT, AC,
856 TTI);
857
858 NumCompletelyUnrolled += CompletelyUnroll;
859 ++NumUnrolled;
860
861 Loop *OuterL = L->getParentLoop();
862 // Update LoopInfo if the loop is completely removed.
863 if (CompletelyUnroll) {
864 LI->erase(L);
865 // We shouldn't try to use `L` anymore.
866 L = nullptr;
867 } else if (OriginalTripCount) {
868 // Update the trip count. Note that the remainder has already logic
869 // computing it in `UnrollRuntimeLoopRemainder`.
870 setLoopEstimatedTripCount(L, *OriginalTripCount / ULO.Count,
871 EstimatedLoopInvocationWeight);
872 }
873
874 // LoopInfo should not be valid, confirm that.
876 LI->verify(*DT);
877
878 // After complete unrolling most of the blocks should be contained in OuterL.
879 // However, some of them might happen to be out of OuterL (e.g. if they
880 // precede a loop exit). In this case we might need to insert PHI nodes in
881 // order to preserve LCSSA form.
882 // We don't need to check this if we already know that we need to fix LCSSA
883 // form.
884 // TODO: For now we just recompute LCSSA for the outer loop in this case, but
885 // it should be possible to fix it in-place.
886 if (PreserveLCSSA && OuterL && CompletelyUnroll && !NeedToFixLCSSA)
887 NeedToFixLCSSA |= ::needToInsertPhisForLCSSA(OuterL, UnrolledLoopBlocks, LI);
888
889 // Make sure that loop-simplify form is preserved. We want to simplify
890 // at least one layer outside of the loop that was unrolled so that any
891 // changes to the parent loop exposed by the unrolling are considered.
892 if (OuterL) {
893 // OuterL includes all loops for which we can break loop-simplify, so
894 // it's sufficient to simplify only it (it'll recursively simplify inner
895 // loops too).
896 if (NeedToFixLCSSA) {
897 // LCSSA must be performed on the outermost affected loop. The unrolled
898 // loop's last loop latch is guaranteed to be in the outermost loop
899 // after LoopInfo's been updated by LoopInfo::erase.
900 Loop *LatchLoop = LI->getLoopFor(Latches.back());
901 Loop *FixLCSSALoop = OuterL;
902 if (!FixLCSSALoop->contains(LatchLoop))
903 while (FixLCSSALoop->getParentLoop() != LatchLoop)
904 FixLCSSALoop = FixLCSSALoop->getParentLoop();
905
906 formLCSSARecursively(*FixLCSSALoop, *DT, LI, SE);
907 } else if (PreserveLCSSA) {
908 assert(OuterL->isLCSSAForm(*DT) &&
909 "Loops should be in LCSSA form after loop-unroll.");
910 }
911
912 // TODO: That potentially might be compile-time expensive. We should try
913 // to fix the loop-simplified form incrementally.
914 simplifyLoop(OuterL, DT, LI, SE, AC, nullptr, PreserveLCSSA);
915 } else {
916 // Simplify loops for which we might've broken loop-simplify form.
917 for (Loop *SubLoop : LoopsToSimplify)
918 simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA);
919 }
920
921 return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
922 : LoopUnrollResult::PartiallyUnrolled;
923}
924
925/// Given an llvm.loop loop id metadata node, returns the loop hint metadata
926/// node with the given name (for example, "llvm.loop.unroll.count"). If no
927/// such metadata node exists, then nullptr is returned.
929 // First operand should refer to the loop id itself.
930 assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
931 assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
932
933 for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
934 MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
935 if (!MD)
936 continue;
937
938 MDString *S = dyn_cast<MDString>(MD->getOperand(0));
939 if (!S)
940 continue;
941
942 if (Name.equals(S->getString()))
943 return MD;
944 }
945 return nullptr;
946}
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Rewrite undef for PHI
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
std::string Name
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:496
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static bool needToInsertPhisForLCSSA(Loop *L, const std::vector< BasicBlock * > &Blocks, LoopInfo *LI)
Check if unrolling created a situation where we need to insert phi nodes to preserve LCSSA form.
Definition: LoopUnroll.cpp:123
static bool isEpilogProfitable(Loop *L)
The function chooses which type of unroll (epilog or prolog) is more profitabale.
Definition: LoopUnroll.cpp:202
static cl::opt< bool > UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden, cl::desc("Allow runtime unrolled loops to be unrolled " "with epilog instead of prolog."))
#define DEBUG_TYPE
Definition: LoopUnroll.cpp:80
static cl::opt< bool > UnrollVerifyLoopInfo("unroll-verify-loopinfo", cl::Hidden, cl::desc("Verify loopinfo after unrolling"), cl::init(false))
static cl::opt< bool > UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden, cl::desc("Verify domtree after unrolling"), cl::init(false))
#define I(x, y, z)
Definition: MD5.cpp:58
This file contains the declarations for metadata subclasses.
Module.h This file contains the declarations for the Module class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This defines the Use class.
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1934
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:335
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:304
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:326
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:353
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
Debug location.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:235
unsigned size() const
Definition: DenseMap.h:99
iterator begin()
Definition: DenseMap.h:75
iterator end()
Definition: DenseMap.h:84
iterator_range< iterator > children()
DomTreeNodeBase * getIDom() const
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
DominatorTree & getDomTree()
Flush DomTree updates and return DomTree.
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
Definition: Dominators.cpp:344
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:83
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Store the result of a depth first search within basic blocks contained by a single loop.
Definition: LoopIterator.h:97
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
Definition: LoopIterator.h:136
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
Definition: LoopIterator.h:101
void perform(const LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopInfo.cpp:1221
RPOIterator endRPO() const
Definition: LoopIterator.h:140
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:442
void erase(Loop *L)
Update LoopInfo after removing the last backedge from a loop.
Definition: LoopInfo.cpp:876
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:47
bool isLCSSAForm(const DominatorTree &DT, bool IgnoreTokens=true) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:462
Metadata node.
Definition: Metadata.h:950
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1303
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1309
A single uniqued string.
Definition: Metadata.h:611
StringRef getString() const
Definition: Metadata.cpp:509
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
The main scalar evolution driver.
unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getConstantMaxBackedgeTakenCo...
void forgetTopmostLoop(const Loop *L)
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
iterator find(const KeyT &Val)
Definition: ValueMap.h:155
iterator begin()
Definition: ValueMap.h:134
iterator end()
Definition: ValueMap.h:135
LLVM Value Representation.
Definition: Value.h:74
self_iterator getIterator()
Definition: ilist_node.h:82
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:982
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:278
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:819
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:529
auto successors(const MachineBasicBlock *BB)
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:408
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:666
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
cl::opt< bool > EnableFSDiscriminator
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1734
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:398
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl< WeakTrackingVH > &Dead)
SimplifyLoopIVs - Simplify users of induction variables within this loop.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1303
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:54
@ Unmodified
The loop was not modified.
unsigned changeToUnreachable(Instruction *I, bool PreserveLCSSA=false, DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Insert an unreachable instruction before the specified instruction, making it and the rest of the cod...
Definition: Local.cpp:2306
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
void erase_value(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2029
bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, unsigned EstimatedLoopInvocationWeight)
Set a loop's branch weight metadata to reflect that loop has EstimatedTripCount iterations and Estima...
Definition: LoopUtils.cpp:837
void cloneAndAdaptNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, ArrayRef< BasicBlock * > NewBlocks, LLVMContext &Context, StringRef Ext)
Clone the specified noalias decl scopes.
void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
const Loop * addClonedBlockToLoopInfo(BasicBlock *OriginalBB, BasicBlock *ClonedBB, LoopInfo *LI, NewLoopsMap &NewLoops)
Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary and adds a mapping from the o...
Definition: LoopUnroll.cpp:148
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:296
void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract ...
void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI)
Perform some cleanup and simplifications on loops after unrolling.
Definition: LoopUnroll.cpp:216
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:928
bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop=nullptr)
Insert code in the prolog/epilog code when unrolling a loop with a run-time trip-count.