LLVM 22.0.0git
LoopUnrollRuntime.cpp
Go to the documentation of this file.
1//===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements some loop unrolling utilities for loops with run-time
10// trip counts. See LoopUnroll.cpp for unrolling loops with compile-time
11// trip counts.
12//
13// The functions in this file are used to generate extra code when the
14// run-time trip count modulo the unroll factor is not 0. When this is the
15// case, we need to generate code to execute these 'left over' iterations.
16//
17// The current strategy generates an if-then-else sequence prior to the
18// unrolled loop to execute the 'left over' iterations before or after the
19// unrolled loop.
20//
21//===----------------------------------------------------------------------===//
22
23#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/MDBuilder.h"
32#include "llvm/IR/Module.h"
35#include "llvm/Support/Debug.h"
43#include <cmath>
44
45using namespace llvm;
46
47#define DEBUG_TYPE "loop-unroll"
48
49STATISTIC(NumRuntimeUnrolled,
50 "Number of loops unrolled with run-time trip counts");
52 "unroll-runtime-multi-exit", cl::init(false), cl::Hidden,
53 cl::desc("Allow runtime unrolling for loops with multiple exits, when "
54 "epilog is generated"));
56 "unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden,
57 cl::desc("Assume the non latch exit block to be predictable"));
58
59// Probability that the loop trip count is so small that after the prolog
60// we do not enter the unrolled loop at all.
61// It is unlikely that the loop trip count is smaller than the unroll factor;
62// other than that, the choice of constant is not tuned yet.
63static const uint32_t UnrolledLoopHeaderWeights[] = {1, 127};
64// Probability that the loop trip count is so small that we skip the unrolled
65// loop completely and immediately enter the epilogue loop.
66// It is unlikely that the loop trip count is smaller than the unroll factor;
67// other than that, the choice of constant is not tuned yet.
68static const uint32_t EpilogHeaderWeights[] = {1, 127};
69
70/// Connect the unrolling prolog code to the original loop.
71/// The unrolling prolog code contains code to execute the
72/// 'extra' iterations if the run-time trip count modulo the
73/// unroll count is non-zero.
74///
75/// This function performs the following:
76/// - Create PHI nodes at prolog end block to combine values
77/// that exit the prolog code and jump around the prolog.
78/// - Add a PHI operand to a PHI node at the loop exit block
79/// for values that exit the prolog and go around the loop.
80/// - Branch around the original loop if the trip count is less
81/// than the unroll factor.
82///
83static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
84 BasicBlock *PrologExit,
85 BasicBlock *OriginalLoopLatchExit,
86 BasicBlock *PreHeader, BasicBlock *NewPreHeader,
88 LoopInfo *LI, bool PreserveLCSSA,
89 ScalarEvolution &SE) {
90 // Loop structure should be the following:
91 // Preheader
92 // PrologHeader
93 // ...
94 // PrologLatch
95 // PrologExit
96 // NewPreheader
97 // Header
98 // ...
99 // Latch
100 // LatchExit
101 BasicBlock *Latch = L->getLoopLatch();
102 assert(Latch && "Loop must have a latch");
103 BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
104
105 // Create a PHI node for each outgoing value from the original loop
106 // (which means it is an outgoing value from the prolog code too).
107 // The new PHI node is inserted in the prolog end basic block.
108 // The new PHI node value is added as an operand of a PHI node in either
109 // the loop header or the loop exit block.
110 for (BasicBlock *Succ : successors(Latch)) {
111 for (PHINode &PN : Succ->phis()) {
112 // Add a new PHI node to the prolog end block and add the
113 // appropriate incoming values.
114 // TODO: This code assumes that the PrologExit (or the LatchExit block for
115 // prolog loop) contains only one predecessor from the loop, i.e. the
116 // PrologLatch. When supporting multiple-exiting block loops, we can have
117 // two or more blocks that have the LatchExit as the target in the
118 // original loop.
119 PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr");
120 NewPN->insertBefore(PrologExit->getFirstNonPHIIt());
121 // Adding a value to the new PHI node from the original loop preheader.
122 // This is the value that skips all the prolog code.
123 if (L->contains(&PN)) {
124 // Succ is loop header.
125 NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader),
126 PreHeader);
127 } else {
128 // Succ is LatchExit.
129 NewPN->addIncoming(PoisonValue::get(PN.getType()), PreHeader);
130 }
131
132 Value *V = PN.getIncomingValueForBlock(Latch);
134 if (L->contains(I)) {
135 V = VMap.lookup(I);
136 }
137 }
138 // Adding a value to the new PHI node from the last prolog block
139 // that was created.
140 NewPN->addIncoming(V, PrologLatch);
141
142 // Update the existing PHI node operand with the value from the
143 // new PHI node. How this is done depends on if the existing
144 // PHI node is in the original loop block, or the exit block.
145 if (L->contains(&PN))
146 PN.setIncomingValueForBlock(NewPreHeader, NewPN);
147 else
148 PN.addIncoming(NewPN, PrologExit);
150 }
151 }
152
153 // Make sure that created prolog loop is in simplified form
154 SmallVector<BasicBlock *, 4> PrologExitPreds;
155 Loop *PrologLoop = LI->getLoopFor(PrologLatch);
156 if (PrologLoop) {
157 for (BasicBlock *PredBB : predecessors(PrologExit))
158 if (PrologLoop->contains(PredBB))
159 PrologExitPreds.push_back(PredBB);
160
161 SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI,
162 nullptr, PreserveLCSSA);
163 }
164
165 // Create a branch around the original loop, which is taken if there are no
166 // iterations remaining to be executed after running the prologue.
167 Instruction *InsertPt = PrologExit->getTerminator();
168 IRBuilder<> B(InsertPt);
169
170 assert(Count != 0 && "nonsensical Count!");
171
172 // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
173 // This means %xtraiter is (BECount + 1) and all of the iterations of this
174 // loop were executed by the prologue. Note that if BECount <u (Count - 1)
175 // then (BECount + 1) cannot unsigned-overflow.
176 Value *BrLoopExit =
177 B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
178 // Split the exit to maintain loop canonicalization guarantees
179 SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit));
180 SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
181 nullptr, PreserveLCSSA);
182 // Add the branch to the exit block (around the unrolled loop)
183 MDNode *BranchWeights = nullptr;
184 if (hasBranchWeightMD(*Latch->getTerminator())) {
185 // Assume loop is nearly always entered.
186 MDBuilder MDB(B.getContext());
188 }
189 B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader,
190 BranchWeights);
191 InsertPt->eraseFromParent();
192 if (DT) {
193 auto *NewDom = DT->findNearestCommonDominator(OriginalLoopLatchExit,
194 PrologExit);
195 DT->changeImmediateDominator(OriginalLoopLatchExit, NewDom);
196 }
197}
198
199/// Assume, due to our position in the remainder loop or its guard, anywhere
200/// from 0 to \p N more iterations can possibly execute. Among such cases in
201/// the original loop (with loop probability \p OriginalLoopProb), what is the
202/// probability of executing at least one more iteration?
204probOfNextInRemainder(BranchProbability OriginalLoopProb, unsigned N) {
205 // Each of these variables holds the original loop's probability that the
206 // number of iterations it will execute is some m in the specified range.
207 BranchProbability ProbOne = OriginalLoopProb; // 1 <= m
208 BranchProbability ProbTooMany = ProbOne.pow(N + 1); // N + 1 <= m
209 BranchProbability ProbNotTooMany = ProbTooMany.getCompl(); // 0 <= m <= N
210 BranchProbability ProbOneNotTooMany = ProbOne - ProbTooMany; // 1 <= m <= N
211 return ProbOneNotTooMany / ProbNotTooMany;
212}
213
214/// Connect the unrolling epilog code to the original loop.
215/// The unrolling epilog code contains code to execute the
216/// 'extra' iterations if the run-time trip count modulo the
217/// unroll count is non-zero.
218///
219/// This function performs the following:
220/// - Update PHI nodes at the epilog loop exit
221/// - Create PHI nodes at the unrolling loop exit and epilog preheader to
222/// combine values that exit the unrolling loop code and jump around it.
223/// - Update PHI operands in the epilog loop by the new PHI nodes
224/// - At the unrolling loop exit, branch around the epilog loop if extra iters
225// (ModVal) is zero.
226/// - At the epilog preheader, add an llvm.assume call that extra iters is
227/// non-zero. If the unrolling loop exit is the predecessor, the above new
228/// branch guarantees that assumption. If the unrolling loop preheader is the
229/// predecessor, then the required first iteration from the original loop has
230/// yet to be executed, so it must be executed in the epilog loop. If we
231/// later unroll the epilog loop, that llvm.assume call somehow enables
232/// ScalarEvolution to compute a epilog loop maximum trip count, which enables
233/// eliminating the branch at the end of the final unrolled epilog iteration.
234///
235static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
236 BasicBlock *Exit, BasicBlock *PreHeader,
237 BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
239 LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE,
240 unsigned Count, AssumptionCache &AC,
241 BranchProbability OriginalLoopProb) {
242 BasicBlock *Latch = L->getLoopLatch();
243 assert(Latch && "Loop must have a latch");
244 BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
245
246 // Loop structure should be the following:
247 //
248 // PreHeader
249 // NewPreHeader
250 // Header
251 // ...
252 // Latch
253 // NewExit (PN)
254 // EpilogPreHeader
255 // EpilogHeader
256 // ...
257 // EpilogLatch
258 // Exit (EpilogPN)
259
260 // Update PHI nodes at Exit.
261 for (PHINode &PN : NewExit->phis()) {
262 // PN should be used in another PHI located in Exit block as
263 // Exit was split by SplitBlockPredecessors into Exit and NewExit
264 // Basically it should look like:
265 // NewExit:
266 // PN = PHI [I, Latch]
267 // ...
268 // Exit:
269 // EpilogPN = PHI [PN, EpilogPreHeader], [X, Exit2], [Y, Exit2.epil]
270 //
271 // Exits from non-latch blocks point to the original exit block and the
272 // epilogue edges have already been added.
273 //
274 // There is EpilogPreHeader incoming block instead of NewExit as
275 // NewExit was split 1 more time to get EpilogPreHeader.
276 assert(PN.hasOneUse() && "The phi should have 1 use");
277 PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser());
278 assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
279
280 Value *V = PN.getIncomingValueForBlock(Latch);
282 if (I && L->contains(I))
283 // If value comes from an instruction in the loop add VMap value.
284 V = VMap.lookup(I);
285 // For the instruction out of the loop, constant or undefined value
286 // insert value itself.
287 EpilogPN->addIncoming(V, EpilogLatch);
288
289 assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
290 "EpilogPN should have EpilogPreHeader incoming block");
291 // Change EpilogPreHeader incoming block to NewExit.
292 EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
293 NewExit);
294 // Now PHIs should look like:
295 // NewExit:
296 // PN = PHI [I, Latch]
297 // ...
298 // Exit:
299 // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
300 }
301
302 // Create PHI nodes at NewExit (from the unrolling loop Latch) and at
303 // EpilogPreHeader (from PreHeader and NewExit). Update corresponding PHI
304 // nodes in epilog loop.
305 for (BasicBlock *Succ : successors(Latch)) {
306 // Skip this as we already updated phis in exit blocks.
307 if (!L->contains(Succ))
308 continue;
309
310 // Succ here appears to always be just L->getHeader(). Otherwise, how do we
311 // know its corresponding epilog block (from VMap) is EpilogHeader and thus
312 // EpilogPreHeader is the right incoming block for VPN, as set below?
313 // TODO: Can we thus avoid the enclosing loop over successors?
314 assert(Succ == L->getHeader() &&
315 "Expect the only in-loop successor of latch to be the loop header");
316
317 for (PHINode &PN : Succ->phis()) {
318 // Add new PHI nodes to the loop exit block.
319 PHINode *NewPN0 = PHINode::Create(PN.getType(), /*NumReservedValues=*/1,
320 PN.getName() + ".unr");
321 NewPN0->insertBefore(NewExit->getFirstNonPHIIt());
322 // Add value to the new PHI node from the unrolling loop latch.
323 NewPN0->addIncoming(PN.getIncomingValueForBlock(Latch), Latch);
324
325 // Add new PHI nodes to EpilogPreHeader.
326 PHINode *NewPN1 = PHINode::Create(PN.getType(), /*NumReservedValues=*/2,
327 PN.getName() + ".epil.init");
328 NewPN1->insertBefore(EpilogPreHeader->getFirstNonPHIIt());
329 // Add value to the new PHI node from the unrolling loop preheader.
330 NewPN1->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
331 // Add value to the new PHI node from the epilog loop guard.
332 NewPN1->addIncoming(NewPN0, NewExit);
333
334 // Update the existing PHI node operand with the value from the new PHI
335 // node. Corresponding instruction in epilog loop should be PHI.
336 PHINode *VPN = cast<PHINode>(VMap[&PN]);
337 VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN1);
338 }
339 }
340
341 // In NewExit, branch around the epilog loop if no extra iters.
342 Instruction *InsertPt = NewExit->getTerminator();
343 IRBuilder<> B(InsertPt);
344 Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
345 assert(Exit && "Loop must have a single exit block only");
346 // Split the epilogue exit to maintain loop canonicalization guarantees
348 SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
349 PreserveLCSSA);
350 // Add the branch to the exit block (around the epilog loop)
351 MDNode *BranchWeights = nullptr;
352 if (OriginalLoopProb.isUnknown() &&
353 hasBranchWeightMD(*Latch->getTerminator())) {
354 // Assume equal distribution in interval [0, Count).
355 MDBuilder MDB(B.getContext());
356 BranchWeights = MDB.createBranchWeights(1, Count - 1);
357 }
358 BranchInst *RemainderLoopGuard =
359 B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit, BranchWeights);
360 if (!OriginalLoopProb.isUnknown()) {
361 setBranchProbability(RemainderLoopGuard,
362 probOfNextInRemainder(OriginalLoopProb, Count - 1),
363 /*ForFirstTarget=*/true);
364 }
365 InsertPt->eraseFromParent();
366 if (DT) {
367 auto *NewDom = DT->findNearestCommonDominator(Exit, NewExit);
368 DT->changeImmediateDominator(Exit, NewDom);
369 }
370
371 // In EpilogPreHeader, assume extra iters is non-zero.
372 IRBuilder<> B2(EpilogPreHeader, EpilogPreHeader->getFirstNonPHIIt());
373 Value *ModIsNotNull = B2.CreateIsNotNull(ModVal, "lcmp.mod");
374 AssumeInst *AI = cast<AssumeInst>(B2.CreateAssumption(ModIsNotNull));
375 AC.registerAssumption(AI);
376}
377
378/// Create a clone of the blocks in a loop and connect them together. A new
379/// loop will be created including all cloned blocks, and the iterator of the
380/// new loop switched to count NewIter down to 0.
381/// The cloned blocks should be inserted between InsertTop and InsertBot.
382/// InsertTop should be new preheader, InsertBot new loop exit.
383/// Returns the new cloned loop that is created.
384static Loop *CloneLoopBlocks(Loop *L, Value *NewIter,
385 const bool UseEpilogRemainder,
386 const bool UnrollRemainder, BasicBlock *InsertTop,
387 BasicBlock *InsertBot, BasicBlock *Preheader,
388 std::vector<BasicBlock *> &NewBlocks,
389 LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
390 DominatorTree *DT, LoopInfo *LI, unsigned Count,
391 std::optional<unsigned> OriginalTripCount,
392 BranchProbability OriginalLoopProb) {
393 StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
394 BasicBlock *Header = L->getHeader();
395 BasicBlock *Latch = L->getLoopLatch();
396 Function *F = Header->getParent();
397 LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
398 LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
399 Loop *ParentLoop = L->getParentLoop();
400 NewLoopsMap NewLoops;
401 NewLoops[ParentLoop] = ParentLoop;
402
403 // For each block in the original loop, create a new copy,
404 // and update the value map with the newly created values.
405 for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
406 BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
407 NewBlocks.push_back(NewBB);
408
409 addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
410
411 VMap[*BB] = NewBB;
412 if (Header == *BB) {
413 // For the first block, add a CFG connection to this newly
414 // created block.
415 InsertTop->getTerminator()->setSuccessor(0, NewBB);
416 }
417
418 if (DT) {
419 if (Header == *BB) {
420 // The header is dominated by the preheader.
421 DT->addNewBlock(NewBB, InsertTop);
422 } else {
423 // Copy information from original loop to unrolled loop.
424 BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
425 DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
426 }
427 }
428
429 if (Latch == *BB) {
430 // For the last block, create a loop back to cloned head.
431 VMap.erase((*BB)->getTerminator());
432 // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.
433 // Subtle: NewIter can be 0 if we wrapped when computing the trip count,
434 // thus we must compare the post-increment (wrapping) value.
435 BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
436 BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
437 IRBuilder<> Builder(LatchBR);
438 PHINode *NewIdx =
439 PHINode::Create(NewIter->getType(), 2, suffix + ".iter");
440 NewIdx->insertBefore(FirstLoopBB->getFirstNonPHIIt());
441 auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
442 auto *One = ConstantInt::get(NewIdx->getType(), 1);
443 Value *IdxNext =
444 Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
445 Value *IdxCmp = Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp");
446 MDNode *BranchWeights = nullptr;
447 if ((OriginalLoopProb.isUnknown() || !UseEpilogRemainder) &&
448 hasBranchWeightMD(*LatchBR)) {
449 uint32_t ExitWeight;
450 uint32_t BackEdgeWeight;
451 if (Count >= 3) {
452 // Note: We do not enter this loop for zero-remainders. The check
453 // is at the end of the loop. We assume equal distribution between
454 // possible remainders in [1, Count).
455 ExitWeight = 1;
456 BackEdgeWeight = (Count - 2) / 2;
457 } else {
458 // Unnecessary backedge, should never be taken. The conditional
459 // jump should be optimized away later.
460 ExitWeight = 1;
461 BackEdgeWeight = 0;
462 }
463 MDBuilder MDB(Builder.getContext());
464 BranchWeights = MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
465 }
466 BranchInst *RemainderLoopLatch =
467 Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot, BranchWeights);
468 if (!OriginalLoopProb.isUnknown() && UseEpilogRemainder) {
469 // Compute the total frequency of the original loop body from the
470 // remainder iterations. Once we've reached them, the first of them
471 // always executes, so its frequency and probability are 1.
472 double FreqRemIters = 1;
473 if (Count > 2) {
475 for (unsigned N = Count - 2; N >= 1; --N) {
476 ProbReaching *= probOfNextInRemainder(OriginalLoopProb, N);
477 FreqRemIters += double(ProbReaching.getNumerator()) /
478 ProbReaching.getDenominator();
479 }
480 }
481 // Solve for the loop probability that would produce that frequency.
482 // Sum(i=0..inf)(Prob^i) = 1/(1-Prob) = FreqRemIters.
483 double ProbDouble = 1 - 1 / FreqRemIters;
485 std::round(ProbDouble * BranchProbability::getDenominator()),
487 setBranchProbability(RemainderLoopLatch, Prob, /*ForFirstTarget=*/true);
488 }
489 NewIdx->addIncoming(Zero, InsertTop);
490 NewIdx->addIncoming(IdxNext, NewBB);
491 LatchBR->eraseFromParent();
492 }
493 }
494
495 // Change the incoming values to the ones defined in the preheader or
496 // cloned loop.
497 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
498 PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
499 unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
500 NewPHI->setIncomingBlock(idx, InsertTop);
501 BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
502 idx = NewPHI->getBasicBlockIndex(Latch);
503 Value *InVal = NewPHI->getIncomingValue(idx);
504 NewPHI->setIncomingBlock(idx, NewLatch);
505 if (Value *V = VMap.lookup(InVal))
506 NewPHI->setIncomingValue(idx, V);
507 }
508
509 Loop *NewLoop = NewLoops[L];
510 assert(NewLoop && "L should have been cloned");
511
512 if (OriginalTripCount && UseEpilogRemainder)
513 setLoopEstimatedTripCount(NewLoop, *OriginalTripCount % Count);
514
515 // Add unroll disable metadata to disable future unrolling for this loop.
516 if (!UnrollRemainder)
517 NewLoop->setLoopAlreadyUnrolled();
518 return NewLoop;
519}
520
521/// Returns true if we can profitably unroll the multi-exit loop L. Currently,
522/// we return true only if UnrollRuntimeMultiExit is set to true.
524 Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
525 bool UseEpilogRemainder) {
526
527 // The main pain point with multi-exit loop unrolling is that once unrolled,
528 // we will not be able to merge all blocks into a straight line code.
529 // There are branches within the unrolled loop that go to the OtherExits.
530 // The second point is the increase in code size, but this is true
531 // irrespective of multiple exits.
532
533 // Note: Both the heuristics below are coarse grained. We are essentially
534 // enabling unrolling of loops that have a single side exit other than the
535 // normal LatchExit (i.e. exiting into a deoptimize block).
536 // The heuristics considered are:
537 // 1. low number of branches in the unrolled version.
538 // 2. high predictability of these extra branches.
539 // We avoid unrolling loops that have more than two exiting blocks. This
540 // limits the total number of branches in the unrolled loop to be atmost
541 // the unroll factor (since one of the exiting blocks is the latch block).
542 SmallVector<BasicBlock*, 4> ExitingBlocks;
543 L->getExitingBlocks(ExitingBlocks);
544 if (ExitingBlocks.size() > 2)
545 return false;
546
547 // Allow unrolling of loops with no non latch exit blocks.
548 if (OtherExits.size() == 0)
549 return true;
550
551 // The second heuristic is that L has one exit other than the latchexit and
552 // that exit is a deoptimize block. We know that deoptimize blocks are rarely
553 // taken, which also implies the branch leading to the deoptimize block is
554 // highly predictable. When UnrollRuntimeOtherExitPredictable is specified, we
555 // assume the other exit branch is predictable even if it has no deoptimize
556 // call.
557 return (OtherExits.size() == 1 &&
559 OtherExits[0]->getPostdominatingDeoptimizeCall()));
560 // TODO: These can be fine-tuned further to consider code size or deopt states
561 // that are captured by the deoptimize exit block.
562 // Also, we can extend this to support more cases, if we actually
563 // know of kinds of multiexit loops that would benefit from unrolling.
564}
565
566/// Calculate ModVal = (BECount + 1) % Count on the abstract integer domain
567/// accounting for the possibility of unsigned overflow in the 2s complement
568/// domain. Preconditions:
569/// 1) TripCount = BECount + 1 (allowing overflow)
570/// 2) Log2(Count) <= BitWidth(BECount)
572 Value *TripCount, unsigned Count) {
573 // Note that TripCount is BECount + 1.
574 if (isPowerOf2_32(Count))
575 // If the expression is zero, then either:
576 // 1. There are no iterations to be run in the prolog/epilog loop.
577 // OR
578 // 2. The addition computing TripCount overflowed.
579 //
580 // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
581 // the number of iterations that remain to be run in the original loop is a
582 // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (a
583 // precondition of this method).
584 return B.CreateAnd(TripCount, Count - 1, "xtraiter");
585
586 // As (BECount + 1) can potentially unsigned overflow we count
587 // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
588 Constant *CountC = ConstantInt::get(BECount->getType(), Count);
589 Value *ModValTmp = B.CreateURem(BECount, CountC);
590 Value *ModValAdd = B.CreateAdd(ModValTmp,
591 ConstantInt::get(ModValTmp->getType(), 1));
592 // At that point (BECount % Count) + 1 could be equal to Count.
593 // To handle this case we need to take mod by Count one more time.
594 return B.CreateURem(ModValAdd, CountC, "xtraiter");
595}
596
597
598/// Insert code in the prolog/epilog code when unrolling a loop with a
599/// run-time trip-count.
600///
601/// This method assumes that the loop unroll factor is total number
602/// of loop bodies in the loop after unrolling. (Some folks refer
603/// to the unroll factor as the number of *extra* copies added).
604/// We assume also that the loop unroll factor is a power-of-two. So, after
605/// unrolling the loop, the number of loop bodies executed is 2,
606/// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch
607/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
608/// the switch instruction is generated.
609///
610/// ***Prolog case***
611/// extraiters = tripcount % loopfactor
612/// if (extraiters == 0) jump Loop:
613/// else jump Prol:
614/// Prol: LoopBody;
615/// extraiters -= 1 // Omitted if unroll factor is 2.
616/// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.
617/// if (tripcount < loopfactor) jump End:
618/// Loop:
619/// ...
620/// End:
621///
622/// ***Epilog case***
623/// extraiters = tripcount % loopfactor
624/// if (tripcount < loopfactor) jump LoopExit:
625/// unroll_iters = tripcount - extraiters
626/// Loop: LoopBody; (executes unroll_iter times);
627/// unroll_iter -= 1
628/// if (unroll_iter != 0) jump Loop:
629/// LoopExit:
630/// if (extraiters == 0) jump EpilExit:
631/// Epil: LoopBody; (executes extraiters times)
632/// extraiters -= 1 // Omitted if unroll factor is 2.
633/// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.
634/// EpilExit:
635
637 Loop *L, unsigned Count, bool AllowExpensiveTripCount,
638 bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV,
640 const TargetTransformInfo *TTI, bool PreserveLCSSA,
641 unsigned SCEVExpansionBudget, bool RuntimeUnrollMultiExit,
642 Loop **ResultLoop, std::optional<unsigned> OriginalTripCount,
643 BranchProbability OriginalLoopProb) {
644 LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
645 LLVM_DEBUG(L->dump());
646 LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
647 : dbgs() << "Using prolog remainder.\n");
648
649 // Make sure the loop is in canonical form.
650 if (!L->isLoopSimplifyForm()) {
651 LLVM_DEBUG(dbgs() << "Not in simplify form!\n");
652 return false;
653 }
654
655 // Guaranteed by LoopSimplifyForm.
656 BasicBlock *Latch = L->getLoopLatch();
657 BasicBlock *Header = L->getHeader();
658
659 BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
660
661 if (!LatchBR || LatchBR->isUnconditional()) {
662 // The loop-rotate pass can be helpful to avoid this in many cases.
664 dbgs()
665 << "Loop latch not terminated by a conditional branch.\n");
666 return false;
667 }
668
669 unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
670 BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
671
672 if (L->contains(LatchExit)) {
673 // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
674 // targets of the Latch be an exit block out of the loop.
676 dbgs()
677 << "One of the loop latch successors must be the exit block.\n");
678 return false;
679 }
680
681 // These are exit blocks other than the target of the latch exiting block.
683 L->getUniqueNonLatchExitBlocks(OtherExits);
684 // Support only single exit and exiting block unless multi-exit loop
685 // unrolling is enabled.
686 if (!L->getExitingBlock() || OtherExits.size()) {
687 // We rely on LCSSA form being preserved when the exit blocks are transformed.
688 // (Note that only an off-by-default mode of the old PM disables PreserveLCCA.)
689 if (!PreserveLCSSA)
690 return false;
691
692 // Priority goes to UnrollRuntimeMultiExit if it's supplied.
693 if (UnrollRuntimeMultiExit.getNumOccurrences()) {
695 return false;
696 } else {
697 // Otherwise perform multi-exit unrolling, if either the target indicates
698 // it is profitable or the general profitability heuristics apply.
699 if (!RuntimeUnrollMultiExit &&
700 !canProfitablyRuntimeUnrollMultiExitLoop(L, OtherExits, LatchExit,
701 UseEpilogRemainder)) {
702 LLVM_DEBUG(dbgs() << "Multiple exit/exiting blocks in loop and "
703 "multi-exit unrolling not enabled!\n");
704 return false;
705 }
706 }
707 }
708 // Use Scalar Evolution to compute the trip count. This allows more loops to
709 // be unrolled than relying on induction var simplification.
710 if (!SE)
711 return false;
712
713 // Only unroll loops with a computable trip count.
714 // We calculate the backedge count by using getExitCount on the Latch block,
715 // which is proven to be the only exiting block in this loop. This is same as
716 // calculating getBackedgeTakenCount on the loop (which computes SCEV for all
717 // exiting blocks).
718 const SCEV *BECountSC = SE->getExitCount(L, Latch);
719 if (isa<SCEVCouldNotCompute>(BECountSC)) {
720 LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n");
721 return false;
722 }
723
724 unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
725
726 // Add 1 since the backedge count doesn't include the first loop iteration.
727 // (Note that overflow can occur, this is handled explicitly below)
728 const SCEV *TripCountSC =
729 SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
730 if (isa<SCEVCouldNotCompute>(TripCountSC)) {
731 LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
732 return false;
733 }
734
735 BasicBlock *PreHeader = L->getLoopPreheader();
736 BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
737 const DataLayout &DL = Header->getDataLayout();
738 SCEVExpander Expander(*SE, DL, "loop-unroll");
739 if (!AllowExpensiveTripCount &&
740 Expander.isHighCostExpansion(TripCountSC, L, SCEVExpansionBudget, TTI,
741 PreHeaderBR)) {
742 LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
743 return false;
744 }
745
746 // This constraint lets us deal with an overflowing trip count easily; see the
747 // comment on ModVal below.
748 if (Log2_32(Count) > BEWidth) {
750 dbgs()
751 << "Count failed constraint on overflow trip count calculation.\n");
752 return false;
753 }
754
755 // Loop structure is the following:
756 //
757 // PreHeader
758 // Header
759 // ...
760 // Latch
761 // LatchExit
762
763 BasicBlock *NewPreHeader;
764 BasicBlock *NewExit = nullptr;
765 BasicBlock *PrologExit = nullptr;
766 BasicBlock *EpilogPreHeader = nullptr;
767 BasicBlock *PrologPreHeader = nullptr;
768
769 if (UseEpilogRemainder) {
770 // If epilog remainder
771 // Split PreHeader to insert a branch around loop for unrolling.
772 NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
773 NewPreHeader->setName(PreHeader->getName() + ".new");
774 // Split LatchExit to create phi nodes from branch above.
775 NewExit = SplitBlockPredecessors(LatchExit, {Latch}, ".unr-lcssa", DT, LI,
776 nullptr, PreserveLCSSA);
777 // NewExit gets its DebugLoc from LatchExit, which is not part of the
778 // original Loop.
779 // Fix this by setting Loop's DebugLoc to NewExit.
780 auto *NewExitTerminator = NewExit->getTerminator();
781 NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc());
782 // Split NewExit to insert epilog remainder loop.
783 EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
784 EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
785
786 // If the latch exits from multiple level of nested loops, then
787 // by assumption there must be another loop exit which branches to the
788 // outer loop and we must adjust the loop for the newly inserted blocks
789 // to account for the fact that our epilogue is still in the same outer
790 // loop. Note that this leaves loopinfo temporarily out of sync with the
791 // CFG until the actual epilogue loop is inserted.
792 if (auto *ParentL = L->getParentLoop())
793 if (LI->getLoopFor(LatchExit) != ParentL) {
794 LI->removeBlock(NewExit);
795 ParentL->addBasicBlockToLoop(NewExit, *LI);
796 LI->removeBlock(EpilogPreHeader);
797 ParentL->addBasicBlockToLoop(EpilogPreHeader, *LI);
798 }
799
800 } else {
801 // If prolog remainder
802 // Split the original preheader twice to insert prolog remainder loop
803 PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
804 PrologPreHeader->setName(Header->getName() + ".prol.preheader");
805 PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
806 DT, LI);
807 PrologExit->setName(Header->getName() + ".prol.loopexit");
808 // Split PrologExit to get NewPreHeader.
809 NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
810 NewPreHeader->setName(PreHeader->getName() + ".new");
811 }
812 // Loop structure should be the following:
813 // Epilog Prolog
814 //
815 // PreHeader PreHeader
816 // *NewPreHeader *PrologPreHeader
817 // Header *PrologExit
818 // ... *NewPreHeader
819 // Latch Header
820 // *NewExit ...
821 // *EpilogPreHeader Latch
822 // LatchExit LatchExit
823
824 // Calculate conditions for branch around loop for unrolling
825 // in epilog case and around prolog remainder loop in prolog case.
826 // Compute the number of extra iterations required, which is:
827 // extra iterations = run-time trip count % loop unroll factor
828 PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
829 IRBuilder<> B(PreHeaderBR);
830 Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
831 PreHeaderBR);
832 Value *BECount;
833 // If there are other exits before the latch, that may cause the latch exit
834 // branch to never be executed, and the latch exit count may be poison.
835 // In this case, freeze the TripCount and base BECount on the frozen
836 // TripCount. We will introduce two branches using these values, and it's
837 // important that they see a consistent value (which would not be guaranteed
838 // if were frozen independently.)
839 if ((!OtherExits.empty() || !SE->loopHasNoAbnormalExits(L)) &&
840 !isGuaranteedNotToBeUndefOrPoison(TripCount, AC, PreHeaderBR, DT)) {
841 TripCount = B.CreateFreeze(TripCount);
842 BECount =
843 B.CreateAdd(TripCount, Constant::getAllOnesValue(TripCount->getType()));
844 } else {
845 // If we don't need to freeze, use SCEVExpander for BECount as well, to
846 // allow slightly better value reuse.
847 BECount =
848 Expander.expandCodeFor(BECountSC, BECountSC->getType(), PreHeaderBR);
849 }
850
851 Value * const ModVal = CreateTripRemainder(B, BECount, TripCount, Count);
852
853 Value *BranchVal =
854 UseEpilogRemainder ? B.CreateICmpULT(BECount,
855 ConstantInt::get(BECount->getType(),
856 Count - 1)) :
857 B.CreateIsNotNull(ModVal, "lcmp.mod");
858 BasicBlock *RemainderLoop =
859 UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
860 BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
861 // Branch to either remainder (extra iterations) loop or unrolling loop.
862 MDNode *BranchWeights = nullptr;
863 if ((OriginalLoopProb.isUnknown() || !UseEpilogRemainder) &&
864 hasBranchWeightMD(*Latch->getTerminator())) {
865 // Assume loop is nearly always entered.
866 MDBuilder MDB(B.getContext());
867 BranchWeights = MDB.createBranchWeights(EpilogHeaderWeights);
868 }
869 BranchInst *UnrollingLoopGuard =
870 B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop, BranchWeights);
871 if (!OriginalLoopProb.isUnknown() && UseEpilogRemainder) {
872 // The original loop's first iteration always happens. Compute the
873 // probability of the original loop executing Count-1 iterations after that
874 // to complete the first iteration of the unrolled loop.
875 BranchProbability ProbOne = OriginalLoopProb;
876 BranchProbability ProbRest = ProbOne.pow(Count - 1);
877 setBranchProbability(UnrollingLoopGuard, ProbRest,
878 /*ForFirstTarget=*/false);
879 }
880 PreHeaderBR->eraseFromParent();
881 if (DT) {
882 if (UseEpilogRemainder)
883 DT->changeImmediateDominator(EpilogPreHeader, PreHeader);
884 else
885 DT->changeImmediateDominator(PrologExit, PreHeader);
886 }
887 Function *F = Header->getParent();
888 // Get an ordered list of blocks in the loop to help with the ordering of the
889 // cloned blocks in the prolog/epilog code
890 LoopBlocksDFS LoopBlocks(L);
891 LoopBlocks.perform(LI);
892
893 //
894 // For each extra loop iteration, create a copy of the loop's basic blocks
895 // and generate a condition that branches to the copy depending on the
896 // number of 'left over' iterations.
897 //
898 std::vector<BasicBlock *> NewBlocks;
900
901 // Clone all the basic blocks in the loop. If Count is 2, we don't clone
902 // the loop, otherwise we create a cloned loop to execute the extra
903 // iterations. This function adds the appropriate CFG connections.
904 BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
905 BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
906 Loop *remainderLoop =
907 CloneLoopBlocks(L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop,
908 InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT,
909 LI, Count, OriginalTripCount, OriginalLoopProb);
910
911 // Insert the cloned blocks into the function.
912 F->splice(InsertBot->getIterator(), F, NewBlocks[0]->getIterator(), F->end());
913
914 // Now the loop blocks are cloned and the other exiting blocks from the
915 // remainder are connected to the original Loop's exit blocks. The remaining
916 // work is to update the phi nodes in the original loop, and take in the
917 // values from the cloned region.
918 for (auto *BB : OtherExits) {
919 // Given we preserve LCSSA form, we know that the values used outside the
920 // loop will be used through these phi nodes at the exit blocks that are
921 // transformed below.
922 for (PHINode &PN : BB->phis()) {
923 unsigned oldNumOperands = PN.getNumIncomingValues();
924 // Add the incoming values from the remainder code to the end of the phi
925 // node.
926 for (unsigned i = 0; i < oldNumOperands; i++){
927 auto *PredBB =PN.getIncomingBlock(i);
928 if (PredBB == Latch)
929 // The latch exit is handled separately, see connectX
930 continue;
931 if (!L->contains(PredBB))
932 // Even if we had dedicated exits, the code above inserted an
933 // extra branch which can reach the latch exit.
934 continue;
935
936 auto *V = PN.getIncomingValue(i);
938 if (L->contains(I))
939 V = VMap.lookup(I);
940 PN.addIncoming(V, cast<BasicBlock>(VMap[PredBB]));
941 }
942 }
943#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
944 for (BasicBlock *SuccBB : successors(BB)) {
945 assert(!(llvm::is_contained(OtherExits, SuccBB) || SuccBB == LatchExit) &&
946 "Breaks the definition of dedicated exits!");
947 }
948#endif
949 }
950
951 // Update the immediate dominator of the exit blocks and blocks that are
952 // reachable from the exit blocks. This is needed because we now have paths
953 // from both the original loop and the remainder code reaching the exit
954 // blocks. While the IDom of these exit blocks were from the original loop,
955 // now the IDom is the preheader (which decides whether the original loop or
956 // remainder code should run) unless the block still has just the original
957 // predecessor (such as NewExit in the case of an epilog remainder).
958 if (DT && !L->getExitingBlock()) {
959 SmallVector<BasicBlock *, 16> ChildrenToUpdate;
960 // NB! We have to examine the dom children of all loop blocks, not just
961 // those which are the IDom of the exit blocks. This is because blocks
962 // reachable from the exit blocks can have their IDom as the nearest common
963 // dominator of the exit blocks.
964 for (auto *BB : L->blocks()) {
965 auto *DomNodeBB = DT->getNode(BB);
966 for (auto *DomChild : DomNodeBB->children()) {
967 auto *DomChildBB = DomChild->getBlock();
968 if (!L->contains(LI->getLoopFor(DomChildBB)) &&
969 DomChildBB->getUniquePredecessor() != BB)
970 ChildrenToUpdate.push_back(DomChildBB);
971 }
972 }
973 for (auto *BB : ChildrenToUpdate)
974 DT->changeImmediateDominator(BB, PreHeader);
975 }
976
977 // Loop structure should be the following:
978 // Epilog Prolog
979 //
980 // PreHeader PreHeader
981 // NewPreHeader PrologPreHeader
982 // Header PrologHeader
983 // ... ...
984 // Latch PrologLatch
985 // NewExit PrologExit
986 // EpilogPreHeader NewPreHeader
987 // EpilogHeader Header
988 // ... ...
989 // EpilogLatch Latch
990 // LatchExit LatchExit
991
992 // Rewrite the cloned instruction operands to use the values created when the
993 // clone is created.
994 for (BasicBlock *BB : NewBlocks) {
995 Module *M = BB->getModule();
996 for (Instruction &I : *BB) {
997 RemapInstruction(&I, VMap,
999 RemapDbgRecordRange(M, I.getDbgRecordRange(), VMap,
1001 }
1002 }
1003
1004 if (UseEpilogRemainder) {
1005 // Connect the epilog code to the original loop and update the
1006 // PHI functions.
1007 ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader,
1008 NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count, *AC,
1009 OriginalLoopProb);
1010
1011 // Update counter in loop for unrolling.
1012 // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.
1013 // Subtle: TestVal can be 0 if we wrapped when computing the trip count,
1014 // thus we must compare the post-increment (wrapping) value.
1015 IRBuilder<> B2(NewPreHeader->getTerminator());
1016 Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
1017 BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
1018 PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter");
1019 NewIdx->insertBefore(Header->getFirstNonPHIIt());
1020 B2.SetInsertPoint(LatchBR);
1021 auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
1022 auto *One = ConstantInt::get(NewIdx->getType(), 1);
1023 Value *IdxNext = B2.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
1024 auto Pred = LatchBR->getSuccessor(0) == Header ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1025 Value *IdxCmp = B2.CreateICmp(Pred, IdxNext, TestVal, NewIdx->getName() + ".ncmp");
1026 NewIdx->addIncoming(Zero, NewPreHeader);
1027 NewIdx->addIncoming(IdxNext, Latch);
1028 LatchBR->setCondition(IdxCmp);
1029 } else {
1030 // Connect the prolog code to the original loop and update the
1031 // PHI functions.
1032 ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
1033 NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE);
1034 }
1035
1036 // If this loop is nested, then the loop unroller changes the code in the any
1037 // of its parent loops, so the Scalar Evolution pass needs to be run again.
1038 SE->forgetTopmostLoop(L);
1039
1040 // Verify that the Dom Tree and Loop Info are correct.
1041#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
1042 if (DT) {
1043 assert(DT->verify(DominatorTree::VerificationLevel::Full));
1044 LI->verify(*DT);
1045 }
1046#endif
1047
1048 // For unroll factor 2 remainder loop will have 1 iteration.
1049 if (Count == 2 && DT && LI && SE) {
1050 // TODO: This code could probably be pulled out into a helper function
1051 // (e.g. breakLoopBackedgeAndSimplify) and reused in loop-deletion.
1052 BasicBlock *RemainderLatch = remainderLoop->getLoopLatch();
1053 assert(RemainderLatch);
1054 SmallVector<BasicBlock *> RemainderBlocks(remainderLoop->getBlocks());
1055 breakLoopBackedge(remainderLoop, *DT, *SE, *LI, nullptr);
1056 remainderLoop = nullptr;
1057
1058 // Simplify loop values after breaking the backedge
1059 const DataLayout &DL = L->getHeader()->getDataLayout();
1061 for (BasicBlock *BB : RemainderBlocks) {
1062 for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
1063 if (Value *V = simplifyInstruction(&Inst, {DL, nullptr, DT, AC}))
1064 if (LI->replacementPreservesLCSSAForm(&Inst, V))
1065 Inst.replaceAllUsesWith(V);
1066 if (isInstructionTriviallyDead(&Inst))
1067 DeadInsts.emplace_back(&Inst);
1068 }
1069 // We can't do recursive deletion until we're done iterating, as we might
1070 // have a phi which (potentially indirectly) uses instructions later in
1071 // the block we're iterating through.
1073 }
1074
1075 // Merge latch into exit block.
1076 auto *ExitBB = RemainderLatch->getSingleSuccessor();
1077 assert(ExitBB && "required after breaking cond br backedge");
1078 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
1079 MergeBlockIntoPredecessor(ExitBB, &DTU, LI);
1080 }
1081
1082 // Canonicalize to LoopSimplifyForm both original and remainder loops. We
1083 // cannot rely on the LoopUnrollPass to do this because it only does
1084 // canonicalization for parent/subloops and not the sibling loops.
1085 if (OtherExits.size() > 0) {
1086 // Generate dedicated exit blocks for the original loop, to preserve
1087 // LoopSimplifyForm.
1088 formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA);
1089 // Generate dedicated exit blocks for the remainder loop if one exists, to
1090 // preserve LoopSimplifyForm.
1091 if (remainderLoop)
1092 formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA);
1093 }
1094
1095 auto UnrollResult = LoopUnrollResult::Unmodified;
1096 if (remainderLoop && UnrollRemainder) {
1097 LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
1099 ULO.Count = Count - 1;
1100 ULO.Force = false;
1101 ULO.Runtime = false;
1102 ULO.AllowExpensiveTripCount = false;
1103 ULO.UnrollRemainder = false;
1104 ULO.ForgetAllSCEV = ForgetAllSCEV;
1106 "A loop with a convergence heart does not allow runtime unrolling.");
1107 UnrollResult = UnrollLoop(remainderLoop, ULO, LI, SE, DT, AC, TTI,
1108 /*ORE*/ nullptr, PreserveLCSSA);
1109 }
1110
1111 if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
1112 *ResultLoop = remainderLoop;
1113 NumRuntimeUnrolled++;
1114 return true;
1115}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Module.h This file contains the declarations for the Module class.
static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, BasicBlock *Exit, BasicBlock *PreHeader, BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE, unsigned Count, AssumptionCache &AC, BranchProbability OriginalLoopProb)
Connect the unrolling epilog code to the original loop.
static const uint32_t UnrolledLoopHeaderWeights[]
static Value * CreateTripRemainder(IRBuilder<> &B, Value *BECount, Value *TripCount, unsigned Count)
Calculate ModVal = (BECount + 1) % Count on the abstract integer domain accounting for the possibilit...
static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector< BasicBlock * > &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, unsigned Count, std::optional< unsigned > OriginalTripCount, BranchProbability OriginalLoopProb)
Create a clone of the blocks in a loop and connect them together.
static cl::opt< bool > UnrollRuntimeOtherExitPredictable("unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden, cl::desc("Assume the non latch exit block to be predictable"))
static bool canProfitablyRuntimeUnrollMultiExitLoop(Loop *L, SmallVectorImpl< BasicBlock * > &OtherExits, BasicBlock *LatchExit, bool UseEpilogRemainder)
Returns true if we can profitably unroll the multi-exit loop L.
static const uint32_t EpilogHeaderWeights[]
static cl::opt< bool > UnrollRuntimeMultiExit("unroll-runtime-multi-exit", cl::init(false), cl::Hidden, cl::desc("Allow runtime unrolling for loops with multiple exits, when " "epilog is generated"))
static BranchProbability probOfNextInRemainder(BranchProbability OriginalLoopProb, unsigned N)
Assume, due to our position in the remainder loop or its guard, anywhere from 0 to N more iterations ...
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *PrologExit, BasicBlock *OriginalLoopLatchExit, BasicBlock *PreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE)
Connect the unrolling prolog code to the original loop.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
This file contains the declarations for profiling metadata utility functions.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This represents the llvm.assume intrinsic.
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
static uint32_t getDenominator()
BranchProbability pow(unsigned N) const
Compute pow(Probability, N).
static BranchProbability getOne()
uint32_t getNumerator() const
BranchProbability getCompl() const
@ ICMP_NE
not equal
Definition InstrTypes.h:698
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
DomTreeNodeBase * getIDom() const
NodeT * getBlock() const
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
LLVM_ABI CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles={})
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1420
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1403
Value * CreateIsNotNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg != 0.
Definition IRBuilder.h:2659
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2442
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Store the result of a depth first search within basic blocks contained by a single loop.
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition LoopInfo.h:441
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop's loop id metadata.
Definition LoopInfo.cpp:538
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1078
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
void setIncomingValueForBlock(const BasicBlock *BB, Value *V)
Set every incoming value(s) for block BB to V.
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
This class uses information about analyze scalars to rewrite expressions in canonical form.
bool isHighCostExpansion(ArrayRef< const SCEV * > Exprs, Loop *L, unsigned Budget, const TargetTransformInfo *TTI, const Instruction *At)
Return true for expressions that can't be evaluated at runtime within given Budget.
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
bool loopHasNoAbnormalExits(const Loop *L)
Return true if the loop has no abnormal exits.
LLVM_ABI void forgetTopmostLoop(const Loop *L)
LLVM_ABI void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V)
Forget LCSSA phi node V of loop L to which a new predecessor was added, such that it may no longer be...
LLVM_ABI const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition ValueMap.h:167
bool erase(const KeyT &Val)
Definition ValueMap.h:192
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
SmallDenseMap< const Loop *, Loop *, 4 > NewLoopsMap
Definition UnrollLoop.h:41
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
bool setBranchProbability(BranchInst *B, BranchProbability P, bool ForFirstTarget)
Set branch weight metadata for B to indicate that P and 1 - P are the probabilities of control flowin...
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition Local.cpp:402
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI CallBase * getLoopConvergenceHeart(const Loop *TheLoop)
Find the convergence heart of the loop.
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
@ Unmodified
The loop was not modified.
Definition UnrollLoop.h:60
@ FullyUnrolled
The loop was fully unrolled into straight-line code.
Definition UnrollLoop.h:69
LLVM_ABI void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, MemorySSA *MSSA)
Remove the backedge of the specified loop.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
TargetTransformInfo TTI
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
LLVM_ABI bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition LoopUtils.cpp:58
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, std::optional< unsigned > EstimatedLoopInvocationWeight=std::nullopt)
Set llvm.loop.estimated_trip_count with the value EstimatedTripCount in the loop metadata of L.
LLVM_ABI const Loop * addClonedBlockToLoopInfo(BasicBlock *OriginalBB, BasicBlock *ClonedBB, LoopInfo *LI, NewLoopsMap &NewLoops)
Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary and adds a mapping from the o...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
LLVM_ABI bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, bool PreserveLCSSA, unsigned SCEVExpansionBudget, bool RuntimeUnrollMultiExit, Loop **ResultLoop=nullptr, std::optional< unsigned > OriginalTripCount=std::nullopt, BranchProbability OriginalLoopProb=BranchProbability::getUnknown())
Insert code in the prolog/epilog code when unrolling a loop with a run-time trip-count.
LLVM_ABI LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr, AAResults *AA=nullptr)
Unroll the given loop by Count.
#define N