LLVM 19.0.0git
LoopUnrollRuntime.cpp
Go to the documentation of this file.
1//===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements some loop unrolling utilities for loops with run-time
10// trip counts. See LoopUnroll.cpp for unrolling loops with compile-time
11// trip counts.
12//
13// The functions in this file are used to generate extra code when the
14// run-time trip count modulo the unroll factor is not 0. When this is the
15// case, we need to generate code to execute these 'left over' iterations.
16//
17// The current strategy generates an if-then-else sequence prior to the
18// unrolled loop to execute the 'left over' iterations before or after the
19// unrolled loop.
20//
21//===----------------------------------------------------------------------===//
22
23#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/MDBuilder.h"
32#include "llvm/IR/Module.h"
35#include "llvm/Support/Debug.h"
43#include <algorithm>
44
45using namespace llvm;
46
47#define DEBUG_TYPE "loop-unroll"
48
49STATISTIC(NumRuntimeUnrolled,
50 "Number of loops unrolled with run-time trip counts");
52 "unroll-runtime-multi-exit", cl::init(false), cl::Hidden,
53 cl::desc("Allow runtime unrolling for loops with multiple exits, when "
54 "epilog is generated"));
56 "unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden,
57 cl::desc("Assume the non latch exit block to be predictable"));
58
59// Probability that the loop trip count is so small that after the prolog
60// we do not enter the unrolled loop at all.
61// It is unlikely that the loop trip count is smaller than the unroll factor;
62// other than that, the choice of constant is not tuned yet.
63static const uint32_t UnrolledLoopHeaderWeights[] = {1, 127};
64// Probability that the loop trip count is so small that we skip the unrolled
65// loop completely and immediately enter the epilogue loop.
66// It is unlikely that the loop trip count is smaller than the unroll factor;
67// other than that, the choice of constant is not tuned yet.
68static const uint32_t EpilogHeaderWeights[] = {1, 127};
69
70/// Connect the unrolling prolog code to the original loop.
71/// The unrolling prolog code contains code to execute the
72/// 'extra' iterations if the run-time trip count modulo the
73/// unroll count is non-zero.
74///
75/// This function performs the following:
76/// - Create PHI nodes at prolog end block to combine values
77/// that exit the prolog code and jump around the prolog.
78/// - Add a PHI operand to a PHI node at the loop exit block
79/// for values that exit the prolog and go around the loop.
80/// - Branch around the original loop if the trip count is less
81/// than the unroll factor.
82///
83static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
84 BasicBlock *PrologExit,
85 BasicBlock *OriginalLoopLatchExit,
86 BasicBlock *PreHeader, BasicBlock *NewPreHeader,
88 LoopInfo *LI, bool PreserveLCSSA,
89 ScalarEvolution &SE) {
90 // Loop structure should be the following:
91 // Preheader
92 // PrologHeader
93 // ...
94 // PrologLatch
95 // PrologExit
96 // NewPreheader
97 // Header
98 // ...
99 // Latch
100 // LatchExit
101 BasicBlock *Latch = L->getLoopLatch();
102 assert(Latch && "Loop must have a latch");
103 BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
104
105 // Create a PHI node for each outgoing value from the original loop
106 // (which means it is an outgoing value from the prolog code too).
107 // The new PHI node is inserted in the prolog end basic block.
108 // The new PHI node value is added as an operand of a PHI node in either
109 // the loop header or the loop exit block.
110 for (BasicBlock *Succ : successors(Latch)) {
111 for (PHINode &PN : Succ->phis()) {
112 // Add a new PHI node to the prolog end block and add the
113 // appropriate incoming values.
114 // TODO: This code assumes that the PrologExit (or the LatchExit block for
115 // prolog loop) contains only one predecessor from the loop, i.e. the
116 // PrologLatch. When supporting multiple-exiting block loops, we can have
117 // two or more blocks that have the LatchExit as the target in the
118 // original loop.
119 PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr");
120 NewPN->insertBefore(PrologExit->getFirstNonPHIIt());
121 // Adding a value to the new PHI node from the original loop preheader.
122 // This is the value that skips all the prolog code.
123 if (L->contains(&PN)) {
124 // Succ is loop header.
125 NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader),
126 PreHeader);
127 } else {
128 // Succ is LatchExit.
129 NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader);
130 }
131
132 Value *V = PN.getIncomingValueForBlock(Latch);
133 if (Instruction *I = dyn_cast<Instruction>(V)) {
134 if (L->contains(I)) {
135 V = VMap.lookup(I);
136 }
137 }
138 // Adding a value to the new PHI node from the last prolog block
139 // that was created.
140 NewPN->addIncoming(V, PrologLatch);
141
142 // Update the existing PHI node operand with the value from the
143 // new PHI node. How this is done depends on if the existing
144 // PHI node is in the original loop block, or the exit block.
145 if (L->contains(&PN))
146 PN.setIncomingValueForBlock(NewPreHeader, NewPN);
147 else
148 PN.addIncoming(NewPN, PrologExit);
149 SE.forgetValue(&PN);
150 }
151 }
152
153 // Make sure that created prolog loop is in simplified form
154 SmallVector<BasicBlock *, 4> PrologExitPreds;
155 Loop *PrologLoop = LI->getLoopFor(PrologLatch);
156 if (PrologLoop) {
157 for (BasicBlock *PredBB : predecessors(PrologExit))
158 if (PrologLoop->contains(PredBB))
159 PrologExitPreds.push_back(PredBB);
160
161 SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI,
162 nullptr, PreserveLCSSA);
163 }
164
165 // Create a branch around the original loop, which is taken if there are no
166 // iterations remaining to be executed after running the prologue.
167 Instruction *InsertPt = PrologExit->getTerminator();
168 IRBuilder<> B(InsertPt);
169
170 assert(Count != 0 && "nonsensical Count!");
171
172 // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
173 // This means %xtraiter is (BECount + 1) and all of the iterations of this
174 // loop were executed by the prologue. Note that if BECount <u (Count - 1)
175 // then (BECount + 1) cannot unsigned-overflow.
176 Value *BrLoopExit =
177 B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
178 // Split the exit to maintain loop canonicalization guarantees
179 SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit));
180 SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
181 nullptr, PreserveLCSSA);
182 // Add the branch to the exit block (around the unrolled loop)
183 MDNode *BranchWeights = nullptr;
184 if (hasBranchWeightMD(*Latch->getTerminator())) {
185 // Assume loop is nearly always entered.
186 MDBuilder MDB(B.getContext());
188 }
189 B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader,
190 BranchWeights);
191 InsertPt->eraseFromParent();
192 if (DT) {
193 auto *NewDom = DT->findNearestCommonDominator(OriginalLoopLatchExit,
194 PrologExit);
195 DT->changeImmediateDominator(OriginalLoopLatchExit, NewDom);
196 }
197}
198
199/// Connect the unrolling epilog code to the original loop.
200/// The unrolling epilog code contains code to execute the
201/// 'extra' iterations if the run-time trip count modulo the
202/// unroll count is non-zero.
203///
204/// This function performs the following:
205/// - Update PHI nodes at the unrolling loop exit and epilog loop exit
206/// - Create PHI nodes at the unrolling loop exit to combine
207/// values that exit the unrolling loop code and jump around it.
208/// - Update PHI operands in the epilog loop by the new PHI nodes
209/// - Branch around the epilog loop if extra iters (ModVal) is zero.
210///
211static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
212 BasicBlock *Exit, BasicBlock *PreHeader,
213 BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
215 LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE,
216 unsigned Count) {
217 BasicBlock *Latch = L->getLoopLatch();
218 assert(Latch && "Loop must have a latch");
219 BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
220
221 // Loop structure should be the following:
222 //
223 // PreHeader
224 // NewPreHeader
225 // Header
226 // ...
227 // Latch
228 // NewExit (PN)
229 // EpilogPreHeader
230 // EpilogHeader
231 // ...
232 // EpilogLatch
233 // Exit (EpilogPN)
234
235 // Update PHI nodes at NewExit and Exit.
236 for (PHINode &PN : NewExit->phis()) {
237 // PN should be used in another PHI located in Exit block as
238 // Exit was split by SplitBlockPredecessors into Exit and NewExit
239 // Basically it should look like:
240 // NewExit:
241 // PN = PHI [I, Latch]
242 // ...
243 // Exit:
244 // EpilogPN = PHI [PN, EpilogPreHeader], [X, Exit2], [Y, Exit2.epil]
245 //
246 // Exits from non-latch blocks point to the original exit block and the
247 // epilogue edges have already been added.
248 //
249 // There is EpilogPreHeader incoming block instead of NewExit as
250 // NewExit was spilt 1 more time to get EpilogPreHeader.
251 assert(PN.hasOneUse() && "The phi should have 1 use");
252 PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser());
253 assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
254
255 // Add incoming PreHeader from branch around the Loop
256 PN.addIncoming(UndefValue::get(PN.getType()), PreHeader);
257 SE.forgetValue(&PN);
258
259 Value *V = PN.getIncomingValueForBlock(Latch);
260 Instruction *I = dyn_cast<Instruction>(V);
261 if (I && L->contains(I))
262 // If value comes from an instruction in the loop add VMap value.
263 V = VMap.lookup(I);
264 // For the instruction out of the loop, constant or undefined value
265 // insert value itself.
266 EpilogPN->addIncoming(V, EpilogLatch);
267
268 assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
269 "EpilogPN should have EpilogPreHeader incoming block");
270 // Change EpilogPreHeader incoming block to NewExit.
271 EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
272 NewExit);
273 // Now PHIs should look like:
274 // NewExit:
275 // PN = PHI [I, Latch], [undef, PreHeader]
276 // ...
277 // Exit:
278 // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
279 }
280
281 // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
282 // Update corresponding PHI nodes in epilog loop.
283 for (BasicBlock *Succ : successors(Latch)) {
284 // Skip this as we already updated phis in exit blocks.
285 if (!L->contains(Succ))
286 continue;
287 for (PHINode &PN : Succ->phis()) {
288 // Add new PHI nodes to the loop exit block and update epilog
289 // PHIs with the new PHI values.
290 PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr");
291 NewPN->insertBefore(NewExit->getFirstNonPHIIt());
292 // Adding a value to the new PHI node from the unrolling loop preheader.
293 NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
294 // Adding a value to the new PHI node from the unrolling loop latch.
295 NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch);
296
297 // Update the existing PHI node operand with the value from the new PHI
298 // node. Corresponding instruction in epilog loop should be PHI.
299 PHINode *VPN = cast<PHINode>(VMap[&PN]);
300 VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN);
301 }
302 }
303
304 Instruction *InsertPt = NewExit->getTerminator();
305 IRBuilder<> B(InsertPt);
306 Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
307 assert(Exit && "Loop must have a single exit block only");
308 // Split the epilogue exit to maintain loop canonicalization guarantees
310 SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
311 PreserveLCSSA);
312 // Add the branch to the exit block (around the unrolling loop)
313 MDNode *BranchWeights = nullptr;
314 if (hasBranchWeightMD(*Latch->getTerminator())) {
315 // Assume equal distribution in interval [0, Count).
316 MDBuilder MDB(B.getContext());
317 BranchWeights = MDB.createBranchWeights(1, Count - 1);
318 }
319 B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit, BranchWeights);
320 InsertPt->eraseFromParent();
321 if (DT) {
322 auto *NewDom = DT->findNearestCommonDominator(Exit, NewExit);
323 DT->changeImmediateDominator(Exit, NewDom);
324 }
325
326 // Split the main loop exit to maintain canonicalization guarantees.
327 SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
328 SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr,
329 PreserveLCSSA);
330}
331
332/// Create a clone of the blocks in a loop and connect them together. A new
333/// loop will be created including all cloned blocks, and the iterator of the
334/// new loop switched to count NewIter down to 0.
335/// The cloned blocks should be inserted between InsertTop and InsertBot.
336/// InsertTop should be new preheader, InsertBot new loop exit.
337/// Returns the new cloned loop that is created.
338static Loop *
339CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
340 const bool UnrollRemainder,
341 BasicBlock *InsertTop,
342 BasicBlock *InsertBot, BasicBlock *Preheader,
343 std::vector<BasicBlock *> &NewBlocks,
344 LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
345 DominatorTree *DT, LoopInfo *LI, unsigned Count) {
346 StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
347 BasicBlock *Header = L->getHeader();
348 BasicBlock *Latch = L->getLoopLatch();
349 Function *F = Header->getParent();
350 LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
351 LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
352 Loop *ParentLoop = L->getParentLoop();
353 NewLoopsMap NewLoops;
354 NewLoops[ParentLoop] = ParentLoop;
355
356 // For each block in the original loop, create a new copy,
357 // and update the value map with the newly created values.
358 for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
359 BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
360 NewBlocks.push_back(NewBB);
361
362 addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
363
364 VMap[*BB] = NewBB;
365 if (Header == *BB) {
366 // For the first block, add a CFG connection to this newly
367 // created block.
368 InsertTop->getTerminator()->setSuccessor(0, NewBB);
369 }
370
371 if (DT) {
372 if (Header == *BB) {
373 // The header is dominated by the preheader.
374 DT->addNewBlock(NewBB, InsertTop);
375 } else {
376 // Copy information from original loop to unrolled loop.
377 BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
378 DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
379 }
380 }
381
382 if (Latch == *BB) {
383 // For the last block, create a loop back to cloned head.
384 VMap.erase((*BB)->getTerminator());
385 // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.
386 // Subtle: NewIter can be 0 if we wrapped when computing the trip count,
387 // thus we must compare the post-increment (wrapping) value.
388 BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
389 BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
390 IRBuilder<> Builder(LatchBR);
391 PHINode *NewIdx =
392 PHINode::Create(NewIter->getType(), 2, suffix + ".iter");
393 NewIdx->insertBefore(FirstLoopBB->getFirstNonPHIIt());
394 auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
395 auto *One = ConstantInt::get(NewIdx->getType(), 1);
396 Value *IdxNext =
397 Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
398 Value *IdxCmp = Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp");
399 MDNode *BranchWeights = nullptr;
400 if (hasBranchWeightMD(*LatchBR)) {
401 uint32_t ExitWeight;
402 uint32_t BackEdgeWeight;
403 if (Count >= 3) {
404 // Note: We do not enter this loop for zero-remainders. The check
405 // is at the end of the loop. We assume equal distribution between
406 // possible remainders in [1, Count).
407 ExitWeight = 1;
408 BackEdgeWeight = (Count - 2) / 2;
409 } else {
410 // Unnecessary backedge, should never be taken. The conditional
411 // jump should be optimized away later.
412 ExitWeight = 1;
413 BackEdgeWeight = 0;
414 }
415 MDBuilder MDB(Builder.getContext());
416 BranchWeights = MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
417 }
418 Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot, BranchWeights);
419 NewIdx->addIncoming(Zero, InsertTop);
420 NewIdx->addIncoming(IdxNext, NewBB);
421 LatchBR->eraseFromParent();
422 }
423 }
424
425 // Change the incoming values to the ones defined in the preheader or
426 // cloned loop.
427 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
428 PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
429 unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
430 NewPHI->setIncomingBlock(idx, InsertTop);
431 BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
432 idx = NewPHI->getBasicBlockIndex(Latch);
433 Value *InVal = NewPHI->getIncomingValue(idx);
434 NewPHI->setIncomingBlock(idx, NewLatch);
435 if (Value *V = VMap.lookup(InVal))
436 NewPHI->setIncomingValue(idx, V);
437 }
438
439 Loop *NewLoop = NewLoops[L];
440 assert(NewLoop && "L should have been cloned");
441 MDNode *LoopID = NewLoop->getLoopID();
442
443 // Only add loop metadata if the loop is not going to be completely
444 // unrolled.
445 if (UnrollRemainder)
446 return NewLoop;
447
448 std::optional<MDNode *> NewLoopID = makeFollowupLoopID(
450 if (NewLoopID) {
451 NewLoop->setLoopID(*NewLoopID);
452
453 // Do not setLoopAlreadyUnrolled if loop attributes have been defined
454 // explicitly.
455 return NewLoop;
456 }
457
458 // Add unroll disable metadata to disable future unrolling for this loop.
459 NewLoop->setLoopAlreadyUnrolled();
460 return NewLoop;
461}
462
463/// Returns true if we can profitably unroll the multi-exit loop L. Currently,
464/// we return true only if UnrollRuntimeMultiExit is set to true.
466 Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
467 bool UseEpilogRemainder) {
468
469 // Priority goes to UnrollRuntimeMultiExit if it's supplied.
470 if (UnrollRuntimeMultiExit.getNumOccurrences())
472
473 // The main pain point with multi-exit loop unrolling is that once unrolled,
474 // we will not be able to merge all blocks into a straight line code.
475 // There are branches within the unrolled loop that go to the OtherExits.
476 // The second point is the increase in code size, but this is true
477 // irrespective of multiple exits.
478
479 // Note: Both the heuristics below are coarse grained. We are essentially
480 // enabling unrolling of loops that have a single side exit other than the
481 // normal LatchExit (i.e. exiting into a deoptimize block).
482 // The heuristics considered are:
483 // 1. low number of branches in the unrolled version.
484 // 2. high predictability of these extra branches.
485 // We avoid unrolling loops that have more than two exiting blocks. This
486 // limits the total number of branches in the unrolled loop to be atmost
487 // the unroll factor (since one of the exiting blocks is the latch block).
488 SmallVector<BasicBlock*, 4> ExitingBlocks;
489 L->getExitingBlocks(ExitingBlocks);
490 if (ExitingBlocks.size() > 2)
491 return false;
492
493 // Allow unrolling of loops with no non latch exit blocks.
494 if (OtherExits.size() == 0)
495 return true;
496
497 // The second heuristic is that L has one exit other than the latchexit and
498 // that exit is a deoptimize block. We know that deoptimize blocks are rarely
499 // taken, which also implies the branch leading to the deoptimize block is
500 // highly predictable. When UnrollRuntimeOtherExitPredictable is specified, we
501 // assume the other exit branch is predictable even if it has no deoptimize
502 // call.
503 return (OtherExits.size() == 1 &&
505 OtherExits[0]->getPostdominatingDeoptimizeCall()));
506 // TODO: These can be fine-tuned further to consider code size or deopt states
507 // that are captured by the deoptimize exit block.
508 // Also, we can extend this to support more cases, if we actually
509 // know of kinds of multiexit loops that would benefit from unrolling.
510}
511
512/// Calculate ModVal = (BECount + 1) % Count on the abstract integer domain
513/// accounting for the possibility of unsigned overflow in the 2s complement
514/// domain. Preconditions:
515/// 1) TripCount = BECount + 1 (allowing overflow)
516/// 2) Log2(Count) <= BitWidth(BECount)
518 Value *TripCount, unsigned Count) {
519 // Note that TripCount is BECount + 1.
520 if (isPowerOf2_32(Count))
521 // If the expression is zero, then either:
522 // 1. There are no iterations to be run in the prolog/epilog loop.
523 // OR
524 // 2. The addition computing TripCount overflowed.
525 //
526 // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
527 // the number of iterations that remain to be run in the original loop is a
528 // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (a
529 // precondition of this method).
530 return B.CreateAnd(TripCount, Count - 1, "xtraiter");
531
532 // As (BECount + 1) can potentially unsigned overflow we count
533 // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
534 Constant *CountC = ConstantInt::get(BECount->getType(), Count);
535 Value *ModValTmp = B.CreateURem(BECount, CountC);
536 Value *ModValAdd = B.CreateAdd(ModValTmp,
537 ConstantInt::get(ModValTmp->getType(), 1));
538 // At that point (BECount % Count) + 1 could be equal to Count.
539 // To handle this case we need to take mod by Count one more time.
540 return B.CreateURem(ModValAdd, CountC, "xtraiter");
541}
542
543
544/// Insert code in the prolog/epilog code when unrolling a loop with a
545/// run-time trip-count.
546///
547/// This method assumes that the loop unroll factor is total number
548/// of loop bodies in the loop after unrolling. (Some folks refer
549/// to the unroll factor as the number of *extra* copies added).
550/// We assume also that the loop unroll factor is a power-of-two. So, after
551/// unrolling the loop, the number of loop bodies executed is 2,
552/// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch
553/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
554/// the switch instruction is generated.
555///
556/// ***Prolog case***
557/// extraiters = tripcount % loopfactor
558/// if (extraiters == 0) jump Loop:
559/// else jump Prol:
560/// Prol: LoopBody;
561/// extraiters -= 1 // Omitted if unroll factor is 2.
562/// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.
563/// if (tripcount < loopfactor) jump End:
564/// Loop:
565/// ...
566/// End:
567///
568/// ***Epilog case***
569/// extraiters = tripcount % loopfactor
570/// if (tripcount < loopfactor) jump LoopExit:
571/// unroll_iters = tripcount - extraiters
572/// Loop: LoopBody; (executes unroll_iter times);
573/// unroll_iter -= 1
574/// if (unroll_iter != 0) jump Loop:
575/// LoopExit:
576/// if (extraiters == 0) jump EpilExit:
577/// Epil: LoopBody; (executes extraiters times)
578/// extraiters -= 1 // Omitted if unroll factor is 2.
579/// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.
580/// EpilExit:
581
583 Loop *L, unsigned Count, bool AllowExpensiveTripCount,
584 bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV,
586 const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop) {
587 LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
588 LLVM_DEBUG(L->dump());
589 LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
590 : dbgs() << "Using prolog remainder.\n");
591
592 // Make sure the loop is in canonical form.
593 if (!L->isLoopSimplifyForm()) {
594 LLVM_DEBUG(dbgs() << "Not in simplify form!\n");
595 return false;
596 }
597
598 // Guaranteed by LoopSimplifyForm.
599 BasicBlock *Latch = L->getLoopLatch();
600 BasicBlock *Header = L->getHeader();
601
602 BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
603
604 if (!LatchBR || LatchBR->isUnconditional()) {
605 // The loop-rotate pass can be helpful to avoid this in many cases.
607 dbgs()
608 << "Loop latch not terminated by a conditional branch.\n");
609 return false;
610 }
611
612 unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
613 BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
614
615 if (L->contains(LatchExit)) {
616 // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
617 // targets of the Latch be an exit block out of the loop.
619 dbgs()
620 << "One of the loop latch successors must be the exit block.\n");
621 return false;
622 }
623
624 // These are exit blocks other than the target of the latch exiting block.
626 L->getUniqueNonLatchExitBlocks(OtherExits);
627 // Support only single exit and exiting block unless multi-exit loop
628 // unrolling is enabled.
629 if (!L->getExitingBlock() || OtherExits.size()) {
630 // We rely on LCSSA form being preserved when the exit blocks are transformed.
631 // (Note that only an off-by-default mode of the old PM disables PreserveLCCA.)
632 if (!PreserveLCSSA)
633 return false;
634
635 if (!canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit,
636 UseEpilogRemainder)) {
638 dbgs()
639 << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
640 "enabled!\n");
641 return false;
642 }
643 }
644 // Use Scalar Evolution to compute the trip count. This allows more loops to
645 // be unrolled than relying on induction var simplification.
646 if (!SE)
647 return false;
648
649 // Only unroll loops with a computable trip count.
650 // We calculate the backedge count by using getExitCount on the Latch block,
651 // which is proven to be the only exiting block in this loop. This is same as
652 // calculating getBackedgeTakenCount on the loop (which computes SCEV for all
653 // exiting blocks).
654 const SCEV *BECountSC = SE->getExitCount(L, Latch);
655 if (isa<SCEVCouldNotCompute>(BECountSC)) {
656 LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n");
657 return false;
658 }
659
660 unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
661
662 // Add 1 since the backedge count doesn't include the first loop iteration.
663 // (Note that overflow can occur, this is handled explicitly below)
664 const SCEV *TripCountSC =
665 SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
666 if (isa<SCEVCouldNotCompute>(TripCountSC)) {
667 LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
668 return false;
669 }
670
671 BasicBlock *PreHeader = L->getLoopPreheader();
672 BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
673 const DataLayout &DL = Header->getModule()->getDataLayout();
674 SCEVExpander Expander(*SE, DL, "loop-unroll");
675 if (!AllowExpensiveTripCount &&
676 Expander.isHighCostExpansion(TripCountSC, L, SCEVCheapExpansionBudget,
677 TTI, PreHeaderBR)) {
678 LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
679 return false;
680 }
681
682 // This constraint lets us deal with an overflowing trip count easily; see the
683 // comment on ModVal below.
684 if (Log2_32(Count) > BEWidth) {
686 dbgs()
687 << "Count failed constraint on overflow trip count calculation.\n");
688 return false;
689 }
690
691 // Loop structure is the following:
692 //
693 // PreHeader
694 // Header
695 // ...
696 // Latch
697 // LatchExit
698
699 BasicBlock *NewPreHeader;
700 BasicBlock *NewExit = nullptr;
701 BasicBlock *PrologExit = nullptr;
702 BasicBlock *EpilogPreHeader = nullptr;
703 BasicBlock *PrologPreHeader = nullptr;
704
705 if (UseEpilogRemainder) {
706 // If epilog remainder
707 // Split PreHeader to insert a branch around loop for unrolling.
708 NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
709 NewPreHeader->setName(PreHeader->getName() + ".new");
710 // Split LatchExit to create phi nodes from branch above.
711 NewExit = SplitBlockPredecessors(LatchExit, {Latch}, ".unr-lcssa", DT, LI,
712 nullptr, PreserveLCSSA);
713 // NewExit gets its DebugLoc from LatchExit, which is not part of the
714 // original Loop.
715 // Fix this by setting Loop's DebugLoc to NewExit.
716 auto *NewExitTerminator = NewExit->getTerminator();
717 NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc());
718 // Split NewExit to insert epilog remainder loop.
719 EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
720 EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
721
722 // If the latch exits from multiple level of nested loops, then
723 // by assumption there must be another loop exit which branches to the
724 // outer loop and we must adjust the loop for the newly inserted blocks
725 // to account for the fact that our epilogue is still in the same outer
726 // loop. Note that this leaves loopinfo temporarily out of sync with the
727 // CFG until the actual epilogue loop is inserted.
728 if (auto *ParentL = L->getParentLoop())
729 if (LI->getLoopFor(LatchExit) != ParentL) {
730 LI->removeBlock(NewExit);
731 ParentL->addBasicBlockToLoop(NewExit, *LI);
732 LI->removeBlock(EpilogPreHeader);
733 ParentL->addBasicBlockToLoop(EpilogPreHeader, *LI);
734 }
735
736 } else {
737 // If prolog remainder
738 // Split the original preheader twice to insert prolog remainder loop
739 PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
740 PrologPreHeader->setName(Header->getName() + ".prol.preheader");
741 PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
742 DT, LI);
743 PrologExit->setName(Header->getName() + ".prol.loopexit");
744 // Split PrologExit to get NewPreHeader.
745 NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
746 NewPreHeader->setName(PreHeader->getName() + ".new");
747 }
748 // Loop structure should be the following:
749 // Epilog Prolog
750 //
751 // PreHeader PreHeader
752 // *NewPreHeader *PrologPreHeader
753 // Header *PrologExit
754 // ... *NewPreHeader
755 // Latch Header
756 // *NewExit ...
757 // *EpilogPreHeader Latch
758 // LatchExit LatchExit
759
760 // Calculate conditions for branch around loop for unrolling
761 // in epilog case and around prolog remainder loop in prolog case.
762 // Compute the number of extra iterations required, which is:
763 // extra iterations = run-time trip count % loop unroll factor
764 PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
765 IRBuilder<> B(PreHeaderBR);
766 Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
767 PreHeaderBR);
768 Value *BECount;
769 // If there are other exits before the latch, that may cause the latch exit
770 // branch to never be executed, and the latch exit count may be poison.
771 // In this case, freeze the TripCount and base BECount on the frozen
772 // TripCount. We will introduce two branches using these values, and it's
773 // important that they see a consistent value (which would not be guaranteed
774 // if were frozen independently.)
775 if ((!OtherExits.empty() || !SE->loopHasNoAbnormalExits(L)) &&
776 !isGuaranteedNotToBeUndefOrPoison(TripCount, AC, PreHeaderBR, DT)) {
777 TripCount = B.CreateFreeze(TripCount);
778 BECount =
779 B.CreateAdd(TripCount, Constant::getAllOnesValue(TripCount->getType()));
780 } else {
781 // If we don't need to freeze, use SCEVExpander for BECount as well, to
782 // allow slightly better value reuse.
783 BECount =
784 Expander.expandCodeFor(BECountSC, BECountSC->getType(), PreHeaderBR);
785 }
786
787 Value * const ModVal = CreateTripRemainder(B, BECount, TripCount, Count);
788
789 Value *BranchVal =
790 UseEpilogRemainder ? B.CreateICmpULT(BECount,
791 ConstantInt::get(BECount->getType(),
792 Count - 1)) :
793 B.CreateIsNotNull(ModVal, "lcmp.mod");
794 BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
795 BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
796 // Branch to either remainder (extra iterations) loop or unrolling loop.
797 MDNode *BranchWeights = nullptr;
798 if (hasBranchWeightMD(*Latch->getTerminator())) {
799 // Assume loop is nearly always entered.
800 MDBuilder MDB(B.getContext());
801 BranchWeights = MDB.createBranchWeights(EpilogHeaderWeights);
802 }
803 B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop, BranchWeights);
804 PreHeaderBR->eraseFromParent();
805 if (DT) {
806 if (UseEpilogRemainder)
807 DT->changeImmediateDominator(NewExit, PreHeader);
808 else
809 DT->changeImmediateDominator(PrologExit, PreHeader);
810 }
811 Function *F = Header->getParent();
812 // Get an ordered list of blocks in the loop to help with the ordering of the
813 // cloned blocks in the prolog/epilog code
814 LoopBlocksDFS LoopBlocks(L);
815 LoopBlocks.perform(LI);
816
817 //
818 // For each extra loop iteration, create a copy of the loop's basic blocks
819 // and generate a condition that branches to the copy depending on the
820 // number of 'left over' iterations.
821 //
822 std::vector<BasicBlock *> NewBlocks;
824
825 // Clone all the basic blocks in the loop. If Count is 2, we don't clone
826 // the loop, otherwise we create a cloned loop to execute the extra
827 // iterations. This function adds the appropriate CFG connections.
828 BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
829 BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
830 Loop *remainderLoop = CloneLoopBlocks(
831 L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot,
832 NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI, Count);
833
834 // Insert the cloned blocks into the function.
835 F->splice(InsertBot->getIterator(), F, NewBlocks[0]->getIterator(), F->end());
836
837 // Now the loop blocks are cloned and the other exiting blocks from the
838 // remainder are connected to the original Loop's exit blocks. The remaining
839 // work is to update the phi nodes in the original loop, and take in the
840 // values from the cloned region.
841 for (auto *BB : OtherExits) {
842 // Given we preserve LCSSA form, we know that the values used outside the
843 // loop will be used through these phi nodes at the exit blocks that are
844 // transformed below.
845 for (PHINode &PN : BB->phis()) {
846 unsigned oldNumOperands = PN.getNumIncomingValues();
847 // Add the incoming values from the remainder code to the end of the phi
848 // node.
849 for (unsigned i = 0; i < oldNumOperands; i++){
850 auto *PredBB =PN.getIncomingBlock(i);
851 if (PredBB == Latch)
852 // The latch exit is handled seperately, see connectX
853 continue;
854 if (!L->contains(PredBB))
855 // Even if we had dedicated exits, the code above inserted an
856 // extra branch which can reach the latch exit.
857 continue;
858
859 auto *V = PN.getIncomingValue(i);
860 if (Instruction *I = dyn_cast<Instruction>(V))
861 if (L->contains(I))
862 V = VMap.lookup(I);
863 PN.addIncoming(V, cast<BasicBlock>(VMap[PredBB]));
864 }
865 }
866#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
867 for (BasicBlock *SuccBB : successors(BB)) {
868 assert(!(llvm::is_contained(OtherExits, SuccBB) || SuccBB == LatchExit) &&
869 "Breaks the definition of dedicated exits!");
870 }
871#endif
872 }
873
874 // Update the immediate dominator of the exit blocks and blocks that are
875 // reachable from the exit blocks. This is needed because we now have paths
876 // from both the original loop and the remainder code reaching the exit
877 // blocks. While the IDom of these exit blocks were from the original loop,
878 // now the IDom is the preheader (which decides whether the original loop or
879 // remainder code should run).
880 if (DT && !L->getExitingBlock()) {
881 SmallVector<BasicBlock *, 16> ChildrenToUpdate;
882 // NB! We have to examine the dom children of all loop blocks, not just
883 // those which are the IDom of the exit blocks. This is because blocks
884 // reachable from the exit blocks can have their IDom as the nearest common
885 // dominator of the exit blocks.
886 for (auto *BB : L->blocks()) {
887 auto *DomNodeBB = DT->getNode(BB);
888 for (auto *DomChild : DomNodeBB->children()) {
889 auto *DomChildBB = DomChild->getBlock();
890 if (!L->contains(LI->getLoopFor(DomChildBB)))
891 ChildrenToUpdate.push_back(DomChildBB);
892 }
893 }
894 for (auto *BB : ChildrenToUpdate)
895 DT->changeImmediateDominator(BB, PreHeader);
896 }
897
898 // Loop structure should be the following:
899 // Epilog Prolog
900 //
901 // PreHeader PreHeader
902 // NewPreHeader PrologPreHeader
903 // Header PrologHeader
904 // ... ...
905 // Latch PrologLatch
906 // NewExit PrologExit
907 // EpilogPreHeader NewPreHeader
908 // EpilogHeader Header
909 // ... ...
910 // EpilogLatch Latch
911 // LatchExit LatchExit
912
913 // Rewrite the cloned instruction operands to use the values created when the
914 // clone is created.
915 for (BasicBlock *BB : NewBlocks) {
916 Module *M = BB->getModule();
917 for (Instruction &I : *BB) {
918 RemapInstruction(&I, VMap,
920 RemapDPValueRange(M, I.getDbgRecordRange(), VMap,
922 }
923 }
924
925 if (UseEpilogRemainder) {
926 // Connect the epilog code to the original loop and update the
927 // PHI functions.
928 ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader,
929 NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count);
930
931 // Update counter in loop for unrolling.
932 // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.
933 // Subtle: TestVal can be 0 if we wrapped when computing the trip count,
934 // thus we must compare the post-increment (wrapping) value.
935 IRBuilder<> B2(NewPreHeader->getTerminator());
936 Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
937 BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
938 PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter");
939 NewIdx->insertBefore(Header->getFirstNonPHIIt());
940 B2.SetInsertPoint(LatchBR);
941 auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
942 auto *One = ConstantInt::get(NewIdx->getType(), 1);
943 Value *IdxNext = B2.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
944 auto Pred = LatchBR->getSuccessor(0) == Header ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
945 Value *IdxCmp = B2.CreateICmp(Pred, IdxNext, TestVal, NewIdx->getName() + ".ncmp");
946 NewIdx->addIncoming(Zero, NewPreHeader);
947 NewIdx->addIncoming(IdxNext, Latch);
948 LatchBR->setCondition(IdxCmp);
949 } else {
950 // Connect the prolog code to the original loop and update the
951 // PHI functions.
952 ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
953 NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE);
954 }
955
956 // If this loop is nested, then the loop unroller changes the code in the any
957 // of its parent loops, so the Scalar Evolution pass needs to be run again.
958 SE->forgetTopmostLoop(L);
959
960 // Verify that the Dom Tree and Loop Info are correct.
961#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
962 if (DT) {
963 assert(DT->verify(DominatorTree::VerificationLevel::Full));
964 LI->verify(*DT);
965 }
966#endif
967
968 // For unroll factor 2 remainder loop will have 1 iteration.
969 if (Count == 2 && DT && LI && SE) {
970 // TODO: This code could probably be pulled out into a helper function
971 // (e.g. breakLoopBackedgeAndSimplify) and reused in loop-deletion.
972 BasicBlock *RemainderLatch = remainderLoop->getLoopLatch();
973 assert(RemainderLatch);
974 SmallVector<BasicBlock*> RemainderBlocks(remainderLoop->getBlocks().begin(),
975 remainderLoop->getBlocks().end());
976 breakLoopBackedge(remainderLoop, *DT, *SE, *LI, nullptr);
977 remainderLoop = nullptr;
978
979 // Simplify loop values after breaking the backedge
980 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
982 for (BasicBlock *BB : RemainderBlocks) {
983 for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
984 if (Value *V = simplifyInstruction(&Inst, {DL, nullptr, DT, AC}))
985 if (LI->replacementPreservesLCSSAForm(&Inst, V))
986 Inst.replaceAllUsesWith(V);
988 DeadInsts.emplace_back(&Inst);
989 }
990 // We can't do recursive deletion until we're done iterating, as we might
991 // have a phi which (potentially indirectly) uses instructions later in
992 // the block we're iterating through.
994 }
995
996 // Merge latch into exit block.
997 auto *ExitBB = RemainderLatch->getSingleSuccessor();
998 assert(ExitBB && "required after breaking cond br backedge");
999 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
1000 MergeBlockIntoPredecessor(ExitBB, &DTU, LI);
1001 }
1002
1003 // Canonicalize to LoopSimplifyForm both original and remainder loops. We
1004 // cannot rely on the LoopUnrollPass to do this because it only does
1005 // canonicalization for parent/subloops and not the sibling loops.
1006 if (OtherExits.size() > 0) {
1007 // Generate dedicated exit blocks for the original loop, to preserve
1008 // LoopSimplifyForm.
1009 formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA);
1010 // Generate dedicated exit blocks for the remainder loop if one exists, to
1011 // preserve LoopSimplifyForm.
1012 if (remainderLoop)
1013 formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA);
1014 }
1015
1016 auto UnrollResult = LoopUnrollResult::Unmodified;
1017 if (remainderLoop && UnrollRemainder) {
1018 LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
1019 UnrollResult =
1020 UnrollLoop(remainderLoop,
1021 {/*Count*/ Count - 1, /*Force*/ false, /*Runtime*/ false,
1022 /*AllowExpensiveTripCount*/ false,
1023 /*UnrollRemainder*/ false, ForgetAllSCEV},
1024 LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA);
1025 }
1026
1027 if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
1028 *ResultLoop = remainderLoop;
1029 NumRuntimeUnrolled++;
1030 return true;
1031}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static bool canProfitablyUnrollMultiExitLoop(Loop *L, SmallVectorImpl< BasicBlock * > &OtherExits, BasicBlock *LatchExit, bool UseEpilogRemainder)
Returns true if we can profitably unroll the multi-exit loop L.
static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector< BasicBlock * > &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, unsigned Count)
Create a clone of the blocks in a loop and connect them together.
static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, BasicBlock *Exit, BasicBlock *PreHeader, BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE, unsigned Count)
Connect the unrolling epilog code to the original loop.
static const uint32_t UnrolledLoopHeaderWeights[]
static Value * CreateTripRemainder(IRBuilder<> &B, Value *BECount, Value *TripCount, unsigned Count)
Calculate ModVal = (BECount + 1) % Count on the abstract integer domain accounting for the possibilit...
static cl::opt< bool > UnrollRuntimeOtherExitPredictable("unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden, cl::desc("Assume the non latch exit block to be predictable"))
static const uint32_t EpilogHeaderWeights[]
static cl::opt< bool > UnrollRuntimeMultiExit("unroll-runtime-multi-exit", cl::init(false), cl::Hidden, cl::desc("Allow runtime unrolling for loops with multiple exits, when " "epilog is generated"))
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *PrologExit, BasicBlock *OriginalLoopLatchExit, BasicBlock *PreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE)
Connect the unrolling prolog code to the original loop.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Module.h This file contains the declarations for the Module class.
This file contains the declarations for profiling metadata utility functions.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:498
InstListType::const_iterator getFirstNonPHIIt() const
Iterator returning form of getFirstNonPHI.
Definition: BasicBlock.cpp:354
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:469
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:164
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:220
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
DomTreeNodeBase * getIDom() const
NodeT * getBlock() const
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
Definition: Dominators.cpp:344
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2228
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1338
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1114
LLVMContext & getContext() const
Definition: IRBuilder.h:176
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1321
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2334
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2649
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const BasicBlock * getParent() const
Definition: Instruction.h:151
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:450
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Store the result of a depth first search within basic blocks contained by a single loop.
Definition: LoopIterator.h:97
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
Definition: LoopIterator.h:136
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
Definition: LoopIterator.h:101
void perform(const LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopInfo.cpp:1222
RPOIterator endRPO() const
Definition: LoopIterator.h:140
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:439
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:525
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop's loop id metadata.
Definition: LoopInfo.cpp:537
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:501
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:1067
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
void setIncomingValueForBlock(const BasicBlock *BB, Value *V)
Set every incoming value(s) for block BB to V.
void setIncomingBlock(unsigned i, BasicBlock *BB)
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
This class uses information about analyze scalars to rewrite expressions in canonical form.
bool isHighCostExpansion(ArrayRef< const SCEV * > Exprs, Loop *L, unsigned Budget, const TargetTransformInfo *TTI, const Instruction *At)
Return true for expressions that can't be evaluated at runtime within given Budget.
Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const SCEV * getConstant(ConstantInt *V)
bool loopHasNoAbnormalExits(const Loop *L)
Return true if the loop has no abnormal exits.
void forgetTopmostLoop(const Loop *L)
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1808
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: ValueMap.h:164
bool erase(const KeyT &Val)
Definition: ValueMap.h:190
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
self_iterator getIterator()
Definition: ilist_node.h:109
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:533
auto successors(const MachineBasicBlock *BB)
std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:263
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:665
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:399
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:313
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:94
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:76
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
cl::opt< unsigned > SCEVCheapExpansionBudget
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:263
void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, MemorySSA *MSSA)
Remove the backedge of the specified loop.
Definition: LoopUtils.cpp:724
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
const char *const LLVMLoopUnrollFollowupAll
Definition: UnrollLoop.h:42
TargetTransformInfo TTI
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
void RemapDPValueRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DPValue V using the value map VM.
Definition: ValueMapper.h:279
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:57
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
const char *const LLVMLoopUnrollFollowupRemainder
Definition: UnrollLoop.h:45
const Loop * addClonedBlockToLoopInfo(BasicBlock *OriginalBB, BasicBlock *ClonedBB, LoopInfo *LI, NewLoopsMap &NewLoops)
Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary and adds a mapping from the o...
Definition: LoopUnroll.cpp:147
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1888
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:295
bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop=nullptr)
Insert code in the prolog/epilog code when unrolling a loop with a run-time trip-count.